mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu-mainline.git
				synced 2025-03-21 01:53:15 +00:00 
			
		
		
		
	Merge pull request #3677 from FernandoS27/better-sync
Introduce Predictive Flushing and Improve ASYNC GPU
This commit is contained in:
		
						commit
						bf2ddb8fd5
					
				@ -92,7 +92,7 @@ void LogSettings() {
 | 
				
			|||||||
    LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
 | 
					    LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
 | 
				
			||||||
    LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
 | 
					    LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
 | 
				
			||||||
    LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
 | 
					    LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
 | 
				
			||||||
    LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
 | 
					    LogSetting("Renderer_GPUAccuracyLevel", Settings::values.gpu_accuracy);
 | 
				
			||||||
    LogSetting("Renderer_UseAsynchronousGpuEmulation",
 | 
					    LogSetting("Renderer_UseAsynchronousGpuEmulation",
 | 
				
			||||||
               Settings::values.use_asynchronous_gpu_emulation);
 | 
					               Settings::values.use_asynchronous_gpu_emulation);
 | 
				
			||||||
    LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
 | 
					    LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
 | 
				
			||||||
@ -109,4 +109,12 @@ void LogSettings() {
 | 
				
			|||||||
    LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);
 | 
					    LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool IsGPULevelExtreme() {
 | 
				
			||||||
 | 
					    return values.gpu_accuracy == GPUAccuracy::Extreme;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool IsGPULevelHigh() {
 | 
				
			||||||
 | 
					    return values.gpu_accuracy == GPUAccuracy::Extreme || values.gpu_accuracy == GPUAccuracy::High;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // namespace Settings
 | 
					} // namespace Settings
 | 
				
			||||||
 | 
				
			|||||||
@ -376,6 +376,12 @@ enum class RendererBackend {
 | 
				
			|||||||
    Vulkan = 1,
 | 
					    Vulkan = 1,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					enum class GPUAccuracy : u32 {
 | 
				
			||||||
 | 
					    Normal = 0,
 | 
				
			||||||
 | 
					    High = 1,
 | 
				
			||||||
 | 
					    Extreme = 2,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct Values {
 | 
					struct Values {
 | 
				
			||||||
    // System
 | 
					    // System
 | 
				
			||||||
    bool use_docked_mode;
 | 
					    bool use_docked_mode;
 | 
				
			||||||
@ -436,7 +442,7 @@ struct Values {
 | 
				
			|||||||
    bool use_frame_limit;
 | 
					    bool use_frame_limit;
 | 
				
			||||||
    u16 frame_limit;
 | 
					    u16 frame_limit;
 | 
				
			||||||
    bool use_disk_shader_cache;
 | 
					    bool use_disk_shader_cache;
 | 
				
			||||||
    bool use_accurate_gpu_emulation;
 | 
					    GPUAccuracy gpu_accuracy;
 | 
				
			||||||
    bool use_asynchronous_gpu_emulation;
 | 
					    bool use_asynchronous_gpu_emulation;
 | 
				
			||||||
    bool use_vsync;
 | 
					    bool use_vsync;
 | 
				
			||||||
    bool force_30fps_mode;
 | 
					    bool force_30fps_mode;
 | 
				
			||||||
@ -480,6 +486,9 @@ struct Values {
 | 
				
			|||||||
    std::map<u64, std::vector<std::string>> disabled_addons;
 | 
					    std::map<u64, std::vector<std::string>> disabled_addons;
 | 
				
			||||||
} extern values;
 | 
					} extern values;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool IsGPULevelExtreme();
 | 
				
			||||||
 | 
					bool IsGPULevelHigh();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void Apply();
 | 
					void Apply();
 | 
				
			||||||
void LogSettings();
 | 
					void LogSettings();
 | 
				
			||||||
} // namespace Settings
 | 
					} // namespace Settings
 | 
				
			||||||
 | 
				
			|||||||
@ -56,6 +56,18 @@ static const char* TranslateRenderer(Settings::RendererBackend backend) {
 | 
				
			|||||||
    return "Unknown";
 | 
					    return "Unknown";
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
 | 
				
			||||||
 | 
					    switch (backend) {
 | 
				
			||||||
 | 
					    case Settings::GPUAccuracy::Normal:
 | 
				
			||||||
 | 
					        return "Normal";
 | 
				
			||||||
 | 
					    case Settings::GPUAccuracy::High:
 | 
				
			||||||
 | 
					        return "High";
 | 
				
			||||||
 | 
					    case Settings::GPUAccuracy::Extreme:
 | 
				
			||||||
 | 
					        return "Extreme";
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return "Unknown";
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
u64 GetTelemetryId() {
 | 
					u64 GetTelemetryId() {
 | 
				
			||||||
    u64 telemetry_id{};
 | 
					    u64 telemetry_id{};
 | 
				
			||||||
    const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) +
 | 
					    const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) +
 | 
				
			||||||
@ -184,8 +196,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
 | 
				
			|||||||
    AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
 | 
					    AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
 | 
				
			||||||
    AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);
 | 
					    AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);
 | 
				
			||||||
    AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
 | 
					    AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
 | 
				
			||||||
    AddField(field_type, "Renderer_UseAccurateGpuEmulation",
 | 
					    AddField(field_type, "Renderer_GPUAccuracyLevel",
 | 
				
			||||||
             Settings::values.use_accurate_gpu_emulation);
 | 
					             TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy));
 | 
				
			||||||
    AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
 | 
					    AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
 | 
				
			||||||
             Settings::values.use_asynchronous_gpu_emulation);
 | 
					             Settings::values.use_asynchronous_gpu_emulation);
 | 
				
			||||||
    AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
 | 
					    AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
 | 
				
			||||||
 | 
				
			|||||||
@ -23,6 +23,7 @@ add_library(video_core STATIC
 | 
				
			|||||||
    engines/shader_bytecode.h
 | 
					    engines/shader_bytecode.h
 | 
				
			||||||
    engines/shader_header.h
 | 
					    engines/shader_header.h
 | 
				
			||||||
    engines/shader_type.h
 | 
					    engines/shader_type.h
 | 
				
			||||||
 | 
					    fence_manager.h
 | 
				
			||||||
    gpu.cpp
 | 
					    gpu.cpp
 | 
				
			||||||
    gpu.h
 | 
					    gpu.h
 | 
				
			||||||
    gpu_asynch.cpp
 | 
					    gpu_asynch.cpp
 | 
				
			||||||
@ -51,6 +52,8 @@ add_library(video_core STATIC
 | 
				
			|||||||
    renderer_opengl/gl_buffer_cache.h
 | 
					    renderer_opengl/gl_buffer_cache.h
 | 
				
			||||||
    renderer_opengl/gl_device.cpp
 | 
					    renderer_opengl/gl_device.cpp
 | 
				
			||||||
    renderer_opengl/gl_device.h
 | 
					    renderer_opengl/gl_device.h
 | 
				
			||||||
 | 
					    renderer_opengl/gl_fence_manager.cpp
 | 
				
			||||||
 | 
					    renderer_opengl/gl_fence_manager.h
 | 
				
			||||||
    renderer_opengl/gl_framebuffer_cache.cpp
 | 
					    renderer_opengl/gl_framebuffer_cache.cpp
 | 
				
			||||||
    renderer_opengl/gl_framebuffer_cache.h
 | 
					    renderer_opengl/gl_framebuffer_cache.h
 | 
				
			||||||
    renderer_opengl/gl_rasterizer.cpp
 | 
					    renderer_opengl/gl_rasterizer.cpp
 | 
				
			||||||
@ -176,6 +179,8 @@ if (ENABLE_VULKAN)
 | 
				
			|||||||
        renderer_vulkan/vk_descriptor_pool.h
 | 
					        renderer_vulkan/vk_descriptor_pool.h
 | 
				
			||||||
        renderer_vulkan/vk_device.cpp
 | 
					        renderer_vulkan/vk_device.cpp
 | 
				
			||||||
        renderer_vulkan/vk_device.h
 | 
					        renderer_vulkan/vk_device.h
 | 
				
			||||||
 | 
					        renderer_vulkan/vk_fence_manager.cpp
 | 
				
			||||||
 | 
					        renderer_vulkan/vk_fence_manager.h
 | 
				
			||||||
        renderer_vulkan/vk_graphics_pipeline.cpp
 | 
					        renderer_vulkan/vk_graphics_pipeline.cpp
 | 
				
			||||||
        renderer_vulkan/vk_graphics_pipeline.h
 | 
					        renderer_vulkan/vk_graphics_pipeline.h
 | 
				
			||||||
        renderer_vulkan/vk_image.cpp
 | 
					        renderer_vulkan/vk_image.cpp
 | 
				
			||||||
 | 
				
			|||||||
@ -5,6 +5,7 @@
 | 
				
			|||||||
#pragma once
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <array>
 | 
					#include <array>
 | 
				
			||||||
 | 
					#include <list>
 | 
				
			||||||
#include <memory>
 | 
					#include <memory>
 | 
				
			||||||
#include <mutex>
 | 
					#include <mutex>
 | 
				
			||||||
#include <unordered_map>
 | 
					#include <unordered_map>
 | 
				
			||||||
@ -18,8 +19,10 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include "common/alignment.h"
 | 
					#include "common/alignment.h"
 | 
				
			||||||
#include "common/common_types.h"
 | 
					#include "common/common_types.h"
 | 
				
			||||||
 | 
					#include "common/logging/log.h"
 | 
				
			||||||
#include "core/core.h"
 | 
					#include "core/core.h"
 | 
				
			||||||
#include "core/memory.h"
 | 
					#include "core/memory.h"
 | 
				
			||||||
 | 
					#include "core/settings.h"
 | 
				
			||||||
#include "video_core/buffer_cache/buffer_block.h"
 | 
					#include "video_core/buffer_cache/buffer_block.h"
 | 
				
			||||||
#include "video_core/buffer_cache/map_interval.h"
 | 
					#include "video_core/buffer_cache/map_interval.h"
 | 
				
			||||||
#include "video_core/memory_manager.h"
 | 
					#include "video_core/memory_manager.h"
 | 
				
			||||||
@ -79,6 +82,9 @@ public:
 | 
				
			|||||||
        auto map = MapAddress(block, gpu_addr, cpu_addr, size);
 | 
					        auto map = MapAddress(block, gpu_addr, cpu_addr, size);
 | 
				
			||||||
        if (is_written) {
 | 
					        if (is_written) {
 | 
				
			||||||
            map->MarkAsModified(true, GetModifiedTicks());
 | 
					            map->MarkAsModified(true, GetModifiedTicks());
 | 
				
			||||||
 | 
					            if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
 | 
				
			||||||
 | 
					                MarkForAsyncFlush(map);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
            if (!map->IsWritten()) {
 | 
					            if (!map->IsWritten()) {
 | 
				
			||||||
                map->MarkAsWritten(true);
 | 
					                map->MarkAsWritten(true);
 | 
				
			||||||
                MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
 | 
					                MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
 | 
				
			||||||
@ -137,11 +143,22 @@ public:
 | 
				
			|||||||
        });
 | 
					        });
 | 
				
			||||||
        for (auto& object : objects) {
 | 
					        for (auto& object : objects) {
 | 
				
			||||||
            if (object->IsModified() && object->IsRegistered()) {
 | 
					            if (object->IsModified() && object->IsRegistered()) {
 | 
				
			||||||
 | 
					                mutex.unlock();
 | 
				
			||||||
                FlushMap(object);
 | 
					                FlushMap(object);
 | 
				
			||||||
 | 
					                mutex.lock();
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool MustFlushRegion(VAddr addr, std::size_t size) {
 | 
				
			||||||
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        const std::vector<MapInterval> objects = GetMapsInRange(addr, size);
 | 
				
			||||||
 | 
					        return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) {
 | 
				
			||||||
 | 
					            return map->IsModified() && map->IsRegistered();
 | 
				
			||||||
 | 
					        });
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Mark the specified region as being invalidated
 | 
					    /// Mark the specified region as being invalidated
 | 
				
			||||||
    void InvalidateRegion(VAddr addr, u64 size) {
 | 
					    void InvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
        std::lock_guard lock{mutex};
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
@ -154,6 +171,77 @@ public:
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void OnCPUWrite(VAddr addr, std::size_t size) {
 | 
				
			||||||
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (const auto& object : GetMapsInRange(addr, size)) {
 | 
				
			||||||
 | 
					            if (object->IsMemoryMarked() && object->IsRegistered()) {
 | 
				
			||||||
 | 
					                UnmarkMemory(object);
 | 
				
			||||||
 | 
					                object->SetSyncPending(true);
 | 
				
			||||||
 | 
					                marked_for_unregister.emplace_back(object);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void SyncGuestHost() {
 | 
				
			||||||
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (const auto& object : marked_for_unregister) {
 | 
				
			||||||
 | 
					            if (object->IsRegistered()) {
 | 
				
			||||||
 | 
					                object->SetSyncPending(false);
 | 
				
			||||||
 | 
					                Unregister(object);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        marked_for_unregister.clear();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void CommitAsyncFlushes() {
 | 
				
			||||||
 | 
					        if (uncommitted_flushes) {
 | 
				
			||||||
 | 
					            auto commit_list = std::make_shared<std::list<MapInterval>>();
 | 
				
			||||||
 | 
					            for (auto& map : *uncommitted_flushes) {
 | 
				
			||||||
 | 
					                if (map->IsRegistered() && map->IsModified()) {
 | 
				
			||||||
 | 
					                    // TODO(Blinkhawk): Implement backend asynchronous flushing
 | 
				
			||||||
 | 
					                    // AsyncFlushMap(map)
 | 
				
			||||||
 | 
					                    commit_list->push_back(map);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            if (!commit_list->empty()) {
 | 
				
			||||||
 | 
					                committed_flushes.push_back(commit_list);
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                committed_flushes.emplace_back();
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            committed_flushes.emplace_back();
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        uncommitted_flushes.reset();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool ShouldWaitAsyncFlushes() const {
 | 
				
			||||||
 | 
					        return !committed_flushes.empty() && committed_flushes.front() != nullptr;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool HasUncommittedFlushes() const {
 | 
				
			||||||
 | 
					        return uncommitted_flushes != nullptr;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void PopAsyncFlushes() {
 | 
				
			||||||
 | 
					        if (committed_flushes.empty()) {
 | 
				
			||||||
 | 
					            return;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        auto& flush_list = committed_flushes.front();
 | 
				
			||||||
 | 
					        if (!flush_list) {
 | 
				
			||||||
 | 
					            committed_flushes.pop_front();
 | 
				
			||||||
 | 
					            return;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        for (MapInterval& map : *flush_list) {
 | 
				
			||||||
 | 
					            if (map->IsRegistered()) {
 | 
				
			||||||
 | 
					                // TODO(Blinkhawk): Replace this for reading the asynchronous flush
 | 
				
			||||||
 | 
					                FlushMap(map);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        committed_flushes.pop_front();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
 | 
					    virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
protected:
 | 
					protected:
 | 
				
			||||||
@ -196,17 +284,30 @@ protected:
 | 
				
			|||||||
        const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
 | 
					        const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
 | 
				
			||||||
        mapped_addresses.insert({interval, new_map});
 | 
					        mapped_addresses.insert({interval, new_map});
 | 
				
			||||||
        rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
 | 
					        rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
 | 
				
			||||||
 | 
					        new_map->SetMemoryMarked(true);
 | 
				
			||||||
        if (inherit_written) {
 | 
					        if (inherit_written) {
 | 
				
			||||||
            MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
 | 
					            MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
 | 
				
			||||||
            new_map->MarkAsWritten(true);
 | 
					            new_map->MarkAsWritten(true);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Unregisters an object from the cache
 | 
					    void UnmarkMemory(const MapInterval& map) {
 | 
				
			||||||
    void Unregister(MapInterval& map) {
 | 
					        if (!map->IsMemoryMarked()) {
 | 
				
			||||||
 | 
					            return;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
        const std::size_t size = map->GetEnd() - map->GetStart();
 | 
					        const std::size_t size = map->GetEnd() - map->GetStart();
 | 
				
			||||||
        rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
 | 
					        rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
 | 
				
			||||||
 | 
					        map->SetMemoryMarked(false);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Unregisters an object from the cache
 | 
				
			||||||
 | 
					    void Unregister(const MapInterval& map) {
 | 
				
			||||||
 | 
					        UnmarkMemory(map);
 | 
				
			||||||
        map->MarkAsRegistered(false);
 | 
					        map->MarkAsRegistered(false);
 | 
				
			||||||
 | 
					        if (map->IsSyncPending()) {
 | 
				
			||||||
 | 
					            marked_for_unregister.remove(map);
 | 
				
			||||||
 | 
					            map->SetSyncPending(false);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
        if (map->IsWritten()) {
 | 
					        if (map->IsWritten()) {
 | 
				
			||||||
            UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
 | 
					            UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@ -264,6 +365,9 @@ private:
 | 
				
			|||||||
        MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
 | 
					        MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
 | 
				
			||||||
        if (modified_inheritance) {
 | 
					        if (modified_inheritance) {
 | 
				
			||||||
            new_map->MarkAsModified(true, GetModifiedTicks());
 | 
					            new_map->MarkAsModified(true, GetModifiedTicks());
 | 
				
			||||||
 | 
					            if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
 | 
				
			||||||
 | 
					                MarkForAsyncFlush(new_map);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        Register(new_map, write_inheritance);
 | 
					        Register(new_map, write_inheritance);
 | 
				
			||||||
        return new_map;
 | 
					        return new_map;
 | 
				
			||||||
@ -450,6 +554,13 @@ private:
 | 
				
			|||||||
        return false;
 | 
					        return false;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void MarkForAsyncFlush(MapInterval& map) {
 | 
				
			||||||
 | 
					        if (!uncommitted_flushes) {
 | 
				
			||||||
 | 
					            uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>();
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        uncommitted_flushes->insert(map);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    VideoCore::RasterizerInterface& rasterizer;
 | 
					    VideoCore::RasterizerInterface& rasterizer;
 | 
				
			||||||
    Core::System& system;
 | 
					    Core::System& system;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -479,6 +590,10 @@ private:
 | 
				
			|||||||
    u64 modified_ticks = 0;
 | 
					    u64 modified_ticks = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::vector<u8> staging_buffer;
 | 
					    std::vector<u8> staging_buffer;
 | 
				
			||||||
 | 
					    std::list<MapInterval> marked_for_unregister;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{};
 | 
				
			||||||
 | 
					    std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::recursive_mutex mutex;
 | 
					    std::recursive_mutex mutex;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
				
			|||||||
@ -46,6 +46,22 @@ public:
 | 
				
			|||||||
        return is_registered;
 | 
					        return is_registered;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void SetMemoryMarked(bool is_memory_marked_) {
 | 
				
			||||||
 | 
					        is_memory_marked = is_memory_marked_;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool IsMemoryMarked() const {
 | 
				
			||||||
 | 
					        return is_memory_marked;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void SetSyncPending(bool is_sync_pending_) {
 | 
				
			||||||
 | 
					        is_sync_pending = is_sync_pending_;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool IsSyncPending() const {
 | 
				
			||||||
 | 
					        return is_sync_pending;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    VAddr GetStart() const {
 | 
					    VAddr GetStart() const {
 | 
				
			||||||
        return start;
 | 
					        return start;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -83,6 +99,8 @@ private:
 | 
				
			|||||||
    bool is_written{};
 | 
					    bool is_written{};
 | 
				
			||||||
    bool is_modified{};
 | 
					    bool is_modified{};
 | 
				
			||||||
    bool is_registered{};
 | 
					    bool is_registered{};
 | 
				
			||||||
 | 
					    bool is_memory_marked{};
 | 
				
			||||||
 | 
					    bool is_sync_pending{};
 | 
				
			||||||
    u64 ticks{};
 | 
					    u64 ticks{};
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -21,6 +21,7 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
 | 
				
			|||||||
void DmaPusher::DispatchCalls() {
 | 
					void DmaPusher::DispatchCalls() {
 | 
				
			||||||
    MICROPROFILE_SCOPE(DispatchCalls);
 | 
					    MICROPROFILE_SCOPE(DispatchCalls);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    gpu.SyncGuestHost();
 | 
				
			||||||
    // On entering GPU code, assume all memory may be touched by the ARM core.
 | 
					    // On entering GPU code, assume all memory may be touched by the ARM core.
 | 
				
			||||||
    gpu.Maxwell3D().OnMemoryWrite();
 | 
					    gpu.Maxwell3D().OnMemoryWrite();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -32,6 +33,8 @@ void DmaPusher::DispatchCalls() {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    gpu.FlushCommands();
 | 
					    gpu.FlushCommands();
 | 
				
			||||||
 | 
					    gpu.SyncGuestHost();
 | 
				
			||||||
 | 
					    gpu.OnCommandListEnd();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bool DmaPusher::Step() {
 | 
					bool DmaPusher::Step() {
 | 
				
			||||||
 | 
				
			|||||||
@ -404,7 +404,11 @@ void Maxwell3D::ProcessQueryGet() {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    switch (regs.query.query_get.operation) {
 | 
					    switch (regs.query.query_get.operation) {
 | 
				
			||||||
    case Regs::QueryOperation::Release:
 | 
					    case Regs::QueryOperation::Release:
 | 
				
			||||||
 | 
					        if (regs.query.query_get.fence == 1) {
 | 
				
			||||||
 | 
					            rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
            StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
 | 
					            StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
        break;
 | 
					        break;
 | 
				
			||||||
    case Regs::QueryOperation::Acquire:
 | 
					    case Regs::QueryOperation::Acquire:
 | 
				
			||||||
        // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
 | 
					        // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
 | 
				
			||||||
@ -483,7 +487,7 @@ void Maxwell3D::ProcessSyncPoint() {
 | 
				
			|||||||
    const u32 increment = regs.sync_info.increment.Value();
 | 
					    const u32 increment = regs.sync_info.increment.Value();
 | 
				
			||||||
    [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
 | 
					    [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
 | 
				
			||||||
    if (increment) {
 | 
					    if (increment) {
 | 
				
			||||||
        system.GPU().IncrementSyncPoint(sync_point);
 | 
					        rasterizer.SignalSyncPoint(sync_point);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -104,8 +104,13 @@ void MaxwellDMA::HandleCopy() {
 | 
				
			|||||||
            write_buffer.resize(dst_size);
 | 
					            write_buffer.resize(dst_size);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (Settings::IsGPULevelExtreme()) {
 | 
				
			||||||
            memory_manager.ReadBlock(source, read_buffer.data(), src_size);
 | 
					            memory_manager.ReadBlock(source, read_buffer.data(), src_size);
 | 
				
			||||||
            memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
 | 
					            memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
 | 
				
			||||||
 | 
					            memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        Texture::UnswizzleSubrect(
 | 
					        Texture::UnswizzleSubrect(
 | 
				
			||||||
            regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
 | 
					            regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
 | 
				
			||||||
@ -136,7 +141,7 @@ void MaxwellDMA::HandleCopy() {
 | 
				
			|||||||
            write_buffer.resize(dst_size);
 | 
					            write_buffer.resize(dst_size);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (Settings::values.use_accurate_gpu_emulation) {
 | 
					        if (Settings::IsGPULevelExtreme()) {
 | 
				
			||||||
            memory_manager.ReadBlock(source, read_buffer.data(), src_size);
 | 
					            memory_manager.ReadBlock(source, read_buffer.data(), src_size);
 | 
				
			||||||
            memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
 | 
					            memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										170
									
								
								src/video_core/fence_manager.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										170
									
								
								src/video_core/fence_manager.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,170 @@
 | 
				
			|||||||
 | 
					// Copyright 2020 yuzu Emulator Project
 | 
				
			||||||
 | 
					// Licensed under GPLv2 or any later version
 | 
				
			||||||
 | 
					// Refer to the license.txt file included.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <algorithm>
 | 
				
			||||||
 | 
					#include <array>
 | 
				
			||||||
 | 
					#include <memory>
 | 
				
			||||||
 | 
					#include <queue>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "common/assert.h"
 | 
				
			||||||
 | 
					#include "common/common_types.h"
 | 
				
			||||||
 | 
					#include "core/core.h"
 | 
				
			||||||
 | 
					#include "core/memory.h"
 | 
				
			||||||
 | 
					#include "core/settings.h"
 | 
				
			||||||
 | 
					#include "video_core/gpu.h"
 | 
				
			||||||
 | 
					#include "video_core/memory_manager.h"
 | 
				
			||||||
 | 
					#include "video_core/rasterizer_interface.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace VideoCommon {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class FenceBase {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					    FenceBase(u32 payload, bool is_stubbed)
 | 
				
			||||||
 | 
					        : address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    FenceBase(GPUVAddr address, u32 payload, bool is_stubbed)
 | 
				
			||||||
 | 
					        : address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    GPUVAddr GetAddress() const {
 | 
				
			||||||
 | 
					        return address;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    u32 GetPayload() const {
 | 
				
			||||||
 | 
					        return payload;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool IsSemaphore() const {
 | 
				
			||||||
 | 
					        return is_semaphore;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					private:
 | 
				
			||||||
 | 
					    GPUVAddr address;
 | 
				
			||||||
 | 
					    u32 payload;
 | 
				
			||||||
 | 
					    bool is_semaphore;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					protected:
 | 
				
			||||||
 | 
					    bool is_stubbed;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
 | 
				
			||||||
 | 
					class FenceManager {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					    void SignalSemaphore(GPUVAddr addr, u32 value) {
 | 
				
			||||||
 | 
					        TryReleasePendingFences();
 | 
				
			||||||
 | 
					        const bool should_flush = ShouldFlush();
 | 
				
			||||||
 | 
					        CommitAsyncFlushes();
 | 
				
			||||||
 | 
					        TFence new_fence = CreateFence(addr, value, !should_flush);
 | 
				
			||||||
 | 
					        fences.push(new_fence);
 | 
				
			||||||
 | 
					        QueueFence(new_fence);
 | 
				
			||||||
 | 
					        if (should_flush) {
 | 
				
			||||||
 | 
					            rasterizer.FlushCommands();
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        rasterizer.SyncGuestHost();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void SignalSyncPoint(u32 value) {
 | 
				
			||||||
 | 
					        TryReleasePendingFences();
 | 
				
			||||||
 | 
					        const bool should_flush = ShouldFlush();
 | 
				
			||||||
 | 
					        CommitAsyncFlushes();
 | 
				
			||||||
 | 
					        TFence new_fence = CreateFence(value, !should_flush);
 | 
				
			||||||
 | 
					        fences.push(new_fence);
 | 
				
			||||||
 | 
					        QueueFence(new_fence);
 | 
				
			||||||
 | 
					        if (should_flush) {
 | 
				
			||||||
 | 
					            rasterizer.FlushCommands();
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        rasterizer.SyncGuestHost();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void WaitPendingFences() {
 | 
				
			||||||
 | 
					        auto& gpu{system.GPU()};
 | 
				
			||||||
 | 
					        auto& memory_manager{gpu.MemoryManager()};
 | 
				
			||||||
 | 
					        while (!fences.empty()) {
 | 
				
			||||||
 | 
					            TFence& current_fence = fences.front();
 | 
				
			||||||
 | 
					            if (ShouldWait()) {
 | 
				
			||||||
 | 
					                WaitFence(current_fence);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            PopAsyncFlushes();
 | 
				
			||||||
 | 
					            if (current_fence->IsSemaphore()) {
 | 
				
			||||||
 | 
					                memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                gpu.IncrementSyncPoint(current_fence->GetPayload());
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            fences.pop();
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					protected:
 | 
				
			||||||
 | 
					    FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
 | 
				
			||||||
 | 
					                 TTextureCache& texture_cache, TTBufferCache& buffer_cache,
 | 
				
			||||||
 | 
					                 TQueryCache& query_cache)
 | 
				
			||||||
 | 
					        : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
 | 
				
			||||||
 | 
					          buffer_cache{buffer_cache}, query_cache{query_cache} {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    virtual ~FenceManager() {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
 | 
				
			||||||
 | 
					    /// true
 | 
				
			||||||
 | 
					    virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
 | 
				
			||||||
 | 
					    /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
 | 
				
			||||||
 | 
					    virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
 | 
				
			||||||
 | 
					    /// Queues a fence into the backend if the fence isn't stubbed.
 | 
				
			||||||
 | 
					    virtual void QueueFence(TFence& fence) = 0;
 | 
				
			||||||
 | 
					    /// Notifies that the backend fence has been signaled/reached in host GPU.
 | 
				
			||||||
 | 
					    virtual bool IsFenceSignaled(TFence& fence) const = 0;
 | 
				
			||||||
 | 
					    /// Waits until a fence has been signalled by the host GPU.
 | 
				
			||||||
 | 
					    virtual void WaitFence(TFence& fence) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Core::System& system;
 | 
				
			||||||
 | 
					    VideoCore::RasterizerInterface& rasterizer;
 | 
				
			||||||
 | 
					    TTextureCache& texture_cache;
 | 
				
			||||||
 | 
					    TTBufferCache& buffer_cache;
 | 
				
			||||||
 | 
					    TQueryCache& query_cache;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					private:
 | 
				
			||||||
 | 
					    void TryReleasePendingFences() {
 | 
				
			||||||
 | 
					        auto& gpu{system.GPU()};
 | 
				
			||||||
 | 
					        auto& memory_manager{gpu.MemoryManager()};
 | 
				
			||||||
 | 
					        while (!fences.empty()) {
 | 
				
			||||||
 | 
					            TFence& current_fence = fences.front();
 | 
				
			||||||
 | 
					            if (ShouldWait() && !IsFenceSignaled(current_fence)) {
 | 
				
			||||||
 | 
					                return;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            PopAsyncFlushes();
 | 
				
			||||||
 | 
					            if (current_fence->IsSemaphore()) {
 | 
				
			||||||
 | 
					                memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                gpu.IncrementSyncPoint(current_fence->GetPayload());
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            fences.pop();
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool ShouldWait() const {
 | 
				
			||||||
 | 
					        return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
 | 
				
			||||||
 | 
					               query_cache.ShouldWaitAsyncFlushes();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool ShouldFlush() const {
 | 
				
			||||||
 | 
					        return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
 | 
				
			||||||
 | 
					               query_cache.HasUncommittedFlushes();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void PopAsyncFlushes() {
 | 
				
			||||||
 | 
					        texture_cache.PopAsyncFlushes();
 | 
				
			||||||
 | 
					        buffer_cache.PopAsyncFlushes();
 | 
				
			||||||
 | 
					        query_cache.PopAsyncFlushes();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void CommitAsyncFlushes() {
 | 
				
			||||||
 | 
					        texture_cache.CommitAsyncFlushes();
 | 
				
			||||||
 | 
					        buffer_cache.CommitAsyncFlushes();
 | 
				
			||||||
 | 
					        query_cache.CommitAsyncFlushes();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::queue<TFence> fences;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					} // namespace VideoCommon
 | 
				
			||||||
@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
 | 
				
			|||||||
    return true;
 | 
					    return true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Appends a flush request for [addr, addr + size) to the pending list and returns the
					/// fence id assigned to it. Ids come from a counter incremented under
					/// flush_request_mutex.
					u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
					    std::unique_lock guard{flush_request_mutex};
					    flush_requests.emplace_back(++last_flush_fence, addr, size);
					    // Still holding the mutex here, so this is the id that was just enqueued.
					    return last_flush_fence;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void GPU::TickWork() {
 | 
				
			||||||
 | 
					    std::unique_lock lck{flush_request_mutex};
 | 
				
			||||||
 | 
					    while (!flush_requests.empty()) {
 | 
				
			||||||
 | 
					        auto& request = flush_requests.front();
 | 
				
			||||||
 | 
					        const u64 fence = request.fence;
 | 
				
			||||||
 | 
					        const VAddr addr = request.addr;
 | 
				
			||||||
 | 
					        const std::size_t size = request.size;
 | 
				
			||||||
 | 
					        flush_requests.pop_front();
 | 
				
			||||||
 | 
					        flush_request_mutex.unlock();
 | 
				
			||||||
 | 
					        renderer->Rasterizer().FlushRegion(addr, size);
 | 
				
			||||||
 | 
					        current_flush_fence.store(fence);
 | 
				
			||||||
 | 
					        flush_request_mutex.lock();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
u64 GPU::GetTicks() const {
 | 
					u64 GPU::GetTicks() const {
 | 
				
			||||||
    // This values were reversed engineered by fincs from NVN
 | 
					    // This values were reversed engineered by fincs from NVN
 | 
				
			||||||
    // The gpu clock is reported in units of 385/625 nanoseconds
 | 
					    // The gpu clock is reported in units of 385/625 nanoseconds
 | 
				
			||||||
@ -142,6 +164,13 @@ void GPU::FlushCommands() {
 | 
				
			|||||||
    renderer->Rasterizer().FlushCommands();
 | 
					    renderer->Rasterizer().FlushCommands();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Forwards to the rasterizer's SyncGuestHost().
					void GPU::SyncGuestHost() {
					    auto& rasterizer = renderer->Rasterizer();
					    rasterizer.SyncGuestHost();
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Called when a command list ends: asks the rasterizer to release its fences.
					void GPU::OnCommandListEnd() {
					    auto& rasterizer = renderer->Rasterizer();
					    rasterizer.ReleaseFences();
					}
 | 
				
			||||||
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
 | 
					// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
 | 
				
			||||||
// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
 | 
					// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
 | 
				
			||||||
// So the values you see in docs might be multiplied by 4.
 | 
					// So the values you see in docs might be multiplied by 4.
 | 
				
			||||||
 | 
				
			|||||||
@ -155,7 +155,23 @@ public:
 | 
				
			|||||||
    /// Calls a GPU method.
 | 
					    /// Calls a GPU method.
 | 
				
			||||||
    void CallMethod(const MethodCall& method_call);
 | 
					    void CallMethod(const MethodCall& method_call);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Flush all current written commands into the host GPU for execution.
 | 
				
			||||||
    void FlushCommands();
 | 
					    void FlushCommands();
 | 
				
			||||||
 | 
					    /// Synchronizes CPU writes with Host GPU memory.
 | 
				
			||||||
 | 
					    void SyncGuestHost();
 | 
				
			||||||
 | 
					    /// Signal the ending of command list.
 | 
				
			||||||
 | 
					    virtual void OnCommandListEnd();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Request a host GPU memory flush from the CPU.
 | 
				
			||||||
 | 
					    u64 RequestFlush(VAddr addr, std::size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Obtains current flush request fence id.
 | 
				
			||||||
 | 
					    u64 CurrentFlushRequestFence() const {
					        // Acquire, not relaxed: ThreadManager::FlushRegion spins on this value to learn
					        // that its flush has completed. The acquire load pairs with the store made in
					        // TickWork after FlushRegion, so the flushed data is visible to the waiter once
					        // it observes the fence id.
					        return current_flush_fence.load(std::memory_order_acquire);
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Tick pending requests within the GPU.
 | 
				
			||||||
 | 
					    void TickWork();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Returns a reference to the Maxwell3D GPU engine.
 | 
					    /// Returns a reference to the Maxwell3D GPU engine.
 | 
				
			||||||
    Engines::Maxwell3D& Maxwell3D();
 | 
					    Engines::Maxwell3D& Maxwell3D();
 | 
				
			||||||
@ -325,6 +341,19 @@ private:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    std::condition_variable sync_cv;
 | 
					    std::condition_variable sync_cv;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    struct FlushRequest {
 | 
				
			||||||
 | 
					        FlushRequest(u64 fence, VAddr addr, std::size_t size)
 | 
				
			||||||
 | 
					            : fence{fence}, addr{addr}, size{size} {}
 | 
				
			||||||
 | 
					        u64 fence;
 | 
				
			||||||
 | 
					        VAddr addr;
 | 
				
			||||||
 | 
					        std::size_t size;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::list<FlushRequest> flush_requests;
 | 
				
			||||||
 | 
					    std::atomic<u64> current_flush_fence{};
 | 
				
			||||||
 | 
					    u64 last_flush_fence{};
 | 
				
			||||||
 | 
					    std::mutex flush_request_mutex;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const bool is_async;
 | 
					    const bool is_async;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const {
 | 
				
			|||||||
    gpu_thread.WaitIdle();
 | 
					    gpu_thread.WaitIdle();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Asynchronous variant: hands the end-of-command-list notification to the GPU thread
					/// manager rather than acting on it directly.
					void GPUAsynch::OnCommandListEnd() {
					    gpu_thread.OnCommandListEnd();
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // namespace VideoCommon
 | 
					} // namespace VideoCommon
 | 
				
			||||||
 | 
				
			|||||||
@ -32,6 +32,8 @@ public:
 | 
				
			|||||||
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
 | 
					    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void WaitIdle() const override;
 | 
					    void WaitIdle() const override;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void OnCommandListEnd() override;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
protected:
 | 
					protected:
 | 
				
			||||||
    void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
 | 
					    void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -6,6 +6,7 @@
 | 
				
			|||||||
#include "common/microprofile.h"
 | 
					#include "common/microprofile.h"
 | 
				
			||||||
#include "core/core.h"
 | 
					#include "core/core.h"
 | 
				
			||||||
#include "core/frontend/emu_window.h"
 | 
					#include "core/frontend/emu_window.h"
 | 
				
			||||||
 | 
					#include "core/settings.h"
 | 
				
			||||||
#include "video_core/dma_pusher.h"
 | 
					#include "video_core/dma_pusher.h"
 | 
				
			||||||
#include "video_core/gpu.h"
 | 
					#include "video_core/gpu.h"
 | 
				
			||||||
#include "video_core/gpu_thread.h"
 | 
					#include "video_core/gpu_thread.h"
 | 
				
			||||||
@ -14,8 +15,9 @@
 | 
				
			|||||||
namespace VideoCommon::GPUThread {
 | 
					namespace VideoCommon::GPUThread {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Runs the GPU thread
 | 
					/// Runs the GPU thread
 | 
				
			||||||
static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
 | 
					static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
 | 
				
			||||||
                      Tegra::DmaPusher& dma_pusher, SynchState& state) {
 | 
					                      Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
 | 
				
			||||||
 | 
					                      SynchState& state) {
 | 
				
			||||||
    MicroProfileOnThreadCreate("GpuThread");
 | 
					    MicroProfileOnThreadCreate("GpuThread");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Wait for first GPU command before acquiring the window context
 | 
					    // Wait for first GPU command before acquiring the window context
 | 
				
			||||||
@ -37,10 +39,14 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
 | 
				
			|||||||
            dma_pusher.DispatchCalls();
 | 
					            dma_pusher.DispatchCalls();
 | 
				
			||||||
        } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
 | 
					        } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
 | 
				
			||||||
            renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
 | 
					            renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
 | 
				
			||||||
 | 
					        } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
 | 
				
			||||||
 | 
					            renderer.Rasterizer().ReleaseFences();
 | 
				
			||||||
 | 
					        } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
 | 
				
			||||||
 | 
					            system.GPU().TickWork();
 | 
				
			||||||
        } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
 | 
					        } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
 | 
				
			||||||
            renderer.Rasterizer().FlushRegion(data->addr, data->size);
 | 
					            renderer.Rasterizer().FlushRegion(data->addr, data->size);
 | 
				
			||||||
        } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
 | 
					        } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
 | 
				
			||||||
            renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
 | 
					            renderer.Rasterizer().OnCPUWrite(data->addr, data->size);
 | 
				
			||||||
        } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
 | 
					        } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
 | 
				
			||||||
            return;
 | 
					            return;
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
@ -65,8 +71,8 @@ ThreadManager::~ThreadManager() {
 | 
				
			|||||||
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
 | 
					void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
 | 
				
			||||||
                                Core::Frontend::GraphicsContext& context,
 | 
					                                Core::Frontend::GraphicsContext& context,
 | 
				
			||||||
                                Tegra::DmaPusher& dma_pusher) {
 | 
					                                Tegra::DmaPusher& dma_pusher) {
 | 
				
			||||||
    thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher),
 | 
					    thread = std::thread{RunThread,         std::ref(system),     std::ref(renderer),
 | 
				
			||||||
                         std::ref(state)};
 | 
					                         std::ref(context), std::ref(dma_pusher), std::ref(state)};
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
 | 
					void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
 | 
				
			||||||
@ -78,16 +84,29 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Flushes a region according to the configured GPU accuracy level:
///  - below "high": queues a FlushRegionCommand for the GPU thread and returns immediately;
///  - "high" but not "extreme": does nothing;
///  - "extreme": if the rasterizer reports the region must be flushed, requests the flush,
///    wakes the GPU thread with a GPUTickCommand and blocks until the request is processed.
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
    if (!Settings::IsGPULevelHigh()) {
        PushCommand(FlushRegionCommand(addr, size));
        return;
    }

    const bool needs_blocking_flush =
        Settings::IsGPULevelExtreme() && system.Renderer().Rasterizer().MustFlushRegion(addr, size);
    if (!needs_blocking_flush) {
        return;
    }

    auto& gpu = system.GPU();
    const u64 requested_fence = gpu.RequestFlush(addr, size);
    PushCommand(GPUTickCommand());
    // Busy-wait until the GPU publishes a fence id at least as large as ours.
    // NOTE(review): this spins without yielding or sleeping — confirm that is intended.
    while (gpu.CurrentFlushRequestFence() < requested_fence) {
    }
}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Reports a CPU write over [addr, addr + size) to the rasterizer. This is a direct call on
/// the calling thread; nothing is pushed to the GPU thread's command queue.
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
    auto& rasterizer = system.Renderer().Rasterizer();
    rasterizer.OnCPUWrite(addr, size);
}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Only the invalidation half is performed: the region is reported to the rasterizer as
/// written by the CPU and no flush is issued.
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
    auto& rasterizer = system.Renderer().Rasterizer();
    rasterizer.OnCPUWrite(addr, size);
}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ThreadManager::WaitIdle() const {
 | 
					void ThreadManager::WaitIdle() const {
 | 
				
			||||||
@ -95,6 +114,10 @@ void ThreadManager::WaitIdle() const {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Queues an OnCommandListEndCommand for the GPU thread. The fence id returned by
					/// PushCommand is not used.
					void ThreadManager::OnCommandListEnd() {
					    PushCommand(OnCommandListEndCommand{});
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
u64 ThreadManager::PushCommand(CommandData&& command_data) {
 | 
					u64 ThreadManager::PushCommand(CommandData&& command_data) {
 | 
				
			||||||
    const u64 fence{++state.last_fence};
 | 
					    const u64 fence{++state.last_fence};
 | 
				
			||||||
    state.queue.Push(CommandDataContainer(std::move(command_data), fence));
 | 
					    state.queue.Push(CommandDataContainer(std::move(command_data), fence));
 | 
				
			||||||
 | 
				
			|||||||
@ -70,9 +70,16 @@ struct FlushAndInvalidateRegionCommand final {
 | 
				
			|||||||
    u64 size;
 | 
					    u64 size;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Command called within the gpu, to schedule actions after a command list end
 | 
				
			||||||
 | 
					struct OnCommandListEndCommand final {};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Command to make the gpu look into pending requests
 | 
				
			||||||
 | 
					struct GPUTickCommand final {};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
using CommandData =
 | 
					using CommandData =
 | 
				
			||||||
    std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
 | 
					    std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
 | 
				
			||||||
                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
 | 
					                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
 | 
				
			||||||
 | 
					                 GPUTickCommand>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct CommandDataContainer {
 | 
					struct CommandDataContainer {
 | 
				
			||||||
    CommandDataContainer() = default;
 | 
					    CommandDataContainer() = default;
 | 
				
			||||||
@ -122,6 +129,8 @@ public:
 | 
				
			|||||||
    // Wait until the gpu thread is idle.
 | 
					    // Wait until the gpu thread is idle.
 | 
				
			||||||
    void WaitIdle() const;
 | 
					    void WaitIdle() const;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void OnCommandListEnd();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
    /// Pushes a command to be executed by the GPU thread
 | 
					    /// Pushes a command to be executed by the GPU thread
 | 
				
			||||||
    u64 PushCommand(CommandData&& command_data);
 | 
					    u64 PushCommand(CommandData&& command_data);
 | 
				
			||||||
 | 
				
			|||||||
@ -12,10 +12,12 @@
 | 
				
			|||||||
#include <mutex>
 | 
					#include <mutex>
 | 
				
			||||||
#include <optional>
 | 
					#include <optional>
 | 
				
			||||||
#include <unordered_map>
 | 
					#include <unordered_map>
 | 
				
			||||||
 | 
					#include <unordered_set>
 | 
				
			||||||
#include <vector>
 | 
					#include <vector>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "common/assert.h"
 | 
					#include "common/assert.h"
 | 
				
			||||||
#include "core/core.h"
 | 
					#include "core/core.h"
 | 
				
			||||||
 | 
					#include "core/settings.h"
 | 
				
			||||||
#include "video_core/engines/maxwell_3d.h"
 | 
					#include "video_core/engines/maxwell_3d.h"
 | 
				
			||||||
#include "video_core/gpu.h"
 | 
					#include "video_core/gpu.h"
 | 
				
			||||||
#include "video_core/memory_manager.h"
 | 
					#include "video_core/memory_manager.h"
 | 
				
			||||||
@ -130,6 +132,9 @@ public:
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        query->BindCounter(Stream(type).Current(), timestamp);
 | 
					        query->BindCounter(Stream(type).Current(), timestamp);
 | 
				
			||||||
 | 
					        if (Settings::values.use_asynchronous_gpu_emulation) {
 | 
				
			||||||
 | 
					            AsyncFlushQuery(cpu_addr);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
 | 
					    /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
 | 
				
			||||||
@ -170,6 +175,37 @@ public:
 | 
				
			|||||||
        return streams[static_cast<std::size_t>(type)];
 | 
					        return streams[static_cast<std::size_t>(type)];
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Moves the current set of uncommitted flushes to the back of the committed list.
					    /// A null entry is appended when nothing was requested, so one entry is added per
					    /// call. The uncommitted set is left empty (null).
					    void CommitAsyncFlushes() {
					        committed_flushes.emplace_back(uncommitted_flushes);
					        uncommitted_flushes = nullptr;
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool HasUncommittedFlushes() const {
 | 
				
			||||||
 | 
					        return uncommitted_flushes != nullptr;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool ShouldWaitAsyncFlushes() const {
 | 
				
			||||||
 | 
					        if (committed_flushes.empty()) {
 | 
				
			||||||
 | 
					            return false;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        return committed_flushes.front() != nullptr;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void PopAsyncFlushes() {
 | 
				
			||||||
 | 
					        if (committed_flushes.empty()) {
 | 
				
			||||||
 | 
					            return;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        auto& flush_list = committed_flushes.front();
 | 
				
			||||||
 | 
					        if (!flush_list) {
 | 
				
			||||||
 | 
					            committed_flushes.pop_front();
 | 
				
			||||||
 | 
					            return;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        for (VAddr query_address : *flush_list) {
 | 
				
			||||||
 | 
					            FlushAndRemoveRegion(query_address, 4);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        committed_flushes.pop_front();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
protected:
 | 
					protected:
 | 
				
			||||||
    std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
 | 
					    std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -224,6 +260,13 @@ private:
 | 
				
			|||||||
        return found != std::end(contents) ? &*found : nullptr;
 | 
					        return found != std::end(contents) ? &*found : nullptr;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Records addr in the set of uncommitted flushes, creating the set on first use.
					    void AsyncFlushQuery(VAddr addr) {
					        if (uncommitted_flushes == nullptr) {
					            uncommitted_flushes = std::make_shared<std::unordered_set<VAddr>>();
					        }
					        uncommitted_flushes->insert(addr);
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    static constexpr std::uintptr_t PAGE_SIZE = 4096;
 | 
					    static constexpr std::uintptr_t PAGE_SIZE = 4096;
 | 
				
			||||||
    static constexpr unsigned PAGE_SHIFT = 12;
 | 
					    static constexpr unsigned PAGE_SHIFT = 12;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -235,6 +278,9 @@ private:
 | 
				
			|||||||
    std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
 | 
					    std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::array<CounterStream, VideoCore::NumQueryTypes> streams;
 | 
					    std::array<CounterStream, VideoCore::NumQueryTypes> streams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::shared_ptr<std::unordered_set<VAddr>> uncommitted_flushes{};
 | 
				
			||||||
 | 
					    std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed_flushes;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template <class QueryCache, class HostCounter>
 | 
					template <class QueryCache, class HostCounter>
 | 
				
			||||||
 | 
				
			|||||||
@ -49,15 +49,33 @@ public:
 | 
				
			|||||||
    /// Records a GPU query and caches it
 | 
					    /// Records a GPU query and caches it
 | 
				
			||||||
    virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
 | 
					    virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Signal a GPU based semaphore as a fence
 | 
				
			||||||
 | 
					    virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Signal a GPU based syncpoint as a fence
 | 
				
			||||||
 | 
					    virtual void SignalSyncPoint(u32 value) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Release all pending fences.
 | 
				
			||||||
 | 
					    virtual void ReleaseFences() = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify rasterizer that all caches should be flushed to Switch memory
 | 
					    /// Notify rasterizer that all caches should be flushed to Switch memory
 | 
				
			||||||
    virtual void FlushAll() = 0;
 | 
					    virtual void FlushAll() = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | 
					    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | 
				
			||||||
    virtual void FlushRegion(VAddr addr, u64 size) = 0;
 | 
					    virtual void FlushRegion(VAddr addr, u64 size) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Check if the specified memory area requires flushing to CPU Memory.
 | 
				
			||||||
 | 
					    virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify rasterizer that any caches of the specified region should be invalidated
 | 
					    /// Notify rasterizer that any caches of the specified region should be invalidated
 | 
				
			||||||
    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 | 
					    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Notify rasterizer that any caches of the specified region are out of sync with the guest
 | 
				
			||||||
 | 
					    virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Sync memory between guest and host.
 | 
				
			||||||
 | 
					    virtual void SyncGuestHost() = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | 
					    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | 
				
			||||||
    /// and invalidated
 | 
					    /// and invalidated
 | 
				
			||||||
    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
 | 
					    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
 | 
				
			||||||
 | 
				
			|||||||
@ -52,7 +52,7 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void OGLBufferCache::WriteBarrier() {
 | 
					void OGLBufferCache::WriteBarrier() {
 | 
				
			||||||
    glMemoryBarrier(GL_ALL_BARRIER_BITS);
 | 
					    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
 | 
					GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
 | 
				
			||||||
@ -72,6 +72,7 @@ void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, s
 | 
				
			|||||||
void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
 | 
					void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
 | 
				
			||||||
                                       u8* data) {
 | 
					                                       u8* data) {
 | 
				
			||||||
    MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
 | 
					    MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
 | 
				
			||||||
 | 
					    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
 | 
				
			||||||
    glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
 | 
					    glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
 | 
				
			||||||
                            static_cast<GLsizeiptr>(size), data);
 | 
					                            static_cast<GLsizeiptr>(size), data);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										72
									
								
								src/video_core/renderer_opengl/gl_fence_manager.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								src/video_core/renderer_opengl/gl_fence_manager.cpp
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,72 @@
 | 
				
			|||||||
 | 
					// Copyright 2020 yuzu Emulator Project
 | 
				
			||||||
 | 
					// Licensed under GPLv2 or any later version
 | 
				
			||||||
 | 
					// Refer to the license.txt file included.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "common/assert.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "video_core/renderer_opengl/gl_fence_manager.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace OpenGL {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Builds a fence from a payload only; the sync object starts value-initialized
					/// and is created later by Queue().
					GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed)
					    : VideoCommon::FenceBase(payload, is_stubbed),
					      sync_object{} {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Builds a fence from a GPU address and a payload; the sync object starts
					/// value-initialized and is created later by Queue().
					GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
					    : VideoCommon::FenceBase(address, payload, is_stubbed),
					      sync_object{} {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Defaulted out of line; members clean up through their own destructors.
					GLInnerFence::~GLInnerFence() = default;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void GLInnerFence::Queue() {
 | 
				
			||||||
 | 
					    if (is_stubbed) {
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    ASSERT(sync_object.handle == 0);
 | 
				
			||||||
 | 
					    sync_object.Create();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool GLInnerFence::IsSignaled() const {
 | 
				
			||||||
 | 
					    if (is_stubbed) {
 | 
				
			||||||
 | 
					        return true;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    ASSERT(sync_object.handle != 0);
 | 
				
			||||||
 | 
					    GLsizei length;
 | 
				
			||||||
 | 
					    GLint sync_status;
 | 
				
			||||||
 | 
					    glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status);
 | 
				
			||||||
 | 
					    return sync_status == GL_SIGNALED;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void GLInnerFence::Wait() {
 | 
				
			||||||
 | 
					    if (is_stubbed) {
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    ASSERT(sync_object.handle != 0);
 | 
				
			||||||
 | 
					    glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Forwards every dependency unchanged to the generic fence manager base.
					FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,
					                                       VideoCore::RasterizerInterface& rasterizer,
					                                       TextureCacheOpenGL& texture_cache,
					                                       OGLBufferCache& buffer_cache,
					                                       QueryCache& query_cache)
					    : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Allocates a payload-only GLInnerFence and returns it as a shared Fence.
					Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
					    Fence created = std::make_shared<GLInnerFence>(value, is_stubbed);
					    return created;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Allocates a GLInnerFence bound to a GPU address and returns it as a shared Fence.
					Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
					    Fence created = std::make_shared<GLInnerFence>(addr, value, is_stubbed);
					    return created;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Delegates to GLInnerFence::Queue on the given fence.
					void FenceManagerOpenGL::QueueFence(Fence& fence) {
					    GLInnerFence& inner = *fence;
					    inner.Queue();
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Delegates to GLInnerFence::IsSignaled on the given fence.
					bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const {
					    const GLInnerFence& inner = *fence;
					    return inner.IsSignaled();
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Delegates to GLInnerFence::Wait on the given fence.
					void FenceManagerOpenGL::WaitFence(Fence& fence) {
					    GLInnerFence& inner = *fence;
					    inner.Wait();
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					} // namespace OpenGL
 | 
				
			||||||
							
								
								
									
										53
									
								
								src/video_core/renderer_opengl/gl_fence_manager.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								src/video_core/renderer_opengl/gl_fence_manager.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,53 @@
 | 
				
			|||||||
 | 
					// Copyright 2020 yuzu Emulator Project
 | 
				
			||||||
 | 
					// Licensed under GPLv2 or any later version
 | 
				
			||||||
 | 
					// Refer to the license.txt file included.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <memory>
 | 
				
			||||||
 | 
					#include <glad/glad.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "common/common_types.h"
 | 
				
			||||||
 | 
					#include "video_core/fence_manager.h"
 | 
				
			||||||
 | 
					#include "video_core/renderer_opengl/gl_buffer_cache.h"
 | 
				
			||||||
 | 
					#include "video_core/renderer_opengl/gl_query_cache.h"
 | 
				
			||||||
 | 
					#include "video_core/renderer_opengl/gl_resource_manager.h"
 | 
				
			||||||
 | 
					#include "video_core/renderer_opengl/gl_texture_cache.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace OpenGL {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class GLInnerFence : public VideoCommon::FenceBase {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					    GLInnerFence(u32 payload, bool is_stubbed);
 | 
				
			||||||
 | 
					    GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed);
 | 
				
			||||||
 | 
					    ~GLInnerFence();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void Queue();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool IsSignaled() const;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void Wait();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					private:
 | 
				
			||||||
 | 
					    OGLSync sync_object;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					using Fence = std::shared_ptr<GLInnerFence>;
 | 
				
			||||||
 | 
					using GenericFenceManager =
 | 
				
			||||||
 | 
					    VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class FenceManagerOpenGL final : public GenericFenceManager {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					    FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
 | 
				
			||||||
 | 
					                       TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
 | 
				
			||||||
 | 
					                       QueryCache& query_cache);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					protected:
 | 
				
			||||||
 | 
					    Fence CreateFence(u32 value, bool is_stubbed) override;
 | 
				
			||||||
 | 
					    Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
 | 
				
			||||||
 | 
					    void QueueFence(Fence& fence) override;
 | 
				
			||||||
 | 
					    bool IsFenceSignaled(Fence& fence) const override;
 | 
				
			||||||
 | 
					    void WaitFence(Fence& fence) override;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					} // namespace OpenGL
 | 
				
			||||||
@ -99,9 +99,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
 | 
				
			|||||||
                                   ScreenInfo& info, GLShader::ProgramManager& program_manager,
 | 
					                                   ScreenInfo& info, GLShader::ProgramManager& program_manager,
 | 
				
			||||||
                                   StateTracker& state_tracker)
 | 
					                                   StateTracker& state_tracker)
 | 
				
			||||||
    : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
 | 
					    : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
 | 
				
			||||||
      shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
 | 
					      shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
 | 
				
			||||||
      screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
 | 
					      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
 | 
				
			||||||
      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
 | 
					      fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
 | 
				
			||||||
 | 
					      screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
 | 
				
			||||||
    CheckExtensions();
 | 
					    CheckExtensions();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -599,6 +600,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
 | 
				
			|||||||
    EndTransformFeedback();
 | 
					    EndTransformFeedback();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ++num_queued_commands;
 | 
					    ++num_queued_commands;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    system.GPU().TickWork();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
 | 
					void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
 | 
				
			||||||
@ -649,6 +652,13 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
 | 
				
			|||||||
    query_cache.FlushRegion(addr, size);
 | 
					    query_cache.FlushRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Returns whether the region needs flushing. The texture cache is consulted
					/// (first) only when Settings::IsGPULevelHigh() holds; the buffer cache decides otherwise.
					bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
					    if (Settings::IsGPULevelHigh() && texture_cache.MustFlushRegion(addr, size)) {
					        return true;
					    }
					    return buffer_cache.MustFlushRegion(addr, size);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
 | 
					void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 | 
					    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 | 
				
			||||||
    if (addr == 0 || size == 0) {
 | 
					    if (addr == 0 || size == 0) {
 | 
				
			||||||
@ -660,8 +670,52 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
 | 
				
			|||||||
    query_cache.InvalidateRegion(addr, size);
 | 
					    query_cache.InvalidateRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Propagates a CPU write over [addr, addr + size) to the caches: the texture and
					/// buffer caches are notified, the shader and query caches are invalidated.
					/// A zero address or zero size is ignored.
					void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
					    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
					    if (addr != 0 && size != 0) {
					        texture_cache.OnCPUWrite(addr, size);
					        shader_cache.InvalidateRegion(addr, size);
					        buffer_cache.OnCPUWrite(addr, size);
					        query_cache.InvalidateRegion(addr, size);
					    }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Asks the texture cache, then the buffer cache, to sync guest and host memory.
					void RasterizerOpenGL::SyncGuestHost() {
					    MICROPROFILE_SCOPE(OpenGL_CacheManagement);

					    texture_cache.SyncGuestHost();
					    buffer_cache.SyncGuestHost();
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Signals a semaphore at addr with value. When the GPU is asynchronous the request
					/// goes through the fence manager; otherwise the value is written to GPU memory directly.
					void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
					    auto& gpu{system.GPU()};
					    if (gpu.IsAsync()) {
					        fence_manager.SignalSemaphore(addr, value);
					        return;
					    }
					    gpu.MemoryManager().Write<u32>(addr, value);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Signals a syncpoint. When the GPU is asynchronous the request goes through the
					/// fence manager; otherwise the syncpoint is incremented on the GPU directly.
					void RasterizerOpenGL::SignalSyncPoint(u32 value) {
					    auto& gpu{system.GPU()};
					    if (gpu.IsAsync()) {
					        fence_manager.SignalSyncPoint(value);
					        return;
					    }
					    gpu.IncrementSyncPoint(value);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void RasterizerOpenGL::ReleaseFences() {
 | 
				
			||||||
 | 
					    auto& gpu{system.GPU()};
 | 
				
			||||||
 | 
					    if (!gpu.IsAsync()) {
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fence_manager.WaitPendingFences();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 | 
					void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    if (Settings::values.use_accurate_gpu_emulation) {
 | 
					    if (Settings::IsGPULevelExtreme()) {
 | 
				
			||||||
        FlushRegion(addr, size);
 | 
					        FlushRegion(addr, size);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    InvalidateRegion(addr, size);
 | 
					    InvalidateRegion(addr, size);
 | 
				
			||||||
 | 
				
			|||||||
@ -23,6 +23,7 @@
 | 
				
			|||||||
#include "video_core/rasterizer_interface.h"
 | 
					#include "video_core/rasterizer_interface.h"
 | 
				
			||||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
 | 
					#include "video_core/renderer_opengl/gl_buffer_cache.h"
 | 
				
			||||||
#include "video_core/renderer_opengl/gl_device.h"
 | 
					#include "video_core/renderer_opengl/gl_device.h"
 | 
				
			||||||
 | 
					#include "video_core/renderer_opengl/gl_fence_manager.h"
 | 
				
			||||||
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
 | 
					#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
 | 
				
			||||||
#include "video_core/renderer_opengl/gl_query_cache.h"
 | 
					#include "video_core/renderer_opengl/gl_query_cache.h"
 | 
				
			||||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
 | 
					#include "video_core/renderer_opengl/gl_resource_manager.h"
 | 
				
			||||||
@ -66,7 +67,13 @@ public:
 | 
				
			|||||||
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
 | 
					    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
 | 
				
			||||||
    void FlushAll() override;
 | 
					    void FlushAll() override;
 | 
				
			||||||
    void FlushRegion(VAddr addr, u64 size) override;
 | 
					    void FlushRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
 | 
					    bool MustFlushRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void InvalidateRegion(VAddr addr, u64 size) override;
 | 
					    void InvalidateRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
 | 
					    void OnCPUWrite(VAddr addr, u64 size) override;
 | 
				
			||||||
 | 
					    void SyncGuestHost() override;
 | 
				
			||||||
 | 
					    void SignalSemaphore(GPUVAddr addr, u32 value) override;
 | 
				
			||||||
 | 
					    void SignalSyncPoint(u32 value) override;
 | 
				
			||||||
 | 
					    void ReleaseFences() override;
 | 
				
			||||||
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
 | 
					    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void FlushCommands() override;
 | 
					    void FlushCommands() override;
 | 
				
			||||||
    void TickFrame() override;
 | 
					    void TickFrame() override;
 | 
				
			||||||
@ -222,6 +229,8 @@ private:
 | 
				
			|||||||
    SamplerCacheOpenGL sampler_cache;
 | 
					    SamplerCacheOpenGL sampler_cache;
 | 
				
			||||||
    FramebufferCacheOpenGL framebuffer_cache;
 | 
					    FramebufferCacheOpenGL framebuffer_cache;
 | 
				
			||||||
    QueryCache query_cache;
 | 
					    QueryCache query_cache;
 | 
				
			||||||
 | 
					    OGLBufferCache buffer_cache;
 | 
				
			||||||
 | 
					    FenceManagerOpenGL fence_manager;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Core::System& system;
 | 
					    Core::System& system;
 | 
				
			||||||
    ScreenInfo& screen_info;
 | 
					    ScreenInfo& screen_info;
 | 
				
			||||||
@ -229,7 +238,6 @@ private:
 | 
				
			|||||||
    StateTracker& state_tracker;
 | 
					    StateTracker& state_tracker;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
 | 
					    static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
 | 
				
			||||||
    OGLBufferCache buffer_cache;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    GLint vertex_binding = 0;
 | 
					    GLint vertex_binding = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -448,7 +448,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    // Look up shader in the cache based on address
 | 
					    // Look up shader in the cache based on address
 | 
				
			||||||
    const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
 | 
					    const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
 | 
				
			||||||
    Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
 | 
					    Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
 | 
				
			||||||
    if (shader) {
 | 
					    if (shader) {
 | 
				
			||||||
        return last_shaders[static_cast<std::size_t>(program)] = shader;
 | 
					        return last_shaders[static_cast<std::size_t>(program)] = shader;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -477,7 +477,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 | 
				
			|||||||
        const std::size_t size_in_bytes = code.size() * sizeof(u64);
 | 
					        const std::size_t size_in_bytes = code.size() * sizeof(u64);
 | 
				
			||||||
        shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
 | 
					        shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (cpu_addr) {
 | 
				
			||||||
        Register(shader);
 | 
					        Register(shader);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        null_shader = shader;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return last_shaders[static_cast<std::size_t>(program)] = shader;
 | 
					    return last_shaders[static_cast<std::size_t>(program)] = shader;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -486,7 +491,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
 | 
				
			|||||||
    auto& memory_manager{system.GPU().MemoryManager()};
 | 
					    auto& memory_manager{system.GPU().MemoryManager()};
 | 
				
			||||||
    const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
 | 
					    const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
 | 
					    auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
 | 
				
			||||||
    if (kernel) {
 | 
					    if (kernel) {
 | 
				
			||||||
        return kernel;
 | 
					        return kernel;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -507,7 +512,11 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
 | 
				
			|||||||
        kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
 | 
					        kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (cpu_addr) {
 | 
				
			||||||
        Register(kernel);
 | 
					        Register(kernel);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        null_kernel = kernel;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
    return kernel;
 | 
					    return kernel;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -125,6 +125,9 @@ private:
 | 
				
			|||||||
    ShaderDiskCacheOpenGL disk_cache;
 | 
					    ShaderDiskCacheOpenGL disk_cache;
 | 
				
			||||||
    std::unordered_map<u64, PrecompiledShader> runtime_cache;
 | 
					    std::unordered_map<u64, PrecompiledShader> runtime_cache;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Shader null_shader{};
 | 
				
			||||||
 | 
					    Shader null_kernel{};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
 | 
					    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										101
									
								
								src/video_core/renderer_vulkan/vk_fence_manager.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								src/video_core/renderer_vulkan/vk_fence_manager.cpp
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,101 @@
 | 
				
			|||||||
 | 
					// Copyright 2020 yuzu Emulator Project
 | 
				
			||||||
 | 
					// Licensed under GPLv2 or any later version
 | 
				
			||||||
 | 
					// Refer to the license.txt file included.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <memory>
 | 
				
			||||||
 | 
					#include <thread>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "video_core/renderer_vulkan/vk_buffer_cache.h"
 | 
				
			||||||
 | 
					#include "video_core/renderer_vulkan/vk_device.h"
 | 
				
			||||||
 | 
					#include "video_core/renderer_vulkan/vk_fence_manager.h"
 | 
				
			||||||
 | 
					#include "video_core/renderer_vulkan/vk_scheduler.h"
 | 
				
			||||||
 | 
					#include "video_core/renderer_vulkan/vk_texture_cache.h"
 | 
				
			||||||
 | 
					#include "video_core/renderer_vulkan/wrapper.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Vulkan {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Constructs a fence that carries only a payload value.
					/// @param device     Vulkan device; kept by reference.
					/// @param scheduler  Command scheduler; kept by reference.
					/// @param payload    Value forwarded to VideoCommon::FenceBase.
					/// @param is_stubbed Forwarded to VideoCommon::FenceBase.
					InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed)
					    : VideoCommon::FenceBase(payload, is_stubbed), device(device), scheduler(scheduler) {
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Constructs a fence that carries a GPU address together with a payload value.
					/// @param device     Vulkan device; kept by reference.
					/// @param scheduler  Command scheduler; kept by reference.
					/// @param address    GPU virtual address forwarded to VideoCommon::FenceBase.
					/// @param payload    Value forwarded to VideoCommon::FenceBase.
					/// @param is_stubbed Forwarded to VideoCommon::FenceBase.
					InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
					                       u32 payload, bool is_stubbed)
					    : VideoCommon::FenceBase(address, payload, is_stubbed), device(device),
					      scheduler(scheduler) {
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Destructor is defaulted here, out of line, rather than in the class declaration.
					InnerFence::~InnerFence() = default;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void InnerFence::Queue() {
 | 
				
			||||||
 | 
					    if (is_stubbed) {
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    ASSERT(!event);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    event = device.GetLogical().CreateEvent();
 | 
				
			||||||
 | 
					    ticks = scheduler.Ticks();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    scheduler.RequestOutsideRenderPassOperationContext();
 | 
				
			||||||
 | 
					    scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
 | 
				
			||||||
 | 
					        cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool InnerFence::IsSignaled() const {
 | 
				
			||||||
 | 
					    if (is_stubbed) {
 | 
				
			||||||
 | 
					        return true;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    ASSERT(event);
 | 
				
			||||||
 | 
					    return IsEventSignalled();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void InnerFence::Wait() {
 | 
				
			||||||
 | 
					    if (is_stubbed) {
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    ASSERT(event);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (ticks >= scheduler.Ticks()) {
 | 
				
			||||||
 | 
					        scheduler.Flush();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    while (!IsEventSignalled()) {
 | 
				
			||||||
 | 
					        std::this_thread::yield();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool InnerFence::IsEventSignalled() const {
 | 
				
			||||||
 | 
					    switch (const VkResult result = event.GetStatus()) {
 | 
				
			||||||
 | 
					    case VK_EVENT_SET:
 | 
				
			||||||
 | 
					        return true;
 | 
				
			||||||
 | 
					    case VK_EVENT_RESET:
 | 
				
			||||||
 | 
					        return false;
 | 
				
			||||||
 | 
					    default:
 | 
				
			||||||
 | 
					        throw vk::Exception(result);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Constructs the Vulkan fence manager.
					/// system, rasterizer and the three caches are forwarded to GenericFenceManager;
					/// device and scheduler are kept by reference for creating fences later.
					VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
					                               const VKDevice& device, VKScheduler& scheduler,
					                               VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
					                               VKQueryCache& query_cache)
					    : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
					      device(device), scheduler(scheduler) {
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Creates a payload-only fence.
					/// @param value      Payload passed to the InnerFence constructor.
					/// @param is_stubbed Passed through to the InnerFence constructor.
					/// @return Shared pointer to the newly allocated InnerFence.
					Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
					    Fence new_fence = std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
					    return new_fence;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Creates a fence that carries a GPU address and a payload.
					/// @param addr       GPU virtual address passed to the InnerFence constructor.
					/// @param value      Payload passed to the InnerFence constructor.
					/// @param is_stubbed Passed through to the InnerFence constructor.
					/// @return Shared pointer to the newly allocated InnerFence.
					Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
					    Fence new_fence = std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed);
					    return new_fence;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Queues the given fence by delegating to InnerFence::Queue().
					void VKFenceManager::QueueFence(Fence& fence) {
					    InnerFence& target = *fence;
					    target.Queue();
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Reports whether the given fence is signalled by delegating to InnerFence::IsSignaled().
					bool VKFenceManager::IsFenceSignaled(Fence& fence) const {
					    const InnerFence& target = *fence;
					    return target.IsSignaled();
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Waits on the given fence by delegating to InnerFence::Wait().
					void VKFenceManager::WaitFence(Fence& fence) {
					    InnerFence& target = *fence;
					    target.Wait();
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					} // namespace Vulkan
 | 
				
			||||||
							
								
								
									
										74
									
								
								src/video_core/renderer_vulkan/vk_fence_manager.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								src/video_core/renderer_vulkan/vk_fence_manager.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,74 @@
 | 
				
			|||||||
 | 
					// Copyright 2020 yuzu Emulator Project
 | 
				
			||||||
 | 
					// Licensed under GPLv2 or any later version
 | 
				
			||||||
 | 
					// Refer to the license.txt file included.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <memory>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "video_core/fence_manager.h"
 | 
				
			||||||
 | 
					#include "video_core/renderer_vulkan/wrapper.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Core {
 | 
				
			||||||
 | 
					class System;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace VideoCore {
 | 
				
			||||||
 | 
					class RasterizerInterface;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Vulkan {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class VKBufferCache;
 | 
				
			||||||
 | 
					class VKDevice;
 | 
				
			||||||
 | 
					class VKQueryCache;
 | 
				
			||||||
 | 
					class VKScheduler;
 | 
				
			||||||
 | 
					class VKTextureCache;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class InnerFence : public VideoCommon::FenceBase {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					    explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload,
 | 
				
			||||||
 | 
					                        bool is_stubbed);
 | 
				
			||||||
 | 
					    explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
 | 
				
			||||||
 | 
					                        u32 payload, bool is_stubbed);
 | 
				
			||||||
 | 
					    ~InnerFence();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void Queue();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool IsSignaled() const;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void Wait();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					private:
 | 
				
			||||||
 | 
					    bool IsEventSignalled() const;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const VKDevice& device;
 | 
				
			||||||
 | 
					    VKScheduler& scheduler;
 | 
				
			||||||
 | 
					    vk::Event event;
 | 
				
			||||||
 | 
					    u64 ticks = 0;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					using Fence = std::shared_ptr<InnerFence>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					using GenericFenceManager =
 | 
				
			||||||
 | 
					    VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class VKFenceManager final : public GenericFenceManager {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					    explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
 | 
				
			||||||
 | 
					                            const VKDevice& device, VKScheduler& scheduler,
 | 
				
			||||||
 | 
					                            VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
 | 
				
			||||||
 | 
					                            VKQueryCache& query_cache);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					protected:
 | 
				
			||||||
 | 
					    Fence CreateFence(u32 value, bool is_stubbed) override;
 | 
				
			||||||
 | 
					    Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
 | 
				
			||||||
 | 
					    void QueueFence(Fence& fence) override;
 | 
				
			||||||
 | 
					    bool IsFenceSignaled(Fence& fence) const override;
 | 
				
			||||||
 | 
					    void WaitFence(Fence& fence) override;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					private:
 | 
				
			||||||
 | 
					    const VKDevice& device;
 | 
				
			||||||
 | 
					    VKScheduler& scheduler;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					} // namespace Vulkan
 | 
				
			||||||
@ -207,7 +207,7 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
 | 
				
			|||||||
        const GPUVAddr program_addr{GetShaderAddress(system, program)};
 | 
					        const GPUVAddr program_addr{GetShaderAddress(system, program)};
 | 
				
			||||||
        const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
 | 
					        const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
 | 
				
			||||||
        ASSERT(cpu_addr);
 | 
					        ASSERT(cpu_addr);
 | 
				
			||||||
        auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
 | 
					        auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
 | 
				
			||||||
        if (!shader) {
 | 
					        if (!shader) {
 | 
				
			||||||
            const auto host_ptr{memory_manager.GetPointer(program_addr)};
 | 
					            const auto host_ptr{memory_manager.GetPointer(program_addr)};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -218,7 +218,11 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
            shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
 | 
					            shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
 | 
				
			||||||
                                                    std::move(code), stage_offset);
 | 
					                                                    std::move(code), stage_offset);
 | 
				
			||||||
 | 
					            if (cpu_addr) {
 | 
				
			||||||
                Register(shader);
 | 
					                Register(shader);
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                null_shader = shader;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        shaders[index] = std::move(shader);
 | 
					        shaders[index] = std::move(shader);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -261,7 +265,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
 | 
				
			|||||||
    const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
 | 
					    const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
 | 
				
			||||||
    ASSERT(cpu_addr);
 | 
					    ASSERT(cpu_addr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
 | 
					    auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
 | 
				
			||||||
    if (!shader) {
 | 
					    if (!shader) {
 | 
				
			||||||
        // No shader found - create a new one
 | 
					        // No shader found - create a new one
 | 
				
			||||||
        const auto host_ptr = memory_manager.GetPointer(program_addr);
 | 
					        const auto host_ptr = memory_manager.GetPointer(program_addr);
 | 
				
			||||||
@ -271,7 +275,11 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
 | 
				
			|||||||
        shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
 | 
					        shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
 | 
				
			||||||
                                                program_addr, *cpu_addr, std::move(code),
 | 
					                                                program_addr, *cpu_addr, std::move(code),
 | 
				
			||||||
                                                kernel_main_offset);
 | 
					                                                kernel_main_offset);
 | 
				
			||||||
 | 
					        if (cpu_addr) {
 | 
				
			||||||
            Register(shader);
 | 
					            Register(shader);
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            null_kernel = shader;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Specialization specialization;
 | 
					    Specialization specialization;
 | 
				
			||||||
 | 
				
			|||||||
@ -182,6 +182,9 @@ private:
 | 
				
			|||||||
    VKUpdateDescriptorQueue& update_descriptor_queue;
 | 
					    VKUpdateDescriptorQueue& update_descriptor_queue;
 | 
				
			||||||
    VKRenderPassCache& renderpass_cache;
 | 
					    VKRenderPassCache& renderpass_cache;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Shader null_shader{};
 | 
				
			||||||
 | 
					    Shader null_kernel{};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
 | 
					    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    GraphicsPipelineCacheKey last_graphics_key;
 | 
					    GraphicsPipelineCacheKey last_graphics_key;
 | 
				
			||||||
 | 
				
			|||||||
@ -17,6 +17,7 @@
 | 
				
			|||||||
#include "common/microprofile.h"
 | 
					#include "common/microprofile.h"
 | 
				
			||||||
#include "core/core.h"
 | 
					#include "core/core.h"
 | 
				
			||||||
#include "core/memory.h"
 | 
					#include "core/memory.h"
 | 
				
			||||||
 | 
					#include "core/settings.h"
 | 
				
			||||||
#include "video_core/engines/kepler_compute.h"
 | 
					#include "video_core/engines/kepler_compute.h"
 | 
				
			||||||
#include "video_core/engines/maxwell_3d.h"
 | 
					#include "video_core/engines/maxwell_3d.h"
 | 
				
			||||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 | 
					#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 | 
				
			||||||
@ -299,7 +300,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
 | 
				
			|||||||
      pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
 | 
					      pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
 | 
				
			||||||
                     renderpass_cache),
 | 
					                     renderpass_cache),
 | 
				
			||||||
      buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
 | 
					      buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
 | 
				
			||||||
      sampler_cache(device), query_cache(system, *this, device, scheduler) {
 | 
					      sampler_cache(device),
 | 
				
			||||||
 | 
					      fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
 | 
				
			||||||
 | 
					      query_cache(system, *this, device, scheduler) {
 | 
				
			||||||
    scheduler.SetQueryCache(query_cache);
 | 
					    scheduler.SetQueryCache(query_cache);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -360,6 +363,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 | 
				
			|||||||
    });
 | 
					    });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    EndTransformFeedback();
 | 
					    EndTransformFeedback();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    system.GPU().TickWork();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void RasterizerVulkan::Clear() {
 | 
					void RasterizerVulkan::Clear() {
 | 
				
			||||||
@ -504,6 +509,13 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
 | 
				
			|||||||
    query_cache.FlushRegion(addr, size);
 | 
					    query_cache.FlushRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Reports whether the given region has to be flushed.
					/// The texture cache is consulted only when Settings::IsGPULevelHigh() is true;
					/// the buffer cache is consulted whenever the texture cache did not already answer true.
					bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
					    if (Settings::IsGPULevelHigh() && texture_cache.MustFlushRegion(addr, size)) {
					        return true;
					    }
					    return buffer_cache.MustFlushRegion(addr, size);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
 | 
					void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    if (addr == 0 || size == 0) {
 | 
					    if (addr == 0 || size == 0) {
 | 
				
			||||||
        return;
 | 
					        return;
 | 
				
			||||||
@ -514,6 +526,47 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
 | 
				
			|||||||
    query_cache.InvalidateRegion(addr, size);
 | 
					    query_cache.InvalidateRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Notifies the caches of a CPU write to [addr, addr + size).
					/// Ignored when addr or size is zero. The texture and buffer caches receive
					/// OnCPUWrite; the pipeline and query caches receive InvalidateRegion.
					void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
					    if (addr != 0 && size != 0) {
					        texture_cache.OnCPUWrite(addr, size);
					        pipeline_cache.InvalidateRegion(addr, size);
					        buffer_cache.OnCPUWrite(addr, size);
					        query_cache.InvalidateRegion(addr, size);
					    }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Calls SyncGuestHost() on the texture cache, then on the buffer cache.
					void RasterizerVulkan::SyncGuestHost() {
					    texture_cache.SyncGuestHost();
					    buffer_cache.SyncGuestHost();
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Signals a semaphore at the given GPU address.
					/// With an asynchronous GPU the request goes to the fence manager; otherwise the
					/// value is written to GPU memory immediately.
					void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
					    auto& gpu{system.GPU()};
					    if (gpu.IsAsync()) {
					        fence_manager.SignalSemaphore(addr, value);
					        return;
					    }
					    gpu.MemoryManager().Write<u32>(addr, value);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Signals a sync point.
					/// With an asynchronous GPU the request goes to the fence manager; otherwise the
					/// sync point is incremented on the GPU immediately.
					void RasterizerVulkan::SignalSyncPoint(u32 value) {
					    auto& gpu{system.GPU()};
					    if (gpu.IsAsync()) {
					        fence_manager.SignalSyncPoint(value);
					        return;
					    }
					    gpu.IncrementSyncPoint(value);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void RasterizerVulkan::ReleaseFences() {
 | 
				
			||||||
 | 
					    auto& gpu{system.GPU()};
 | 
				
			||||||
 | 
					    if (!gpu.IsAsync()) {
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fence_manager.WaitPendingFences();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 | 
					void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    FlushRegion(addr, size);
 | 
					    FlushRegion(addr, size);
 | 
				
			||||||
    InvalidateRegion(addr, size);
 | 
					    InvalidateRegion(addr, size);
 | 
				
			||||||
 | 
				
			|||||||
@ -21,6 +21,7 @@
 | 
				
			|||||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
 | 
					#include "video_core/renderer_vulkan/vk_buffer_cache.h"
 | 
				
			||||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
 | 
					#include "video_core/renderer_vulkan/vk_compute_pass.h"
 | 
				
			||||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 | 
					#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 | 
				
			||||||
 | 
					#include "video_core/renderer_vulkan/vk_fence_manager.h"
 | 
				
			||||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
 | 
					#include "video_core/renderer_vulkan/vk_memory_manager.h"
 | 
				
			||||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 | 
					#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 | 
				
			||||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
 | 
					#include "video_core/renderer_vulkan/vk_query_cache.h"
 | 
				
			||||||
@ -118,7 +119,13 @@ public:
 | 
				
			|||||||
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
 | 
					    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
 | 
				
			||||||
    void FlushAll() override;
 | 
					    void FlushAll() override;
 | 
				
			||||||
    void FlushRegion(VAddr addr, u64 size) override;
 | 
					    void FlushRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
 | 
					    bool MustFlushRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void InvalidateRegion(VAddr addr, u64 size) override;
 | 
					    void InvalidateRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
 | 
					    void OnCPUWrite(VAddr addr, u64 size) override;
 | 
				
			||||||
 | 
					    void SyncGuestHost() override;
 | 
				
			||||||
 | 
					    void SignalSemaphore(GPUVAddr addr, u32 value) override;
 | 
				
			||||||
 | 
					    void SignalSyncPoint(u32 value) override;
 | 
				
			||||||
 | 
					    void ReleaseFences() override;
 | 
				
			||||||
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
 | 
					    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void FlushCommands() override;
 | 
					    void FlushCommands() override;
 | 
				
			||||||
    void TickFrame() override;
 | 
					    void TickFrame() override;
 | 
				
			||||||
@ -261,6 +268,7 @@ private:
 | 
				
			|||||||
    VKPipelineCache pipeline_cache;
 | 
					    VKPipelineCache pipeline_cache;
 | 
				
			||||||
    VKBufferCache buffer_cache;
 | 
					    VKBufferCache buffer_cache;
 | 
				
			||||||
    VKSamplerCache sampler_cache;
 | 
					    VKSamplerCache sampler_cache;
 | 
				
			||||||
 | 
					    VKFenceManager fence_manager;
 | 
				
			||||||
    VKQueryCache query_cache;
 | 
					    VKQueryCache query_cache;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::array<View, Maxwell::NumRenderTargets> color_attachments;
 | 
					    std::array<View, Maxwell::NumRenderTargets> color_attachments;
 | 
				
			||||||
 | 
				
			|||||||
@ -63,6 +63,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
 | 
				
			|||||||
    X(vkCmdSetBlendConstants);
 | 
					    X(vkCmdSetBlendConstants);
 | 
				
			||||||
    X(vkCmdSetDepthBias);
 | 
					    X(vkCmdSetDepthBias);
 | 
				
			||||||
    X(vkCmdSetDepthBounds);
 | 
					    X(vkCmdSetDepthBounds);
 | 
				
			||||||
 | 
					    X(vkCmdSetEvent);
 | 
				
			||||||
    X(vkCmdSetScissor);
 | 
					    X(vkCmdSetScissor);
 | 
				
			||||||
    X(vkCmdSetStencilCompareMask);
 | 
					    X(vkCmdSetStencilCompareMask);
 | 
				
			||||||
    X(vkCmdSetStencilReference);
 | 
					    X(vkCmdSetStencilReference);
 | 
				
			||||||
@ -75,6 +76,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
 | 
				
			|||||||
    X(vkCreateDescriptorPool);
 | 
					    X(vkCreateDescriptorPool);
 | 
				
			||||||
    X(vkCreateDescriptorSetLayout);
 | 
					    X(vkCreateDescriptorSetLayout);
 | 
				
			||||||
    X(vkCreateDescriptorUpdateTemplateKHR);
 | 
					    X(vkCreateDescriptorUpdateTemplateKHR);
 | 
				
			||||||
 | 
					    X(vkCreateEvent);
 | 
				
			||||||
    X(vkCreateFence);
 | 
					    X(vkCreateFence);
 | 
				
			||||||
    X(vkCreateFramebuffer);
 | 
					    X(vkCreateFramebuffer);
 | 
				
			||||||
    X(vkCreateGraphicsPipelines);
 | 
					    X(vkCreateGraphicsPipelines);
 | 
				
			||||||
@ -93,6 +95,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
 | 
				
			|||||||
    X(vkDestroyDescriptorPool);
 | 
					    X(vkDestroyDescriptorPool);
 | 
				
			||||||
    X(vkDestroyDescriptorSetLayout);
 | 
					    X(vkDestroyDescriptorSetLayout);
 | 
				
			||||||
    X(vkDestroyDescriptorUpdateTemplateKHR);
 | 
					    X(vkDestroyDescriptorUpdateTemplateKHR);
 | 
				
			||||||
 | 
					    X(vkDestroyEvent);
 | 
				
			||||||
    X(vkDestroyFence);
 | 
					    X(vkDestroyFence);
 | 
				
			||||||
    X(vkDestroyFramebuffer);
 | 
					    X(vkDestroyFramebuffer);
 | 
				
			||||||
    X(vkDestroyImage);
 | 
					    X(vkDestroyImage);
 | 
				
			||||||
@ -112,6 +115,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
 | 
				
			|||||||
    X(vkFreeMemory);
 | 
					    X(vkFreeMemory);
 | 
				
			||||||
    X(vkGetBufferMemoryRequirements);
 | 
					    X(vkGetBufferMemoryRequirements);
 | 
				
			||||||
    X(vkGetDeviceQueue);
 | 
					    X(vkGetDeviceQueue);
 | 
				
			||||||
 | 
					    X(vkGetEventStatus);
 | 
				
			||||||
    X(vkGetFenceStatus);
 | 
					    X(vkGetFenceStatus);
 | 
				
			||||||
    X(vkGetImageMemoryRequirements);
 | 
					    X(vkGetImageMemoryRequirements);
 | 
				
			||||||
    X(vkGetQueryPoolResults);
 | 
					    X(vkGetQueryPoolResults);
 | 
				
			||||||
@ -269,6 +273,10 @@ void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld)
 | 
				
			|||||||
    dld.vkFreeMemory(device, handle, nullptr);
 | 
					    dld.vkFreeMemory(device, handle, nullptr);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Destroys a VkEvent through the device dispatch table, passing a null allocator.
					void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept {
					    dld.vkDestroyEvent(device, handle, nullptr);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept {
 | 
					void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept {
 | 
				
			||||||
    dld.vkDestroyFence(device, handle, nullptr);
 | 
					    dld.vkDestroyFence(device, handle, nullptr);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -599,6 +607,16 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
 | 
				
			|||||||
    return ShaderModule(object, handle, *dld);
 | 
					    return ShaderModule(object, handle, *dld);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Creates a Vulkan event on this device with default (zero) flags.
					/// The vkCreateEvent result is passed to Check().
					/// NOTE(review): Check() presumably throws on failure - confirm in wrapper.h.
					/// @return Owning Event wrapper around the new handle.
					Event Device::CreateEvent() const {
					    const VkEventCreateInfo create_info{
					        VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, // sType
					        nullptr,                             // pNext
					        0,                                   // flags
					    };

					    VkEvent event_handle;
					    Check(dld->vkCreateEvent(handle, &create_info, nullptr, &event_handle));
					    return Event(event_handle, handle, *dld);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
 | 
					SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
 | 
				
			||||||
    VkSwapchainKHR object;
 | 
					    VkSwapchainKHR object;
 | 
				
			||||||
    Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object));
 | 
					    Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object));
 | 
				
			||||||
 | 
				
			|||||||
@ -199,6 +199,7 @@ struct DeviceDispatch : public InstanceDispatch {
 | 
				
			|||||||
    PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
 | 
					    PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
 | 
				
			||||||
    PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
 | 
					    PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
 | 
				
			||||||
    PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
 | 
					    PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
 | 
				
			||||||
 | 
					    PFN_vkCmdSetEvent vkCmdSetEvent;
 | 
				
			||||||
    PFN_vkCmdSetScissor vkCmdSetScissor;
 | 
					    PFN_vkCmdSetScissor vkCmdSetScissor;
 | 
				
			||||||
    PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask;
 | 
					    PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask;
 | 
				
			||||||
    PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
 | 
					    PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
 | 
				
			||||||
@ -211,6 +212,7 @@ struct DeviceDispatch : public InstanceDispatch {
 | 
				
			|||||||
    PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
 | 
					    PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
 | 
				
			||||||
    PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
 | 
					    PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
 | 
				
			||||||
    PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
 | 
					    PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
 | 
				
			||||||
 | 
					    PFN_vkCreateEvent vkCreateEvent;
 | 
				
			||||||
    PFN_vkCreateFence vkCreateFence;
 | 
					    PFN_vkCreateFence vkCreateFence;
 | 
				
			||||||
    PFN_vkCreateFramebuffer vkCreateFramebuffer;
 | 
					    PFN_vkCreateFramebuffer vkCreateFramebuffer;
 | 
				
			||||||
    PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines;
 | 
					    PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines;
 | 
				
			||||||
@ -229,6 +231,7 @@ struct DeviceDispatch : public InstanceDispatch {
 | 
				
			|||||||
    PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
 | 
					    PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
 | 
				
			||||||
    PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
 | 
					    PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
 | 
				
			||||||
    PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
 | 
					    PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
 | 
				
			||||||
 | 
					    PFN_vkDestroyEvent vkDestroyEvent;
 | 
				
			||||||
    PFN_vkDestroyFence vkDestroyFence;
 | 
					    PFN_vkDestroyFence vkDestroyFence;
 | 
				
			||||||
    PFN_vkDestroyFramebuffer vkDestroyFramebuffer;
 | 
					    PFN_vkDestroyFramebuffer vkDestroyFramebuffer;
 | 
				
			||||||
    PFN_vkDestroyImage vkDestroyImage;
 | 
					    PFN_vkDestroyImage vkDestroyImage;
 | 
				
			||||||
@ -248,6 +251,7 @@ struct DeviceDispatch : public InstanceDispatch {
 | 
				
			|||||||
    PFN_vkFreeMemory vkFreeMemory;
 | 
					    PFN_vkFreeMemory vkFreeMemory;
 | 
				
			||||||
    PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
 | 
					    PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
 | 
				
			||||||
    PFN_vkGetDeviceQueue vkGetDeviceQueue;
 | 
					    PFN_vkGetDeviceQueue vkGetDeviceQueue;
 | 
				
			||||||
 | 
					    PFN_vkGetEventStatus vkGetEventStatus;
 | 
				
			||||||
    PFN_vkGetFenceStatus vkGetFenceStatus;
 | 
					    PFN_vkGetFenceStatus vkGetFenceStatus;
 | 
				
			||||||
    PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
 | 
					    PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
 | 
				
			||||||
    PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
 | 
					    PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
 | 
				
			||||||
@ -279,6 +283,7 @@ void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept;
 | 
				
			|||||||
void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept;
 | 
					void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept;
 | 
				
			||||||
void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept;
 | 
					void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept;
 | 
				
			||||||
void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept;
 | 
					void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept;
 | 
				
			||||||
 | 
					void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept;
 | 
				
			||||||
void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept;
 | 
					void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept;
 | 
				
			||||||
void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
 | 
					void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
 | 
				
			||||||
void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
 | 
					void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
 | 
				
			||||||
@ -648,6 +653,15 @@ public:
 | 
				
			|||||||
    std::vector<VkImage> GetImages() const;
 | 
					    std::vector<VkImage> GetImages() const;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Handle wrapper around a VkEvent whose owner is a VkDevice.
					class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> {
					    using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle;
					public:
					    /// Returns the raw VkResult reported by vkGetEventStatus for this event.
					    VkResult GetStatus() const noexcept {
					        const VkResult status = dld->vkGetEventStatus(owner, handle);
					        return status;
					    }
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
 | 
					class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
 | 
				
			||||||
    using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
 | 
					    using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -695,6 +709,8 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
 | 
					    ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Event CreateEvent() const;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
 | 
					    SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept;
 | 
					    DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept;
 | 
				
			||||||
@ -938,6 +954,10 @@ public:
 | 
				
			|||||||
        dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds);
 | 
					        dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Forwards to vkCmdSetEvent on this handle with the given event and stage mask.
					    void SetEvent(VkEvent event, VkPipelineStageFlags stage_flags) const noexcept {
					        const auto& dispatch = *dld;
					        dispatch.vkCmdSetEvent(handle, event, stage_flags);
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
 | 
					    void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
 | 
				
			||||||
                                         const VkDeviceSize* offsets,
 | 
					                                         const VkDeviceSize* offsets,
 | 
				
			||||||
                                         const VkDeviceSize* sizes) const noexcept {
 | 
					                                         const VkDeviceSize* sizes) const noexcept {
 | 
				
			||||||
 | 
				
			|||||||
@ -192,6 +192,22 @@ public:
 | 
				
			|||||||
        index = index_;
 | 
					        index = index_;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void SetMemoryMarked(bool is_memory_marked_) {
 | 
				
			||||||
 | 
					        is_memory_marked = is_memory_marked_;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool IsMemoryMarked() const {
 | 
				
			||||||
 | 
					        return is_memory_marked;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void SetSyncPending(bool is_sync_pending_) {
 | 
				
			||||||
 | 
					        is_sync_pending = is_sync_pending_;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool IsSyncPending() const {
 | 
				
			||||||
 | 
					        return is_sync_pending;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void MarkAsPicked(bool is_picked_) {
 | 
					    void MarkAsPicked(bool is_picked_) {
 | 
				
			||||||
        is_picked = is_picked_;
 | 
					        is_picked = is_picked_;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -303,6 +319,8 @@ private:
 | 
				
			|||||||
    bool is_target{};
 | 
					    bool is_target{};
 | 
				
			||||||
    bool is_registered{};
 | 
					    bool is_registered{};
 | 
				
			||||||
    bool is_picked{};
 | 
					    bool is_picked{};
 | 
				
			||||||
 | 
					    bool is_memory_marked{};
 | 
				
			||||||
 | 
					    bool is_sync_pending{};
 | 
				
			||||||
    u32 index{NO_RT};
 | 
					    u32 index{NO_RT};
 | 
				
			||||||
    u64 modification_tick{};
 | 
					    u64 modification_tick{};
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
				
			|||||||
@ -6,6 +6,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include <algorithm>
 | 
					#include <algorithm>
 | 
				
			||||||
#include <array>
 | 
					#include <array>
 | 
				
			||||||
 | 
					#include <list>
 | 
				
			||||||
#include <memory>
 | 
					#include <memory>
 | 
				
			||||||
#include <mutex>
 | 
					#include <mutex>
 | 
				
			||||||
#include <set>
 | 
					#include <set>
 | 
				
			||||||
@ -62,6 +63,30 @@ public:
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Reacts to a CPU write: each surface returned by GetSurfacesInRegion(addr, size)
					    /// that still has its memory marked is unmarked, flagged as sync-pending and
					    /// appended to marked_for_unregister. Runs with the cache mutex held.
					    void OnCPUWrite(VAddr addr, std::size_t size) {
					        std::lock_guard lock{mutex};
					        for (const auto& overlap : GetSurfacesInRegion(addr, size)) {
					            if (!overlap->IsMemoryMarked()) {
					                continue;
					            }
					            UnmarkMemory(overlap);
					            overlap->SetSyncPending(true);
					            marked_for_unregister.emplace_back(overlap);
					        }
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void SyncGuestHost() {
 | 
				
			||||||
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (const auto& surface : marked_for_unregister) {
 | 
				
			||||||
 | 
					            if (surface->IsRegistered()) {
 | 
				
			||||||
 | 
					                surface->SetSyncPending(false);
 | 
				
			||||||
 | 
					                Unregister(surface);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        marked_for_unregister.clear();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * Guarantees that rendertargets don't unregister themselves if they
 | 
					     * Guarantees that rendertargets don't unregister themselves if they
 | 
				
			||||||
     * collide. Protection is currently only done on 3D slices.
 | 
					     * collide. Protection is currently only done on 3D slices.
 | 
				
			||||||
@ -85,10 +110,20 @@ public:
 | 
				
			|||||||
            return a->GetModificationTick() < b->GetModificationTick();
 | 
					            return a->GetModificationTick() < b->GetModificationTick();
 | 
				
			||||||
        });
 | 
					        });
 | 
				
			||||||
        for (const auto& surface : surfaces) {
 | 
					        for (const auto& surface : surfaces) {
 | 
				
			||||||
 | 
					            mutex.unlock();
 | 
				
			||||||
            FlushSurface(surface);
 | 
					            FlushSurface(surface);
 | 
				
			||||||
 | 
					            mutex.lock();
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Returns true when at least one surface returned by
					    /// GetSurfacesInRegion(addr, size) reports IsModified(). Runs with the cache
					    /// mutex held.
					    bool MustFlushRegion(VAddr addr, std::size_t size) {
					        std::lock_guard lock{mutex};
					        for (const TSurface& candidate : GetSurfacesInRegion(addr, size)) {
					            if (candidate->IsModified()) {
					                return true;
					            }
					        }
					        return false;
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
 | 
					    TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
 | 
				
			||||||
                            const VideoCommon::Shader::Sampler& entry) {
 | 
					                            const VideoCommon::Shader::Sampler& entry) {
 | 
				
			||||||
        std::lock_guard lock{mutex};
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
@ -206,8 +241,14 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        auto surface_view = GetSurface(gpu_addr, *cpu_addr,
 | 
					        auto surface_view = GetSurface(gpu_addr, *cpu_addr,
 | 
				
			||||||
                                       SurfaceParams::CreateForFramebuffer(system, index), true);
 | 
					                                       SurfaceParams::CreateForFramebuffer(system, index), true);
 | 
				
			||||||
        if (render_targets[index].target)
 | 
					        if (render_targets[index].target) {
 | 
				
			||||||
            render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
 | 
					            auto& surface = render_targets[index].target;
 | 
				
			||||||
 | 
					            surface->MarkAsRenderTarget(false, NO_RT);
 | 
				
			||||||
 | 
					            const auto& cr_params = surface->GetSurfaceParams();
 | 
				
			||||||
 | 
					            if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation) {
 | 
				
			||||||
 | 
					                AsyncFlushSurface(surface);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
        render_targets[index].target = surface_view.first;
 | 
					        render_targets[index].target = surface_view.first;
 | 
				
			||||||
        render_targets[index].view = surface_view.second;
 | 
					        render_targets[index].view = surface_view.second;
 | 
				
			||||||
        if (render_targets[index].target)
 | 
					        if (render_targets[index].target)
 | 
				
			||||||
@ -284,6 +325,34 @@ public:
 | 
				
			|||||||
        return ++ticks;
 | 
					        return ++ticks;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Moves the current uncommitted flush list (possibly null) to the back of
					    /// committed_flushes and leaves uncommitted_flushes empty.
					    void CommitAsyncFlushes() {
					        // Append an empty slot, then swap the pending list into it.
					        committed_flushes.emplace_back().swap(uncommitted_flushes);
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool HasUncommittedFlushes() const {
 | 
				
			||||||
 | 
					        return uncommitted_flushes != nullptr;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool ShouldWaitAsyncFlushes() const {
 | 
				
			||||||
 | 
					        return !committed_flushes.empty() && committed_flushes.front() != nullptr;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void PopAsyncFlushes() {
 | 
				
			||||||
 | 
					        if (committed_flushes.empty()) {
 | 
				
			||||||
 | 
					            return;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        auto& flush_list = committed_flushes.front();
 | 
				
			||||||
 | 
					        if (!flush_list) {
 | 
				
			||||||
 | 
					            committed_flushes.pop_front();
 | 
				
			||||||
 | 
					            return;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        for (TSurface& surface : *flush_list) {
 | 
				
			||||||
 | 
					            FlushSurface(surface);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        committed_flushes.pop_front();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
protected:
 | 
					protected:
 | 
				
			||||||
    explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
 | 
					    explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
 | 
				
			||||||
                          bool is_astc_supported)
 | 
					                          bool is_astc_supported)
 | 
				
			||||||
@ -345,9 +414,20 @@ protected:
 | 
				
			|||||||
        surface->SetCpuAddr(*cpu_addr);
 | 
					        surface->SetCpuAddr(*cpu_addr);
 | 
				
			||||||
        RegisterInnerCache(surface);
 | 
					        RegisterInnerCache(surface);
 | 
				
			||||||
        surface->MarkAsRegistered(true);
 | 
					        surface->MarkAsRegistered(true);
 | 
				
			||||||
 | 
					        surface->SetMemoryMarked(true);
 | 
				
			||||||
        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
 | 
					        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// If the surface's memory is still marked, decrements the rasterizer's cached
					    /// page count over the surface's CPU range and clears the mark. Otherwise does
					    /// nothing.
					    void UnmarkMemory(TSurface surface) {
					        if (surface->IsMemoryMarked()) {
					            const std::size_t byte_count = surface->GetSizeInBytes();
					            const VAddr base_addr = surface->GetCpuAddr();
					            rasterizer.UpdatePagesCachedCount(base_addr, byte_count, -1);
					            surface->SetMemoryMarked(false);
					        }
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void Unregister(TSurface surface) {
 | 
					    void Unregister(TSurface surface) {
 | 
				
			||||||
        if (guard_render_targets && surface->IsProtected()) {
 | 
					        if (guard_render_targets && surface->IsProtected()) {
 | 
				
			||||||
            return;
 | 
					            return;
 | 
				
			||||||
@ -355,9 +435,11 @@ protected:
 | 
				
			|||||||
        if (!guard_render_targets && surface->IsRenderTarget()) {
 | 
					        if (!guard_render_targets && surface->IsRenderTarget()) {
 | 
				
			||||||
            ManageRenderTargetUnregister(surface);
 | 
					            ManageRenderTargetUnregister(surface);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        const std::size_t size = surface->GetSizeInBytes();
 | 
					        UnmarkMemory(surface);
 | 
				
			||||||
        const VAddr cpu_addr = surface->GetCpuAddr();
 | 
					        if (surface->IsSyncPending()) {
 | 
				
			||||||
        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
 | 
					            marked_for_unregister.remove(surface);
 | 
				
			||||||
 | 
					            surface->SetSyncPending(false);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
        UnregisterInnerCache(surface);
 | 
					        UnregisterInnerCache(surface);
 | 
				
			||||||
        surface->MarkAsRegistered(false);
 | 
					        surface->MarkAsRegistered(false);
 | 
				
			||||||
        ReserveSurface(surface->GetSurfaceParams(), surface);
 | 
					        ReserveSurface(surface->GetSurfaceParams(), surface);
 | 
				
			||||||
@ -417,7 +499,7 @@ private:
 | 
				
			|||||||
     **/
 | 
					     **/
 | 
				
			||||||
    RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
 | 
					    RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
 | 
				
			||||||
                                 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
 | 
					                                 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
 | 
				
			||||||
        if (Settings::values.use_accurate_gpu_emulation) {
 | 
					        if (Settings::IsGPULevelExtreme()) {
 | 
				
			||||||
            return RecycleStrategy::Flush;
 | 
					            return RecycleStrategy::Flush;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        // 3D Textures decision
 | 
					        // 3D Textures decision
 | 
				
			||||||
@ -461,7 +543,7 @@ private:
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
        switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
 | 
					        switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
 | 
				
			||||||
        case RecycleStrategy::Ignore: {
 | 
					        case RecycleStrategy::Ignore: {
 | 
				
			||||||
            return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation);
 | 
					            return InitializeSurface(gpu_addr, params, Settings::IsGPULevelExtreme());
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        case RecycleStrategy::Flush: {
 | 
					        case RecycleStrategy::Flush: {
 | 
				
			||||||
            std::sort(overlaps.begin(), overlaps.end(),
 | 
					            std::sort(overlaps.begin(), overlaps.end(),
 | 
				
			||||||
@ -509,7 +591,7 @@ private:
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
        const auto& final_params = new_surface->GetSurfaceParams();
 | 
					        const auto& final_params = new_surface->GetSurfaceParams();
 | 
				
			||||||
        if (cr_params.type != final_params.type) {
 | 
					        if (cr_params.type != final_params.type) {
 | 
				
			||||||
            if (Settings::values.use_accurate_gpu_emulation) {
 | 
					            if (Settings::IsGPULevelExtreme()) {
 | 
				
			||||||
                BufferCopy(current_surface, new_surface);
 | 
					                BufferCopy(current_surface, new_surface);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
@ -598,7 +680,7 @@ private:
 | 
				
			|||||||
        if (passed_tests == 0) {
 | 
					        if (passed_tests == 0) {
 | 
				
			||||||
            return {};
 | 
					            return {};
 | 
				
			||||||
            // In Accurate GPU all tests should pass, else we recycle
 | 
					            // In Extreme GPU accuracy all tests should pass, else we recycle
 | 
				
			||||||
        } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
 | 
					        } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
 | 
				
			||||||
            return {};
 | 
					            return {};
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        for (const auto& surface : overlaps) {
 | 
					        for (const auto& surface : overlaps) {
 | 
				
			||||||
@ -668,7 +750,7 @@ private:
 | 
				
			|||||||
            for (const auto& surface : overlaps) {
 | 
					            for (const auto& surface : overlaps) {
 | 
				
			||||||
                if (!surface->MatchTarget(params.target)) {
 | 
					                if (!surface->MatchTarget(params.target)) {
 | 
				
			||||||
                    if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
 | 
					                    if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
 | 
				
			||||||
                        if (Settings::values.use_accurate_gpu_emulation) {
 | 
					                        if (Settings::IsGPULevelExtreme()) {
 | 
				
			||||||
                            return std::nullopt;
 | 
					                            return std::nullopt;
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
                        Unregister(surface);
 | 
					                        Unregister(surface);
 | 
				
			||||||
@ -1106,6 +1188,13 @@ private:
 | 
				
			|||||||
        TView view;
 | 
					        TView view;
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Appends the surface to the uncommitted flush list, creating the list first
					    /// when none exists yet.
					    void AsyncFlushSurface(TSurface& surface) {
					        if (uncommitted_flushes == nullptr) {
					            uncommitted_flushes = std::make_shared<std::list<TSurface>>();
					        }
					        uncommitted_flushes->emplace_back(surface);
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    VideoCore::RasterizerInterface& rasterizer;
 | 
					    VideoCore::RasterizerInterface& rasterizer;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    FormatLookupTable format_lookup_table;
 | 
					    FormatLookupTable format_lookup_table;
 | 
				
			||||||
@ -1150,6 +1239,11 @@ private:
 | 
				
			|||||||
    std::unordered_map<u32, TSurface> invalid_cache;
 | 
					    std::unordered_map<u32, TSurface> invalid_cache;
 | 
				
			||||||
    std::vector<u8> invalid_memory;
 | 
					    std::vector<u8> invalid_memory;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::list<TSurface> marked_for_unregister;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::shared_ptr<std::list<TSurface>> uncommitted_flushes{};
 | 
				
			||||||
 | 
					    std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    StagingCache staging_cache;
 | 
					    StagingCache staging_cache;
 | 
				
			||||||
    std::recursive_mutex mutex;
 | 
					    std::recursive_mutex mutex;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
				
			|||||||
@ -639,8 +639,8 @@ void Config::ReadRendererValues() {
 | 
				
			|||||||
    Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
 | 
					    Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
 | 
				
			||||||
    Settings::values.use_disk_shader_cache =
 | 
					    Settings::values.use_disk_shader_cache =
 | 
				
			||||||
        ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
 | 
					        ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
 | 
				
			||||||
    Settings::values.use_accurate_gpu_emulation =
 | 
					    const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
 | 
				
			||||||
        ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool();
 | 
					    Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
 | 
				
			||||||
    Settings::values.use_asynchronous_gpu_emulation =
 | 
					    Settings::values.use_asynchronous_gpu_emulation =
 | 
				
			||||||
        ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
 | 
					        ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
 | 
				
			||||||
    Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
 | 
					    Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
 | 
				
			||||||
@ -1080,8 +1080,8 @@ void Config::SaveRendererValues() {
 | 
				
			|||||||
    WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
 | 
					    WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
 | 
				
			||||||
    WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
 | 
					    WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
 | 
				
			||||||
                 true);
 | 
					                 true);
 | 
				
			||||||
    WriteSetting(QStringLiteral("use_accurate_gpu_emulation"),
 | 
					    WriteSetting(QStringLiteral("gpu_accuracy"), static_cast<int>(Settings::values.gpu_accuracy),
 | 
				
			||||||
                 Settings::values.use_accurate_gpu_emulation, false);
 | 
					                 0);
 | 
				
			||||||
    WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
 | 
					    WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
 | 
				
			||||||
                 Settings::values.use_asynchronous_gpu_emulation, false);
 | 
					                 Settings::values.use_asynchronous_gpu_emulation, false);
 | 
				
			||||||
    WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
 | 
					    WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
 | 
				
			||||||
 | 
				
			|||||||
@ -19,7 +19,7 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void ConfigureGraphicsAdvanced::SetConfiguration() {
 | 
					void ConfigureGraphicsAdvanced::SetConfiguration() {
 | 
				
			||||||
    const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
 | 
					    const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
 | 
				
			||||||
    ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
 | 
					    ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
 | 
				
			||||||
    ui->use_vsync->setEnabled(runtime_lock);
 | 
					    ui->use_vsync->setEnabled(runtime_lock);
 | 
				
			||||||
    ui->use_vsync->setChecked(Settings::values.use_vsync);
 | 
					    ui->use_vsync->setChecked(Settings::values.use_vsync);
 | 
				
			||||||
    ui->force_30fps_mode->setEnabled(runtime_lock);
 | 
					    ui->force_30fps_mode->setEnabled(runtime_lock);
 | 
				
			||||||
@ -29,7 +29,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ConfigureGraphicsAdvanced::ApplyConfiguration() {
 | 
					void ConfigureGraphicsAdvanced::ApplyConfiguration() {
 | 
				
			||||||
    Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
 | 
					    auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
 | 
				
			||||||
 | 
					    Settings::values.gpu_accuracy = gpu_accuracy;
 | 
				
			||||||
    Settings::values.use_vsync = ui->use_vsync->isChecked();
 | 
					    Settings::values.use_vsync = ui->use_vsync->isChecked();
 | 
				
			||||||
    Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
 | 
					    Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
 | 
				
			||||||
    Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
 | 
					    Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
 | 
				
			||||||
 | 
				
			|||||||
@ -23,12 +23,35 @@
 | 
				
			|||||||
       </property>
 | 
					       </property>
 | 
				
			||||||
       <layout class="QVBoxLayout" name="verticalLayout_3">
 | 
					       <layout class="QVBoxLayout" name="verticalLayout_3">
 | 
				
			||||||
        <item>
 | 
					        <item>
 | 
				
			||||||
         <widget class="QCheckBox" name="use_accurate_gpu_emulation">
 | 
					         <layout class="QHBoxLayout" name="horizontalLayout_2">
 | 
				
			||||||
 | 
					          <item>
 | 
				
			||||||
 | 
					           <widget class="QLabel" name="label_gpu_accuracy">
 | 
				
			||||||
            <property name="text">
 | 
					            <property name="text">
 | 
				
			||||||
           <string>Use accurate GPU emulation (slow)</string>
 | 
					             <string>Accuracy Level:</string>
 | 
				
			||||||
            </property>
 | 
					            </property>
 | 
				
			||||||
           </widget>
 | 
					           </widget>
 | 
				
			||||||
          </item>
 | 
					          </item>
 | 
				
			||||||
 | 
					          <item>
 | 
				
			||||||
 | 
					           <widget class="QComboBox" name="gpu_accuracy">
 | 
				
			||||||
 | 
					            <item>
 | 
				
			||||||
 | 
					             <property name="text">
 | 
				
			||||||
 | 
					              <string notr="true">Normal</string>
 | 
				
			||||||
 | 
					             </property>
 | 
				
			||||||
 | 
					            </item>
 | 
				
			||||||
 | 
					            <item>
 | 
				
			||||||
 | 
					             <property name="text">
 | 
				
			||||||
 | 
					              <string notr="true">High</string>
 | 
				
			||||||
 | 
					             </property>
 | 
				
			||||||
 | 
					            </item>
 | 
				
			||||||
 | 
					            <item>
 | 
				
			||||||
 | 
					             <property name="text">
 | 
				
			||||||
 | 
					              <string notr="true">Extreme(very slow)</string>
 | 
				
			||||||
 | 
					             </property>
 | 
				
			||||||
 | 
					            </item>
 | 
				
			||||||
 | 
					           </widget>
 | 
				
			||||||
 | 
					          </item>
 | 
				
			||||||
 | 
					         </layout>
 | 
				
			||||||
 | 
					        </item>
 | 
				
			||||||
        <item>
 | 
					        <item>
 | 
				
			||||||
         <widget class="QCheckBox" name="use_vsync">
 | 
					         <widget class="QCheckBox" name="use_vsync">
 | 
				
			||||||
          <property name="toolTip">
 | 
					          <property name="toolTip">
 | 
				
			||||||
 | 
				
			|||||||
@ -388,8 +388,8 @@ void Config::ReadValues() {
 | 
				
			|||||||
        static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
 | 
					        static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
 | 
				
			||||||
    Settings::values.use_disk_shader_cache =
 | 
					    Settings::values.use_disk_shader_cache =
 | 
				
			||||||
        sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
 | 
					        sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
 | 
				
			||||||
    Settings::values.use_accurate_gpu_emulation =
 | 
					    const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
 | 
				
			||||||
        sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
 | 
					    Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
 | 
				
			||||||
    Settings::values.use_asynchronous_gpu_emulation =
 | 
					    Settings::values.use_asynchronous_gpu_emulation =
 | 
				
			||||||
        sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
 | 
					        sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
 | 
				
			||||||
    Settings::values.use_vsync =
 | 
					    Settings::values.use_vsync =
 | 
				
			||||||
 | 
				
			|||||||
@ -146,9 +146,9 @@ frame_limit =
 | 
				
			|||||||
# 0 (default): Off, 1 : On
 | 
					# 0 (default): Off, 1 : On
 | 
				
			||||||
use_disk_shader_cache =
 | 
					use_disk_shader_cache =
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Whether to use accurate GPU emulation
 | 
					# Which gpu accuracy level to use
 | 
				
			||||||
# 0 (default): Off (fast), 1 : On (slow)
 | 
					# 0 (Normal), 1 (High), 2 (Extreme)
 | 
				
			||||||
use_accurate_gpu_emulation =
 | 
					gpu_accuracy =
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Whether to use asynchronous GPU emulation
 | 
					# Whether to use asynchronous GPU emulation
 | 
				
			||||||
# 0 : Off (slow), 1 (default): On (fast)
 | 
					# 0 : Off (slow), 1 (default): On (fast)
 | 
				
			||||||
 | 
				
			|||||||
@ -126,8 +126,8 @@ void Config::ReadValues() {
 | 
				
			|||||||
    Settings::values.frame_limit = 100;
 | 
					    Settings::values.frame_limit = 100;
 | 
				
			||||||
    Settings::values.use_disk_shader_cache =
 | 
					    Settings::values.use_disk_shader_cache =
 | 
				
			||||||
        sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
 | 
					        sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
 | 
				
			||||||
    Settings::values.use_accurate_gpu_emulation =
 | 
					    const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
 | 
				
			||||||
        sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
 | 
					    Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
 | 
				
			||||||
    Settings::values.use_asynchronous_gpu_emulation =
 | 
					    Settings::values.use_asynchronous_gpu_emulation =
 | 
				
			||||||
        sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
 | 
					        sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user