mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu-mainline.git
				synced 2025-03-21 01:53:15 +00:00 
			
		
		
		
	Merge pull request #3610 from FernandoS27/gpu-caches
Refactor all the GPU Caches to use VAddr for cache addressing
This commit is contained in:
		
						commit
						36f607217f
					
				@ -242,7 +242,52 @@ struct Memory::Impl {
 | 
				
			|||||||
            }
 | 
					            }
 | 
				
			||||||
            case Common::PageType::RasterizerCachedMemory: {
 | 
					            case Common::PageType::RasterizerCachedMemory: {
 | 
				
			||||||
                const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
 | 
					                const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
 | 
				
			||||||
                system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
 | 
					                system.GPU().FlushRegion(current_vaddr, copy_amount);
 | 
				
			||||||
 | 
					                std::memcpy(dest_buffer, host_ptr, copy_amount);
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            default:
 | 
				
			||||||
 | 
					                UNREACHABLE();
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            page_index++;
 | 
				
			||||||
 | 
					            page_offset = 0;
 | 
				
			||||||
 | 
					            dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
 | 
				
			||||||
 | 
					            remaining_size -= copy_amount;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
 | 
				
			||||||
 | 
					                         const std::size_t size) {
 | 
				
			||||||
 | 
					        const auto& page_table = process.VMManager().page_table;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        std::size_t remaining_size = size;
 | 
				
			||||||
 | 
					        std::size_t page_index = src_addr >> PAGE_BITS;
 | 
				
			||||||
 | 
					        std::size_t page_offset = src_addr & PAGE_MASK;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        while (remaining_size > 0) {
 | 
				
			||||||
 | 
					            const std::size_t copy_amount =
 | 
				
			||||||
 | 
					                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
 | 
				
			||||||
 | 
					            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            switch (page_table.attributes[page_index]) {
 | 
				
			||||||
 | 
					            case Common::PageType::Unmapped: {
 | 
				
			||||||
 | 
					                LOG_ERROR(HW_Memory,
 | 
				
			||||||
 | 
					                          "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
 | 
				
			||||||
 | 
					                          current_vaddr, src_addr, size);
 | 
				
			||||||
 | 
					                std::memset(dest_buffer, 0, copy_amount);
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            case Common::PageType::Memory: {
 | 
				
			||||||
 | 
					                DEBUG_ASSERT(page_table.pointers[page_index]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                const u8* const src_ptr =
 | 
				
			||||||
 | 
					                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
 | 
				
			||||||
 | 
					                std::memcpy(dest_buffer, src_ptr, copy_amount);
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            case Common::PageType::RasterizerCachedMemory: {
 | 
				
			||||||
 | 
					                const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
 | 
				
			||||||
                std::memcpy(dest_buffer, host_ptr, copy_amount);
 | 
					                std::memcpy(dest_buffer, host_ptr, copy_amount);
 | 
				
			||||||
                break;
 | 
					                break;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
@ -261,6 +306,10 @@ struct Memory::Impl {
 | 
				
			|||||||
        ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
 | 
					        ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Convenience overload of ReadBlockUnsafe for the current process: forwards
					    // src_addr, dest_buffer and size unchanged to the process-taking overload,
					    // passing the process returned by system.CurrentProcess().
					    void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
 | 
				
			||||||
 | 
					        ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
 | 
					    void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
 | 
				
			||||||
                    const std::size_t size) {
 | 
					                    const std::size_t size) {
 | 
				
			||||||
        const auto& page_table = process.VMManager().page_table;
 | 
					        const auto& page_table = process.VMManager().page_table;
 | 
				
			||||||
@ -290,7 +339,50 @@ struct Memory::Impl {
 | 
				
			|||||||
            }
 | 
					            }
 | 
				
			||||||
            case Common::PageType::RasterizerCachedMemory: {
 | 
					            case Common::PageType::RasterizerCachedMemory: {
 | 
				
			||||||
                u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
 | 
					                u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
 | 
				
			||||||
                system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
 | 
					                system.GPU().InvalidateRegion(current_vaddr, copy_amount);
 | 
				
			||||||
 | 
					                std::memcpy(host_ptr, src_buffer, copy_amount);
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            default:
 | 
				
			||||||
 | 
					                UNREACHABLE();
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            page_index++;
 | 
				
			||||||
 | 
					            page_offset = 0;
 | 
				
			||||||
 | 
					            src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
 | 
				
			||||||
 | 
					            remaining_size -= copy_amount;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void WriteBlockUnsafe(const Kernel::Process& process, const VAddr dest_addr,
 | 
				
			||||||
 | 
					                          const void* src_buffer, const std::size_t size) {
 | 
				
			||||||
 | 
					        const auto& page_table = process.VMManager().page_table;
 | 
				
			||||||
 | 
					        std::size_t remaining_size = size;
 | 
				
			||||||
 | 
					        std::size_t page_index = dest_addr >> PAGE_BITS;
 | 
				
			||||||
 | 
					        std::size_t page_offset = dest_addr & PAGE_MASK;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        while (remaining_size > 0) {
 | 
				
			||||||
 | 
					            const std::size_t copy_amount =
 | 
				
			||||||
 | 
					                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
 | 
				
			||||||
 | 
					            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            switch (page_table.attributes[page_index]) {
 | 
				
			||||||
 | 
					            case Common::PageType::Unmapped: {
 | 
				
			||||||
 | 
					                LOG_ERROR(HW_Memory,
 | 
				
			||||||
 | 
					                          "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
 | 
				
			||||||
 | 
					                          current_vaddr, dest_addr, size);
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            case Common::PageType::Memory: {
 | 
				
			||||||
 | 
					                DEBUG_ASSERT(page_table.pointers[page_index]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                u8* const dest_ptr =
 | 
				
			||||||
 | 
					                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
 | 
				
			||||||
 | 
					                std::memcpy(dest_ptr, src_buffer, copy_amount);
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            case Common::PageType::RasterizerCachedMemory: {
 | 
				
			||||||
 | 
					                u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
 | 
				
			||||||
                std::memcpy(host_ptr, src_buffer, copy_amount);
 | 
					                std::memcpy(host_ptr, src_buffer, copy_amount);
 | 
				
			||||||
                break;
 | 
					                break;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
@ -309,6 +401,10 @@ struct Memory::Impl {
 | 
				
			|||||||
        WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
 | 
					        WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Convenience overload of WriteBlockUnsafe for the current process: forwards
					    // dest_addr, src_buffer and size unchanged to the process-taking overload,
					    // passing the process returned by system.CurrentProcess().
					    void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
 | 
				
			||||||
 | 
					        WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
 | 
					    void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
 | 
				
			||||||
        const auto& page_table = process.VMManager().page_table;
 | 
					        const auto& page_table = process.VMManager().page_table;
 | 
				
			||||||
        std::size_t remaining_size = size;
 | 
					        std::size_t remaining_size = size;
 | 
				
			||||||
@ -337,7 +433,7 @@ struct Memory::Impl {
 | 
				
			|||||||
            }
 | 
					            }
 | 
				
			||||||
            case Common::PageType::RasterizerCachedMemory: {
 | 
					            case Common::PageType::RasterizerCachedMemory: {
 | 
				
			||||||
                u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
 | 
					                u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
 | 
				
			||||||
                system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
 | 
					                system.GPU().InvalidateRegion(current_vaddr, copy_amount);
 | 
				
			||||||
                std::memset(host_ptr, 0, copy_amount);
 | 
					                std::memset(host_ptr, 0, copy_amount);
 | 
				
			||||||
                break;
 | 
					                break;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
@ -384,7 +480,7 @@ struct Memory::Impl {
 | 
				
			|||||||
            }
 | 
					            }
 | 
				
			||||||
            case Common::PageType::RasterizerCachedMemory: {
 | 
					            case Common::PageType::RasterizerCachedMemory: {
 | 
				
			||||||
                const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
 | 
					                const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
 | 
				
			||||||
                system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
 | 
					                system.GPU().FlushRegion(current_vaddr, copy_amount);
 | 
				
			||||||
                WriteBlock(process, dest_addr, host_ptr, copy_amount);
 | 
					                WriteBlock(process, dest_addr, host_ptr, copy_amount);
 | 
				
			||||||
                break;
 | 
					                break;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
@ -545,7 +641,7 @@ struct Memory::Impl {
 | 
				
			|||||||
            break;
 | 
					            break;
 | 
				
			||||||
        case Common::PageType::RasterizerCachedMemory: {
 | 
					        case Common::PageType::RasterizerCachedMemory: {
 | 
				
			||||||
            const u8* const host_ptr = GetPointerFromVMA(vaddr);
 | 
					            const u8* const host_ptr = GetPointerFromVMA(vaddr);
 | 
				
			||||||
            system.GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
 | 
					            system.GPU().FlushRegion(vaddr, sizeof(T));
 | 
				
			||||||
            T value;
 | 
					            T value;
 | 
				
			||||||
            std::memcpy(&value, host_ptr, sizeof(T));
 | 
					            std::memcpy(&value, host_ptr, sizeof(T));
 | 
				
			||||||
            return value;
 | 
					            return value;
 | 
				
			||||||
@ -587,7 +683,7 @@ struct Memory::Impl {
 | 
				
			|||||||
            break;
 | 
					            break;
 | 
				
			||||||
        case Common::PageType::RasterizerCachedMemory: {
 | 
					        case Common::PageType::RasterizerCachedMemory: {
 | 
				
			||||||
            u8* const host_ptr{GetPointerFromVMA(vaddr)};
 | 
					            u8* const host_ptr{GetPointerFromVMA(vaddr)};
 | 
				
			||||||
            system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
 | 
					            system.GPU().InvalidateRegion(vaddr, sizeof(T));
 | 
				
			||||||
            std::memcpy(host_ptr, &data, sizeof(T));
 | 
					            std::memcpy(host_ptr, &data, sizeof(T));
 | 
				
			||||||
            break;
 | 
					            break;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@ -696,6 +792,15 @@ void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_
 | 
				
			|||||||
    impl->ReadBlock(src_addr, dest_buffer, size);
 | 
					    impl->ReadBlock(src_addr, dest_buffer, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Public entry point for an unsafe block read from a given process' address space.
					// Pure forwarder: all arguments are handed unchanged to impl->ReadBlockUnsafe().
					void Memory::ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr,
 | 
				
			||||||
 | 
					                             void* dest_buffer, const std::size_t size) {
 | 
				
			||||||
 | 
					    impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Public entry point for an unsafe block read that takes no process argument.
					// Pure forwarder: all arguments are handed unchanged to impl->ReadBlockUnsafe().
					void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
 | 
				
			||||||
 | 
					    impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
 | 
					void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
 | 
				
			||||||
                        std::size_t size) {
 | 
					                        std::size_t size) {
 | 
				
			||||||
    impl->WriteBlock(process, dest_addr, src_buffer, size);
 | 
					    impl->WriteBlock(process, dest_addr, src_buffer, size);
 | 
				
			||||||
@ -705,6 +810,16 @@ void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std
 | 
				
			|||||||
    impl->WriteBlock(dest_addr, src_buffer, size);
 | 
					    impl->WriteBlock(dest_addr, src_buffer, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Public entry point for an unsafe block write into a given process' address space.
					// Pure forwarder: all arguments are handed unchanged to impl->WriteBlockUnsafe().
					void Memory::WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr,
 | 
				
			||||||
 | 
					                              const void* src_buffer, std::size_t size) {
 | 
				
			||||||
 | 
					    impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Public entry point for an unsafe block write that takes no process argument.
					// Pure forwarder: all arguments are handed unchanged to impl->WriteBlockUnsafe().
					void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer,
 | 
				
			||||||
 | 
					                              const std::size_t size) {
 | 
				
			||||||
 | 
					    impl->WriteBlockUnsafe(dest_addr, src_buffer, size);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) {
 | 
					void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) {
 | 
				
			||||||
    impl->ZeroBlock(process, dest_addr, size);
 | 
					    impl->ZeroBlock(process, dest_addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
@ -294,6 +294,27 @@ public:
 | 
				
			|||||||
    void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
 | 
					    void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
 | 
				
			||||||
                   std::size_t size);
 | 
					                   std::size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
 | 
				
			||||||
 | 
					     * Reads a contiguous block of bytes from a specified process' address space.
 | 
				
			||||||
 | 
					     * This unsafe version does not trigger GPU flushing.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @param process     The process to read the data from.
 | 
				
			||||||
 | 
					     * @param src_addr    The virtual address to begin reading from.
 | 
				
			||||||
 | 
					     * @param dest_buffer The buffer to place the read bytes into.
 | 
				
			||||||
 | 
					     * @param size        The amount of data to read, in bytes.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @note If a size of 0 is specified, then this function reads nothing and
 | 
				
			||||||
 | 
					     *       no attempts to access memory are made at all.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @pre dest_buffer must be at least size bytes in length, otherwise a
 | 
				
			||||||
 | 
					     *      buffer overrun will occur.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @post The range [dest_buffer, dest_buffer + size) contains the read bytes from the
 | 
				
			||||||
 | 
					     *       process' address space.
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    void ReadBlockUnsafe(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
 | 
				
			||||||
 | 
					                         std::size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * Reads a contiguous block of bytes from the current process' address space.
 | 
					     * Reads a contiguous block of bytes from the current process' address space.
 | 
				
			||||||
     *
 | 
					     *
 | 
				
			||||||
@ -312,6 +333,25 @@ public:
 | 
				
			|||||||
     */
 | 
					     */
 | 
				
			||||||
    void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
 | 
					    void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
 | 
				
			||||||
 | 
					     * Reads a contiguous block of bytes from the current process' address space.
 | 
				
			||||||
 | 
					     * This unsafe version does not trigger GPU flushing.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @param src_addr    The virtual address to begin reading from.
 | 
				
			||||||
 | 
					     * @param dest_buffer The buffer to place the read bytes into.
 | 
				
			||||||
 | 
					     * @param size        The amount of data to read, in bytes.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @note If a size of 0 is specified, then this function reads nothing and
 | 
				
			||||||
 | 
					     *       no attempts to access memory are made at all.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @pre dest_buffer must be at least size bytes in length, otherwise a
 | 
				
			||||||
 | 
					     *      buffer overrun will occur.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @post The range [dest_buffer, dest_buffer + size) contains the read bytes from the
 | 
				
			||||||
 | 
					     *       current process' address space.
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    void ReadBlockUnsafe(VAddr src_addr, void* dest_buffer, std::size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * Writes a range of bytes into a given process' address space at the specified
 | 
					     * Writes a range of bytes into a given process' address space at the specified
 | 
				
			||||||
     * virtual address.
 | 
					     * virtual address.
 | 
				
			||||||
@ -335,6 +375,26 @@ public:
 | 
				
			|||||||
    void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
 | 
					    void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
 | 
				
			||||||
                    std::size_t size);
 | 
					                    std::size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
 | 
				
			||||||
 | 
					     * Writes a range of bytes into a given process' address space at the specified
 | 
				
			||||||
 | 
					     * virtual address.
 | 
				
			||||||
 | 
					     * This unsafe version does not invalidate GPU Memory.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @param process    The process to write data into the address space of.
 | 
				
			||||||
 | 
					     * @param dest_addr  The destination virtual address to begin writing the data at.
 | 
				
			||||||
 | 
					     * @param src_buffer The data to write into the process' address space.
 | 
				
			||||||
 | 
					     * @param size       The size of the data to write, in bytes.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @post The address range [dest_addr, dest_addr + size) in the process' address space
 | 
				
			||||||
 | 
					     *       contains the data that was within src_buffer.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @post If an attempt is made to write into an unmapped region of memory, the writes
 | 
				
			||||||
 | 
					     *       will be ignored and an error will be logged.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    void WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
 | 
				
			||||||
 | 
					                          std::size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * Writes a range of bytes into the current process' address space at the specified
 | 
					     * Writes a range of bytes into the current process' address space at the specified
 | 
				
			||||||
     * virtual address.
 | 
					     * virtual address.
 | 
				
			||||||
@ -356,6 +416,24 @@ public:
 | 
				
			|||||||
     */
 | 
					     */
 | 
				
			||||||
    void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
 | 
					    void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
 | 
				
			||||||
 | 
					     * Writes a range of bytes into the current process' address space at the specified
 | 
				
			||||||
 | 
					     * virtual address.
 | 
				
			||||||
 | 
					     * This unsafe version does not invalidate GPU Memory.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @param dest_addr  The destination virtual address to begin writing the data at.
 | 
				
			||||||
 | 
					     * @param src_buffer The data to write into the current process' address space.
 | 
				
			||||||
 | 
					     * @param size       The size of the data to write, in bytes.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @post The address range [dest_addr, dest_addr + size) in the current process' address space
 | 
				
			||||||
 | 
					     *       contains the data that was within src_buffer.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @post If an attempt is made to write into an unmapped region of memory, the writes
 | 
				
			||||||
 | 
					     *       will be ignored and an error will be logged.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * Fills the specified address range within a process' address space with zeroes.
 | 
					     * Fills the specified address range within a process' address space with zeroes.
 | 
				
			||||||
     *
 | 
					     *
 | 
				
			||||||
 | 
				
			|||||||
@ -15,37 +15,29 @@ namespace VideoCommon {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class BufferBlock {
 | 
					class BufferBlock {
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
 | 
					    bool Overlaps(const VAddr start, const VAddr end) const {
 | 
				
			||||||
        return (cache_addr < end) && (cache_addr_end > start);
 | 
					        return (cpu_addr < end) && (cpu_addr_end > start);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
 | 
					    bool IsInside(const VAddr other_start, const VAddr other_end) const {
 | 
				
			||||||
        return cache_addr <= other_start && other_end <= cache_addr_end;
 | 
					        return cpu_addr <= other_start && other_end <= cpu_addr_end;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    u8* GetWritableHostPtr() const {
 | 
					    std::size_t GetOffset(const VAddr in_addr) {
 | 
				
			||||||
        return FromCacheAddr(cache_addr);
 | 
					        return static_cast<std::size_t>(in_addr - cpu_addr);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    u8* GetWritableHostPtr(std::size_t offset) const {
 | 
					    VAddr GetCpuAddr() const {
 | 
				
			||||||
        return FromCacheAddr(cache_addr + offset);
 | 
					        return cpu_addr;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::size_t GetOffset(const CacheAddr in_addr) {
 | 
					    VAddr GetCpuAddrEnd() const {
 | 
				
			||||||
        return static_cast<std::size_t>(in_addr - cache_addr);
 | 
					        return cpu_addr_end;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    CacheAddr GetCacheAddr() const {
 | 
					    void SetCpuAddr(const VAddr new_addr) {
 | 
				
			||||||
        return cache_addr;
 | 
					        cpu_addr = new_addr;
 | 
				
			||||||
    }
 | 
					        cpu_addr_end = new_addr + size;
 | 
				
			||||||
 | 
					 | 
				
			||||||
    CacheAddr GetCacheAddrEnd() const {
 | 
					 | 
				
			||||||
        return cache_addr_end;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    void SetCacheAddr(const CacheAddr new_addr) {
 | 
					 | 
				
			||||||
        cache_addr = new_addr;
 | 
					 | 
				
			||||||
        cache_addr_end = new_addr + size;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::size_t GetSize() const {
 | 
					    std::size_t GetSize() const {
 | 
				
			||||||
@ -61,14 +53,14 @@ public:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
protected:
 | 
					protected:
 | 
				
			||||||
    explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
 | 
					    explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
 | 
				
			||||||
        SetCacheAddr(cache_addr);
 | 
					        SetCpuAddr(cpu_addr);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    ~BufferBlock() = default;
 | 
					    ~BufferBlock() = default;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
    CacheAddr cache_addr{};
 | 
					    VAddr cpu_addr{};
 | 
				
			||||||
    CacheAddr cache_addr_end{};
 | 
					    VAddr cpu_addr_end{};
 | 
				
			||||||
    std::size_t size{};
 | 
					    std::size_t size{};
 | 
				
			||||||
    u64 epoch{};
 | 
					    u64 epoch{};
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
				
			|||||||
@ -19,6 +19,7 @@
 | 
				
			|||||||
#include "common/alignment.h"
 | 
					#include "common/alignment.h"
 | 
				
			||||||
#include "common/common_types.h"
 | 
					#include "common/common_types.h"
 | 
				
			||||||
#include "core/core.h"
 | 
					#include "core/core.h"
 | 
				
			||||||
 | 
					#include "core/memory.h"
 | 
				
			||||||
#include "video_core/buffer_cache/buffer_block.h"
 | 
					#include "video_core/buffer_cache/buffer_block.h"
 | 
				
			||||||
#include "video_core/buffer_cache/map_interval.h"
 | 
					#include "video_core/buffer_cache/map_interval.h"
 | 
				
			||||||
#include "video_core/memory_manager.h"
 | 
					#include "video_core/memory_manager.h"
 | 
				
			||||||
@ -37,28 +38,45 @@ public:
 | 
				
			|||||||
                            bool is_written = false, bool use_fast_cbuf = false) {
 | 
					                            bool is_written = false, bool use_fast_cbuf = false) {
 | 
				
			||||||
        std::lock_guard lock{mutex};
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        auto& memory_manager = system.GPU().MemoryManager();
 | 
					        const std::optional<VAddr> cpu_addr_opt =
 | 
				
			||||||
        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
 | 
					            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
 | 
				
			||||||
        if (!host_ptr) {
 | 
					
 | 
				
			||||||
 | 
					        if (!cpu_addr_opt) {
 | 
				
			||||||
            return {GetEmptyBuffer(size), 0};
 | 
					            return {GetEmptyBuffer(size), 0};
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        const auto cache_addr = ToCacheAddr(host_ptr);
 | 
					
 | 
				
			||||||
 | 
					        VAddr cpu_addr = *cpu_addr_opt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // Cache management is a big overhead, so only cache entries with a given size.
 | 
					        // Cache management is a big overhead, so only cache entries with a given size.
 | 
				
			||||||
        // TODO: Figure out which size is the best for given games.
 | 
					        // TODO: Figure out which size is the best for given games.
 | 
				
			||||||
        constexpr std::size_t max_stream_size = 0x800;
 | 
					        constexpr std::size_t max_stream_size = 0x800;
 | 
				
			||||||
        if (use_fast_cbuf || size < max_stream_size) {
 | 
					        if (use_fast_cbuf || size < max_stream_size) {
 | 
				
			||||||
            if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
 | 
					            if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
 | 
				
			||||||
 | 
					                auto& memory_manager = system.GPU().MemoryManager();
 | 
				
			||||||
                if (use_fast_cbuf) {
 | 
					                if (use_fast_cbuf) {
 | 
				
			||||||
                    return ConstBufferUpload(host_ptr, size);
 | 
					                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
 | 
				
			||||||
 | 
					                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
 | 
				
			||||||
 | 
					                        return ConstBufferUpload(host_ptr, size);
 | 
				
			||||||
 | 
					                    } else {
 | 
				
			||||||
 | 
					                        staging_buffer.resize(size);
 | 
				
			||||||
 | 
					                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
 | 
				
			||||||
 | 
					                        return ConstBufferUpload(staging_buffer.data(), size);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
                } else {
 | 
					                } else {
 | 
				
			||||||
                    return StreamBufferUpload(host_ptr, size, alignment);
 | 
					                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
 | 
				
			||||||
 | 
					                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
 | 
				
			||||||
 | 
					                        return StreamBufferUpload(host_ptr, size, alignment);
 | 
				
			||||||
 | 
					                    } else {
 | 
				
			||||||
 | 
					                        staging_buffer.resize(size);
 | 
				
			||||||
 | 
					                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
 | 
				
			||||||
 | 
					                        return StreamBufferUpload(staging_buffer.data(), size, alignment);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        auto block = GetBlock(cache_addr, size);
 | 
					        auto block = GetBlock(cpu_addr, size);
 | 
				
			||||||
        auto map = MapAddress(block, gpu_addr, cache_addr, size);
 | 
					        auto map = MapAddress(block, gpu_addr, cpu_addr, size);
 | 
				
			||||||
        if (is_written) {
 | 
					        if (is_written) {
 | 
				
			||||||
            map->MarkAsModified(true, GetModifiedTicks());
 | 
					            map->MarkAsModified(true, GetModifiedTicks());
 | 
				
			||||||
            if (!map->IsWritten()) {
 | 
					            if (!map->IsWritten()) {
 | 
				
			||||||
@ -71,7 +89,7 @@ public:
 | 
				
			|||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        const u64 offset = static_cast<u64>(block->GetOffset(cache_addr));
 | 
					        const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return {ToHandle(block), offset};
 | 
					        return {ToHandle(block), offset};
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -112,7 +130,7 @@ public:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Write any cached resources overlapping the specified region back to memory
 | 
					    /// Write any cached resources overlapping the specified region back to memory
 | 
				
			||||||
    void FlushRegion(CacheAddr addr, std::size_t size) {
 | 
					    void FlushRegion(VAddr addr, std::size_t size) {
 | 
				
			||||||
        std::lock_guard lock{mutex};
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
 | 
					        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
 | 
				
			||||||
@ -127,7 +145,7 @@ public:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Mark the specified region as being invalidated
 | 
					    /// Mark the specified region as being invalidated
 | 
				
			||||||
    void InvalidateRegion(CacheAddr addr, u64 size) {
 | 
					    void InvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
        std::lock_guard lock{mutex};
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
 | 
					        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
 | 
				
			||||||
@ -152,7 +170,7 @@ protected:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    virtual void WriteBarrier() = 0;
 | 
					    virtual void WriteBarrier() = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;
 | 
					    virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
 | 
					    virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
 | 
				
			||||||
                                 const u8* data) = 0;
 | 
					                                 const u8* data) = 0;
 | 
				
			||||||
@ -169,20 +187,17 @@ protected:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    /// Register an object into the cache
 | 
					    /// Register an object into the cache
 | 
				
			||||||
    void Register(const MapInterval& new_map, bool inherit_written = false) {
 | 
					    void Register(const MapInterval& new_map, bool inherit_written = false) {
 | 
				
			||||||
        const CacheAddr cache_ptr = new_map->GetStart();
 | 
					        const VAddr cpu_addr = new_map->GetStart();
 | 
				
			||||||
        const std::optional<VAddr> cpu_addr =
 | 
					        if (!cpu_addr) {
 | 
				
			||||||
            system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
 | 
					 | 
				
			||||||
        if (!cache_ptr || !cpu_addr) {
 | 
					 | 
				
			||||||
            LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
 | 
					            LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
 | 
				
			||||||
                         new_map->GetGpuAddress());
 | 
					                         new_map->GetGpuAddress());
 | 
				
			||||||
            return;
 | 
					            return;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        const std::size_t size = new_map->GetEnd() - new_map->GetStart();
 | 
					        const std::size_t size = new_map->GetEnd() - new_map->GetStart();
 | 
				
			||||||
        new_map->SetCpuAddress(*cpu_addr);
 | 
					 | 
				
			||||||
        new_map->MarkAsRegistered(true);
 | 
					        new_map->MarkAsRegistered(true);
 | 
				
			||||||
        const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
 | 
					        const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
 | 
				
			||||||
        mapped_addresses.insert({interval, new_map});
 | 
					        mapped_addresses.insert({interval, new_map});
 | 
				
			||||||
        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
 | 
					        rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
 | 
				
			||||||
        if (inherit_written) {
 | 
					        if (inherit_written) {
 | 
				
			||||||
            MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
 | 
					            MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
 | 
				
			||||||
            new_map->MarkAsWritten(true);
 | 
					            new_map->MarkAsWritten(true);
 | 
				
			||||||
@ -192,7 +207,7 @@ protected:
 | 
				
			|||||||
    /// Unregisters an object from the cache
 | 
					    /// Unregisters an object from the cache
 | 
				
			||||||
    void Unregister(MapInterval& map) {
 | 
					    void Unregister(MapInterval& map) {
 | 
				
			||||||
        const std::size_t size = map->GetEnd() - map->GetStart();
 | 
					        const std::size_t size = map->GetEnd() - map->GetStart();
 | 
				
			||||||
        rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1);
 | 
					        rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
 | 
				
			||||||
        map->MarkAsRegistered(false);
 | 
					        map->MarkAsRegistered(false);
 | 
				
			||||||
        if (map->IsWritten()) {
 | 
					        if (map->IsWritten()) {
 | 
				
			||||||
            UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
 | 
					            UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
 | 
				
			||||||
@ -202,32 +217,39 @@ protected:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
    MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) {
 | 
					    MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
 | 
				
			||||||
        return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
 | 
					        return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr,
 | 
					    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
 | 
				
			||||||
                           const CacheAddr cache_addr, const std::size_t size) {
 | 
					                           const std::size_t size) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
 | 
					        std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
 | 
				
			||||||
        if (overlaps.empty()) {
 | 
					        if (overlaps.empty()) {
 | 
				
			||||||
            const CacheAddr cache_addr_end = cache_addr + size;
 | 
					            auto& memory_manager = system.GPU().MemoryManager();
 | 
				
			||||||
            MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr);
 | 
					            const VAddr cpu_addr_end = cpu_addr + size;
 | 
				
			||||||
            u8* host_ptr = FromCacheAddr(cache_addr);
 | 
					            MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
 | 
				
			||||||
            UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr);
 | 
					            if (memory_manager.IsGranularRange(gpu_addr, size)) {
 | 
				
			||||||
 | 
					                u8* host_ptr = memory_manager.GetPointer(gpu_addr);
 | 
				
			||||||
 | 
					                UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                staging_buffer.resize(size);
 | 
				
			||||||
 | 
					                memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
 | 
				
			||||||
 | 
					                UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
            Register(new_map);
 | 
					            Register(new_map);
 | 
				
			||||||
            return new_map;
 | 
					            return new_map;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        const CacheAddr cache_addr_end = cache_addr + size;
 | 
					        const VAddr cpu_addr_end = cpu_addr + size;
 | 
				
			||||||
        if (overlaps.size() == 1) {
 | 
					        if (overlaps.size() == 1) {
 | 
				
			||||||
            MapInterval& current_map = overlaps[0];
 | 
					            MapInterval& current_map = overlaps[0];
 | 
				
			||||||
            if (current_map->IsInside(cache_addr, cache_addr_end)) {
 | 
					            if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
 | 
				
			||||||
                return current_map;
 | 
					                return current_map;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        CacheAddr new_start = cache_addr;
 | 
					        VAddr new_start = cpu_addr;
 | 
				
			||||||
        CacheAddr new_end = cache_addr_end;
 | 
					        VAddr new_end = cpu_addr_end;
 | 
				
			||||||
        bool write_inheritance = false;
 | 
					        bool write_inheritance = false;
 | 
				
			||||||
        bool modified_inheritance = false;
 | 
					        bool modified_inheritance = false;
 | 
				
			||||||
        // Calculate new buffer parameters
 | 
					        // Calculate new buffer parameters
 | 
				
			||||||
@ -237,7 +259,7 @@ private:
 | 
				
			|||||||
            write_inheritance |= overlap->IsWritten();
 | 
					            write_inheritance |= overlap->IsWritten();
 | 
				
			||||||
            modified_inheritance |= overlap->IsModified();
 | 
					            modified_inheritance |= overlap->IsModified();
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr;
 | 
					        GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
 | 
				
			||||||
        for (auto& overlap : overlaps) {
 | 
					        for (auto& overlap : overlaps) {
 | 
				
			||||||
            Unregister(overlap);
 | 
					            Unregister(overlap);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@ -250,7 +272,7 @@ private:
 | 
				
			|||||||
        return new_map;
 | 
					        return new_map;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end,
 | 
					    void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
 | 
				
			||||||
                     std::vector<MapInterval>& overlaps) {
 | 
					                     std::vector<MapInterval>& overlaps) {
 | 
				
			||||||
        const IntervalType base_interval{start, end};
 | 
					        const IntervalType base_interval{start, end};
 | 
				
			||||||
        IntervalSet interval_set{};
 | 
					        IntervalSet interval_set{};
 | 
				
			||||||
@ -262,13 +284,15 @@ private:
 | 
				
			|||||||
        for (auto& interval : interval_set) {
 | 
					        for (auto& interval : interval_set) {
 | 
				
			||||||
            std::size_t size = interval.upper() - interval.lower();
 | 
					            std::size_t size = interval.upper() - interval.lower();
 | 
				
			||||||
            if (size > 0) {
 | 
					            if (size > 0) {
 | 
				
			||||||
                u8* host_ptr = FromCacheAddr(interval.lower());
 | 
					                staging_buffer.resize(size);
 | 
				
			||||||
                UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr);
 | 
					                system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
 | 
				
			||||||
 | 
					                UploadBlockData(block, block->GetOffset(interval.lower()), size,
 | 
				
			||||||
 | 
					                                staging_buffer.data());
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
 | 
					    std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
 | 
				
			||||||
        if (size == 0) {
 | 
					        if (size == 0) {
 | 
				
			||||||
            return {};
 | 
					            return {};
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@ -290,8 +314,9 @@ private:
 | 
				
			|||||||
    void FlushMap(MapInterval map) {
 | 
					    void FlushMap(MapInterval map) {
 | 
				
			||||||
        std::size_t size = map->GetEnd() - map->GetStart();
 | 
					        std::size_t size = map->GetEnd() - map->GetStart();
 | 
				
			||||||
        TBuffer block = blocks[map->GetStart() >> block_page_bits];
 | 
					        TBuffer block = blocks[map->GetStart() >> block_page_bits];
 | 
				
			||||||
        u8* host_ptr = FromCacheAddr(map->GetStart());
 | 
					        staging_buffer.resize(size);
 | 
				
			||||||
        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr);
 | 
					        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
 | 
				
			||||||
 | 
					        system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
 | 
				
			||||||
        map->MarkAsModified(false, 0);
 | 
					        map->MarkAsModified(false, 0);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -316,14 +341,14 @@ private:
 | 
				
			|||||||
    TBuffer EnlargeBlock(TBuffer buffer) {
 | 
					    TBuffer EnlargeBlock(TBuffer buffer) {
 | 
				
			||||||
        const std::size_t old_size = buffer->GetSize();
 | 
					        const std::size_t old_size = buffer->GetSize();
 | 
				
			||||||
        const std::size_t new_size = old_size + block_page_size;
 | 
					        const std::size_t new_size = old_size + block_page_size;
 | 
				
			||||||
        const CacheAddr cache_addr = buffer->GetCacheAddr();
 | 
					        const VAddr cpu_addr = buffer->GetCpuAddr();
 | 
				
			||||||
        TBuffer new_buffer = CreateBlock(cache_addr, new_size);
 | 
					        TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
 | 
				
			||||||
        CopyBlock(buffer, new_buffer, 0, 0, old_size);
 | 
					        CopyBlock(buffer, new_buffer, 0, 0, old_size);
 | 
				
			||||||
        buffer->SetEpoch(epoch);
 | 
					        buffer->SetEpoch(epoch);
 | 
				
			||||||
        pending_destruction.push_back(buffer);
 | 
					        pending_destruction.push_back(buffer);
 | 
				
			||||||
        const CacheAddr cache_addr_end = cache_addr + new_size - 1;
 | 
					        const VAddr cpu_addr_end = cpu_addr + new_size - 1;
 | 
				
			||||||
        u64 page_start = cache_addr >> block_page_bits;
 | 
					        u64 page_start = cpu_addr >> block_page_bits;
 | 
				
			||||||
        const u64 page_end = cache_addr_end >> block_page_bits;
 | 
					        const u64 page_end = cpu_addr_end >> block_page_bits;
 | 
				
			||||||
        while (page_start <= page_end) {
 | 
					        while (page_start <= page_end) {
 | 
				
			||||||
            blocks[page_start] = new_buffer;
 | 
					            blocks[page_start] = new_buffer;
 | 
				
			||||||
            ++page_start;
 | 
					            ++page_start;
 | 
				
			||||||
@ -334,9 +359,9 @@ private:
 | 
				
			|||||||
    TBuffer MergeBlocks(TBuffer first, TBuffer second) {
 | 
					    TBuffer MergeBlocks(TBuffer first, TBuffer second) {
 | 
				
			||||||
        const std::size_t size_1 = first->GetSize();
 | 
					        const std::size_t size_1 = first->GetSize();
 | 
				
			||||||
        const std::size_t size_2 = second->GetSize();
 | 
					        const std::size_t size_2 = second->GetSize();
 | 
				
			||||||
        const CacheAddr first_addr = first->GetCacheAddr();
 | 
					        const VAddr first_addr = first->GetCpuAddr();
 | 
				
			||||||
        const CacheAddr second_addr = second->GetCacheAddr();
 | 
					        const VAddr second_addr = second->GetCpuAddr();
 | 
				
			||||||
        const CacheAddr new_addr = std::min(first_addr, second_addr);
 | 
					        const VAddr new_addr = std::min(first_addr, second_addr);
 | 
				
			||||||
        const std::size_t new_size = size_1 + size_2;
 | 
					        const std::size_t new_size = size_1 + size_2;
 | 
				
			||||||
        TBuffer new_buffer = CreateBlock(new_addr, new_size);
 | 
					        TBuffer new_buffer = CreateBlock(new_addr, new_size);
 | 
				
			||||||
        CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
 | 
					        CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
 | 
				
			||||||
@ -345,9 +370,9 @@ private:
 | 
				
			|||||||
        second->SetEpoch(epoch);
 | 
					        second->SetEpoch(epoch);
 | 
				
			||||||
        pending_destruction.push_back(first);
 | 
					        pending_destruction.push_back(first);
 | 
				
			||||||
        pending_destruction.push_back(second);
 | 
					        pending_destruction.push_back(second);
 | 
				
			||||||
        const CacheAddr cache_addr_end = new_addr + new_size - 1;
 | 
					        const VAddr cpu_addr_end = new_addr + new_size - 1;
 | 
				
			||||||
        u64 page_start = new_addr >> block_page_bits;
 | 
					        u64 page_start = new_addr >> block_page_bits;
 | 
				
			||||||
        const u64 page_end = cache_addr_end >> block_page_bits;
 | 
					        const u64 page_end = cpu_addr_end >> block_page_bits;
 | 
				
			||||||
        while (page_start <= page_end) {
 | 
					        while (page_start <= page_end) {
 | 
				
			||||||
            blocks[page_start] = new_buffer;
 | 
					            blocks[page_start] = new_buffer;
 | 
				
			||||||
            ++page_start;
 | 
					            ++page_start;
 | 
				
			||||||
@ -355,18 +380,18 @@ private:
 | 
				
			|||||||
        return new_buffer;
 | 
					        return new_buffer;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
 | 
					    TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
 | 
				
			||||||
        TBuffer found{};
 | 
					        TBuffer found{};
 | 
				
			||||||
        const CacheAddr cache_addr_end = cache_addr + size - 1;
 | 
					        const VAddr cpu_addr_end = cpu_addr + size - 1;
 | 
				
			||||||
        u64 page_start = cache_addr >> block_page_bits;
 | 
					        u64 page_start = cpu_addr >> block_page_bits;
 | 
				
			||||||
        const u64 page_end = cache_addr_end >> block_page_bits;
 | 
					        const u64 page_end = cpu_addr_end >> block_page_bits;
 | 
				
			||||||
        while (page_start <= page_end) {
 | 
					        while (page_start <= page_end) {
 | 
				
			||||||
            auto it = blocks.find(page_start);
 | 
					            auto it = blocks.find(page_start);
 | 
				
			||||||
            if (it == blocks.end()) {
 | 
					            if (it == blocks.end()) {
 | 
				
			||||||
                if (found) {
 | 
					                if (found) {
 | 
				
			||||||
                    found = EnlargeBlock(found);
 | 
					                    found = EnlargeBlock(found);
 | 
				
			||||||
                } else {
 | 
					                } else {
 | 
				
			||||||
                    const CacheAddr start_addr = (page_start << block_page_bits);
 | 
					                    const VAddr start_addr = (page_start << block_page_bits);
 | 
				
			||||||
                    found = CreateBlock(start_addr, block_page_size);
 | 
					                    found = CreateBlock(start_addr, block_page_size);
 | 
				
			||||||
                    blocks[page_start] = found;
 | 
					                    blocks[page_start] = found;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
@ -386,7 +411,7 @@ private:
 | 
				
			|||||||
        return found;
 | 
					        return found;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
 | 
					    void MarkRegionAsWritten(const VAddr start, const VAddr end) {
 | 
				
			||||||
        u64 page_start = start >> write_page_bit;
 | 
					        u64 page_start = start >> write_page_bit;
 | 
				
			||||||
        const u64 page_end = end >> write_page_bit;
 | 
					        const u64 page_end = end >> write_page_bit;
 | 
				
			||||||
        while (page_start <= page_end) {
 | 
					        while (page_start <= page_end) {
 | 
				
			||||||
@ -400,7 +425,7 @@ private:
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
 | 
					    void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
 | 
				
			||||||
        u64 page_start = start >> write_page_bit;
 | 
					        u64 page_start = start >> write_page_bit;
 | 
				
			||||||
        const u64 page_end = end >> write_page_bit;
 | 
					        const u64 page_end = end >> write_page_bit;
 | 
				
			||||||
        while (page_start <= page_end) {
 | 
					        while (page_start <= page_end) {
 | 
				
			||||||
@ -416,7 +441,7 @@ private:
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const {
 | 
					    bool IsRegionWritten(const VAddr start, const VAddr end) const {
 | 
				
			||||||
        u64 page_start = start >> write_page_bit;
 | 
					        u64 page_start = start >> write_page_bit;
 | 
				
			||||||
        const u64 page_end = end >> write_page_bit;
 | 
					        const u64 page_end = end >> write_page_bit;
 | 
				
			||||||
        while (page_start <= page_end) {
 | 
					        while (page_start <= page_end) {
 | 
				
			||||||
@ -440,8 +465,8 @@ private:
 | 
				
			|||||||
    u64 buffer_offset = 0;
 | 
					    u64 buffer_offset = 0;
 | 
				
			||||||
    u64 buffer_offset_base = 0;
 | 
					    u64 buffer_offset_base = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    using IntervalSet = boost::icl::interval_set<CacheAddr>;
 | 
					    using IntervalSet = boost::icl::interval_set<VAddr>;
 | 
				
			||||||
    using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>;
 | 
					    using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
 | 
				
			||||||
    using IntervalType = typename IntervalCache::interval_type;
 | 
					    using IntervalType = typename IntervalCache::interval_type;
 | 
				
			||||||
    IntervalCache mapped_addresses;
 | 
					    IntervalCache mapped_addresses;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -456,6 +481,8 @@ private:
 | 
				
			|||||||
    u64 epoch = 0;
 | 
					    u64 epoch = 0;
 | 
				
			||||||
    u64 modified_ticks = 0;
 | 
					    u64 modified_ticks = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::vector<u8> staging_buffer;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::recursive_mutex mutex;
 | 
					    std::recursive_mutex mutex;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -11,7 +11,7 @@ namespace VideoCommon {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class MapIntervalBase {
 | 
					class MapIntervalBase {
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
    MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr)
 | 
					    MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
 | 
				
			||||||
        : start{start}, end{end}, gpu_addr{gpu_addr} {}
 | 
					        : start{start}, end{end}, gpu_addr{gpu_addr} {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void SetCpuAddress(VAddr new_cpu_addr) {
 | 
					    void SetCpuAddress(VAddr new_cpu_addr) {
 | 
				
			||||||
@ -26,7 +26,7 @@ public:
 | 
				
			|||||||
        return gpu_addr;
 | 
					        return gpu_addr;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
 | 
					    bool IsInside(const VAddr other_start, const VAddr other_end) const {
 | 
				
			||||||
        return (start <= other_start && other_end <= end);
 | 
					        return (start <= other_start && other_end <= end);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -46,11 +46,11 @@ public:
 | 
				
			|||||||
        return is_registered;
 | 
					        return is_registered;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    CacheAddr GetStart() const {
 | 
					    VAddr GetStart() const {
 | 
				
			||||||
        return start;
 | 
					        return start;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    CacheAddr GetEnd() const {
 | 
					    VAddr GetEnd() const {
 | 
				
			||||||
        return end;
 | 
					        return end;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -76,8 +76,8 @@ public:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
    CacheAddr start;
 | 
					    VAddr start;
 | 
				
			||||||
    CacheAddr end;
 | 
					    VAddr end;
 | 
				
			||||||
    GPUVAddr gpu_addr;
 | 
					    GPUVAddr gpu_addr;
 | 
				
			||||||
    VAddr cpu_addr{};
 | 
					    VAddr cpu_addr{};
 | 
				
			||||||
    bool is_written{};
 | 
					    bool is_written{};
 | 
				
			||||||
 | 
				
			|||||||
@ -270,13 +270,13 @@ public:
 | 
				
			|||||||
    virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
 | 
					    virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | 
					    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | 
				
			||||||
    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
 | 
					    virtual void FlushRegion(VAddr addr, u64 size) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify rasterizer that any caches of the specified region should be invalidated
 | 
					    /// Notify rasterizer that any caches of the specified region should be invalidated
 | 
				
			||||||
    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
 | 
					    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 | 
					    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 | 
				
			||||||
    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
 | 
					    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
protected:
 | 
					protected:
 | 
				
			||||||
    virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
 | 
					    virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
 | 
				
			||||||
 | 
				
			|||||||
@ -30,15 +30,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 | 
				
			|||||||
    gpu_thread.SwapBuffers(framebuffer);
 | 
					    gpu_thread.SwapBuffers(framebuffer);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
 | 
					void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    gpu_thread.FlushRegion(addr, size);
 | 
					    gpu_thread.FlushRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
 | 
					void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    gpu_thread.InvalidateRegion(addr, size);
 | 
					    gpu_thread.InvalidateRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
 | 
					void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    gpu_thread.FlushAndInvalidateRegion(addr, size);
 | 
					    gpu_thread.FlushAndInvalidateRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -27,9 +27,9 @@ public:
 | 
				
			|||||||
    void Start() override;
 | 
					    void Start() override;
 | 
				
			||||||
    void PushGPUEntries(Tegra::CommandList&& entries) override;
 | 
					    void PushGPUEntries(Tegra::CommandList&& entries) override;
 | 
				
			||||||
    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
 | 
					    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
 | 
				
			||||||
    void FlushRegion(CacheAddr addr, u64 size) override;
 | 
					    void FlushRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void InvalidateRegion(CacheAddr addr, u64 size) override;
 | 
					    void InvalidateRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
 | 
					    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void WaitIdle() const override;
 | 
					    void WaitIdle() const override;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
protected:
 | 
					protected:
 | 
				
			||||||
 | 
				
			|||||||
@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 | 
				
			|||||||
    renderer->SwapBuffers(framebuffer);
 | 
					    renderer->SwapBuffers(framebuffer);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
 | 
					void GPUSynch::FlushRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    renderer->Rasterizer().FlushRegion(addr, size);
 | 
					    renderer->Rasterizer().FlushRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
 | 
					void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    renderer->Rasterizer().InvalidateRegion(addr, size);
 | 
					    renderer->Rasterizer().InvalidateRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
 | 
					void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
 | 
					    renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -26,9 +26,9 @@ public:
 | 
				
			|||||||
    void Start() override;
 | 
					    void Start() override;
 | 
				
			||||||
    void PushGPUEntries(Tegra::CommandList&& entries) override;
 | 
					    void PushGPUEntries(Tegra::CommandList&& entries) override;
 | 
				
			||||||
    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
 | 
					    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
 | 
				
			||||||
    void FlushRegion(CacheAddr addr, u64 size) override;
 | 
					    void FlushRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void InvalidateRegion(CacheAddr addr, u64 size) override;
 | 
					    void InvalidateRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
 | 
					    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void WaitIdle() const override {}
 | 
					    void WaitIdle() const override {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
protected:
 | 
					protected:
 | 
				
			||||||
 | 
				
			|||||||
@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 | 
				
			|||||||
    PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
 | 
					    PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
 | 
					void ThreadManager::FlushRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    PushCommand(FlushRegionCommand(addr, size));
 | 
					    PushCommand(FlushRegionCommand(addr, size));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
 | 
					void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    system.Renderer().Rasterizer().InvalidateRegion(addr, size);
 | 
					    system.Renderer().Rasterizer().InvalidateRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
 | 
					void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
 | 
					    // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
 | 
				
			||||||
    InvalidateRegion(addr, size);
 | 
					    InvalidateRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
@ -47,26 +47,26 @@ struct SwapBuffersCommand final {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
/// Command to signal to the GPU thread to flush a region
 | 
					/// Command to signal to the GPU thread to flush a region
 | 
				
			||||||
struct FlushRegionCommand final {
 | 
					struct FlushRegionCommand final {
 | 
				
			||||||
    explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
 | 
					    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    CacheAddr addr;
 | 
					    VAddr addr;
 | 
				
			||||||
    u64 size;
 | 
					    u64 size;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Command to signal to the GPU thread to invalidate a region
 | 
					/// Command to signal to the GPU thread to invalidate a region
 | 
				
			||||||
struct InvalidateRegionCommand final {
 | 
					struct InvalidateRegionCommand final {
 | 
				
			||||||
    explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
 | 
					    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    CacheAddr addr;
 | 
					    VAddr addr;
 | 
				
			||||||
    u64 size;
 | 
					    u64 size;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Command to signal to the GPU thread to flush and invalidate a region
 | 
					/// Command to signal to the GPU thread to flush and invalidate a region
 | 
				
			||||||
struct FlushAndInvalidateRegionCommand final {
 | 
					struct FlushAndInvalidateRegionCommand final {
 | 
				
			||||||
    explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
 | 
					    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
 | 
				
			||||||
        : addr{addr}, size{size} {}
 | 
					        : addr{addr}, size{size} {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    CacheAddr addr;
 | 
					    VAddr addr;
 | 
				
			||||||
    u64 size;
 | 
					    u64 size;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -111,13 +111,13 @@ public:
 | 
				
			|||||||
    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
 | 
					    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | 
					    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | 
				
			||||||
    void FlushRegion(CacheAddr addr, u64 size);
 | 
					    void FlushRegion(VAddr addr, u64 size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify rasterizer that any caches of the specified region should be invalidated
 | 
					    /// Notify rasterizer that any caches of the specified region should be invalidated
 | 
				
			||||||
    void InvalidateRegion(CacheAddr addr, u64 size);
 | 
					    void InvalidateRegion(VAddr addr, u64 size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 | 
					    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 | 
				
			||||||
    void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
 | 
					    void FlushAndInvalidateRegion(VAddr addr, u64 size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Wait until the gpu thread is idle.
 | 
					    // Wait until the gpu thread is idle.
 | 
				
			||||||
    void WaitIdle() const;
 | 
					    void WaitIdle() const;
 | 
				
			||||||
 | 
				
			|||||||
@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
 | 
				
			|||||||
    ASSERT((gpu_addr & page_mask) == 0);
 | 
					    ASSERT((gpu_addr & page_mask) == 0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const u64 aligned_size{Common::AlignUp(size, page_size)};
 | 
					    const u64 aligned_size{Common::AlignUp(size, page_size)};
 | 
				
			||||||
    const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
 | 
					 | 
				
			||||||
    const auto cpu_addr = GpuToCpuAddress(gpu_addr);
 | 
					    const auto cpu_addr = GpuToCpuAddress(gpu_addr);
 | 
				
			||||||
    ASSERT(cpu_addr);
 | 
					    ASSERT(cpu_addr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Flush and invalidate through the GPU interface, to be asynchronous if possible.
 | 
					    // Flush and invalidate through the GPU interface, to be asynchronous if possible.
 | 
				
			||||||
    system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size);
 | 
					    system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    UnmapRange(gpu_addr, aligned_size);
 | 
					    UnmapRange(gpu_addr, aligned_size);
 | 
				
			||||||
    ASSERT(system.CurrentProcess()
 | 
					    ASSERT(system.CurrentProcess()
 | 
				
			||||||
@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const {
 | 
				
			|||||||
        return {};
 | 
					        return {};
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const u8* page_pointer{page_table.pointers[addr >> page_bits]};
 | 
					    const u8* page_pointer{GetPointer(addr)};
 | 
				
			||||||
    if (page_pointer) {
 | 
					    if (page_pointer) {
 | 
				
			||||||
        // NOTE: Avoid adding any extra logic to this fast-path block
 | 
					        // NOTE: Avoid adding any extra logic to this fast-path block
 | 
				
			||||||
        T value;
 | 
					        T value;
 | 
				
			||||||
        std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
 | 
					        std::memcpy(&value, page_pointer, sizeof(T));
 | 
				
			||||||
        return value;
 | 
					        return value;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) {
 | 
				
			|||||||
        return;
 | 
					        return;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    u8* page_pointer{page_table.pointers[addr >> page_bits]};
 | 
					    u8* page_pointer{GetPointer(addr)};
 | 
				
			||||||
    if (page_pointer) {
 | 
					    if (page_pointer) {
 | 
				
			||||||
        // NOTE: Avoid adding any extra logic to this fast-path block
 | 
					        // NOTE: Avoid adding any extra logic to this fast-path block
 | 
				
			||||||
        std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
 | 
					        std::memcpy(page_pointer, &data, sizeof(T));
 | 
				
			||||||
        return;
 | 
					        return;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
 | 
				
			|||||||
        return {};
 | 
					        return {};
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    u8* const page_pointer{page_table.pointers[addr >> page_bits]};
 | 
					    auto& memory = system.Memory();
 | 
				
			||||||
    if (page_pointer != nullptr) {
 | 
					
 | 
				
			||||||
        return page_pointer + (addr & page_mask);
 | 
					    const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (page_addr != 0) {
 | 
				
			||||||
 | 
					        return memory.GetPointer(page_addr + (addr & page_mask));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
 | 
					    LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
 | 
				
			||||||
@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
 | 
				
			|||||||
        return {};
 | 
					        return {};
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const u8* const page_pointer{page_table.pointers[addr >> page_bits]};
 | 
					    const auto& memory = system.Memory();
 | 
				
			||||||
    if (page_pointer != nullptr) {
 | 
					
 | 
				
			||||||
        return page_pointer + (addr & page_mask);
 | 
					    const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (page_addr != 0) {
 | 
				
			||||||
 | 
					        return memory.GetPointer(page_addr + (addr & page_mask));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
 | 
					    LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
 | 
				
			||||||
@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
 | 
				
			|||||||
    std::size_t page_index{src_addr >> page_bits};
 | 
					    std::size_t page_index{src_addr >> page_bits};
 | 
				
			||||||
    std::size_t page_offset{src_addr & page_mask};
 | 
					    std::size_t page_offset{src_addr & page_mask};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    auto& memory = system.Memory();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    while (remaining_size > 0) {
 | 
					    while (remaining_size > 0) {
 | 
				
			||||||
        const std::size_t copy_amount{
 | 
					        const std::size_t copy_amount{
 | 
				
			||||||
            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 | 
					            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        switch (page_table.attributes[page_index]) {
 | 
					        switch (page_table.attributes[page_index]) {
 | 
				
			||||||
        case Common::PageType::Memory: {
 | 
					        case Common::PageType::Memory: {
 | 
				
			||||||
            const u8* src_ptr{page_table.pointers[page_index] + page_offset};
 | 
					            const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
 | 
				
			||||||
            // Flush must happen on the rasterizer interface, such that memory is always synchronous
 | 
					            // Flush must happen on the rasterizer interface, such that memory is always synchronous
 | 
				
			||||||
            // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
 | 
					            // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
 | 
				
			||||||
            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
 | 
					            rasterizer.FlushRegion(src_addr, copy_amount);
 | 
				
			||||||
            std::memcpy(dest_buffer, src_ptr, copy_amount);
 | 
					            memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
 | 
				
			||||||
            break;
 | 
					            break;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        default:
 | 
					        default:
 | 
				
			||||||
@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
 | 
				
			|||||||
    std::size_t page_index{src_addr >> page_bits};
 | 
					    std::size_t page_index{src_addr >> page_bits};
 | 
				
			||||||
    std::size_t page_offset{src_addr & page_mask};
 | 
					    std::size_t page_offset{src_addr & page_mask};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    auto& memory = system.Memory();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    while (remaining_size > 0) {
 | 
					    while (remaining_size > 0) {
 | 
				
			||||||
        const std::size_t copy_amount{
 | 
					        const std::size_t copy_amount{
 | 
				
			||||||
            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 | 
					            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 | 
				
			||||||
        const u8* page_pointer = page_table.pointers[page_index];
 | 
					        const u8* page_pointer = page_table.pointers[page_index];
 | 
				
			||||||
        if (page_pointer) {
 | 
					        if (page_pointer) {
 | 
				
			||||||
            const u8* src_ptr{page_pointer + page_offset};
 | 
					            const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
 | 
				
			||||||
            std::memcpy(dest_buffer, src_ptr, copy_amount);
 | 
					            memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            std::memset(dest_buffer, 0, copy_amount);
 | 
					            std::memset(dest_buffer, 0, copy_amount);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
 | 
				
			|||||||
    std::size_t page_index{dest_addr >> page_bits};
 | 
					    std::size_t page_index{dest_addr >> page_bits};
 | 
				
			||||||
    std::size_t page_offset{dest_addr & page_mask};
 | 
					    std::size_t page_offset{dest_addr & page_mask};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    auto& memory = system.Memory();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    while (remaining_size > 0) {
 | 
					    while (remaining_size > 0) {
 | 
				
			||||||
        const std::size_t copy_amount{
 | 
					        const std::size_t copy_amount{
 | 
				
			||||||
            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 | 
					            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        switch (page_table.attributes[page_index]) {
 | 
					        switch (page_table.attributes[page_index]) {
 | 
				
			||||||
        case Common::PageType::Memory: {
 | 
					        case Common::PageType::Memory: {
 | 
				
			||||||
            u8* dest_ptr{page_table.pointers[page_index] + page_offset};
 | 
					            const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
 | 
				
			||||||
            // Invalidate must happen on the rasterizer interface, such that memory is always
 | 
					            // Invalidate must happen on the rasterizer interface, such that memory is always
 | 
				
			||||||
            // synchronous when it is written (even when in asynchronous GPU mode).
 | 
					            // synchronous when it is written (even when in asynchronous GPU mode).
 | 
				
			||||||
            rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
 | 
					            rasterizer.InvalidateRegion(dest_addr, copy_amount);
 | 
				
			||||||
            std::memcpy(dest_ptr, src_buffer, copy_amount);
 | 
					            memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
 | 
				
			||||||
            break;
 | 
					            break;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        default:
 | 
					        default:
 | 
				
			||||||
@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
 | 
				
			|||||||
    std::size_t page_index{dest_addr >> page_bits};
 | 
					    std::size_t page_index{dest_addr >> page_bits};
 | 
				
			||||||
    std::size_t page_offset{dest_addr & page_mask};
 | 
					    std::size_t page_offset{dest_addr & page_mask};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    auto& memory = system.Memory();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    while (remaining_size > 0) {
 | 
					    while (remaining_size > 0) {
 | 
				
			||||||
        const std::size_t copy_amount{
 | 
					        const std::size_t copy_amount{
 | 
				
			||||||
            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 | 
					            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 | 
				
			||||||
        u8* page_pointer = page_table.pointers[page_index];
 | 
					        u8* page_pointer = page_table.pointers[page_index];
 | 
				
			||||||
        if (page_pointer) {
 | 
					        if (page_pointer) {
 | 
				
			||||||
            u8* dest_ptr{page_pointer + page_offset};
 | 
					            const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
 | 
				
			||||||
            std::memcpy(dest_ptr, src_buffer, copy_amount);
 | 
					            memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        page_index++;
 | 
					        page_index++;
 | 
				
			||||||
        page_offset = 0;
 | 
					        page_offset = 0;
 | 
				
			||||||
@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
 | 
					void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
 | 
				
			||||||
    std::size_t remaining_size{size};
 | 
					    std::vector<u8> tmp_buffer(size);
 | 
				
			||||||
    std::size_t page_index{src_addr >> page_bits};
 | 
					    ReadBlock(src_addr, tmp_buffer.data(), size);
 | 
				
			||||||
    std::size_t page_offset{src_addr & page_mask};
 | 
					    WriteBlock(dest_addr, tmp_buffer.data(), size);
 | 
				
			||||||
 | 
					 | 
				
			||||||
    while (remaining_size > 0) {
 | 
					 | 
				
			||||||
        const std::size_t copy_amount{
 | 
					 | 
				
			||||||
            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        switch (page_table.attributes[page_index]) {
 | 
					 | 
				
			||||||
        case Common::PageType::Memory: {
 | 
					 | 
				
			||||||
            // Flush must happen on the rasterizer interface, such that memory is always synchronous
 | 
					 | 
				
			||||||
            // when it is copied (even when in asynchronous GPU mode).
 | 
					 | 
				
			||||||
            const u8* src_ptr{page_table.pointers[page_index] + page_offset};
 | 
					 | 
				
			||||||
            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
 | 
					 | 
				
			||||||
            WriteBlock(dest_addr, src_ptr, copy_amount);
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        default:
 | 
					 | 
				
			||||||
            UNREACHABLE();
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        page_index++;
 | 
					 | 
				
			||||||
        page_offset = 0;
 | 
					 | 
				
			||||||
        dest_addr += static_cast<VAddr>(copy_amount);
 | 
					 | 
				
			||||||
        src_addr += static_cast<VAddr>(copy_amount);
 | 
					 | 
				
			||||||
        remaining_size -= copy_amount;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
 | 
					void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
 | 
				
			||||||
@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const
 | 
				
			|||||||
    WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
 | 
					    WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
 | 
				
			||||||
 | 
					    const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits];
 | 
				
			||||||
 | 
					    const std::size_t page = (addr & Memory::PAGE_MASK) + size;
 | 
				
			||||||
 | 
					    return page <= Memory::PAGE_SIZE;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
 | 
					void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
 | 
				
			||||||
                             VAddr backing_addr) {
 | 
					                             VAddr backing_addr) {
 | 
				
			||||||
    LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
 | 
					    LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
 | 
				
			||||||
 | 
				
			|||||||
@ -97,6 +97,11 @@ public:
 | 
				
			|||||||
    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
 | 
					    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
 | 
				
			||||||
    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
 | 
					    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
 | 
				
			||||||
 | 
					     * IsGranularRange checks if a gpu region can be simply read with a pointer
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
    using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
 | 
					    using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
 | 
				
			||||||
    using VMAHandle = VMAMap::const_iterator;
 | 
					    using VMAHandle = VMAMap::const_iterator;
 | 
				
			||||||
 | 
				
			|||||||
@ -98,12 +98,12 @@ public:
 | 
				
			|||||||
                                                      static_cast<QueryCache&>(*this),
 | 
					                                                      static_cast<QueryCache&>(*this),
 | 
				
			||||||
                                                      VideoCore::QueryType::SamplesPassed}}} {}
 | 
					                                                      VideoCore::QueryType::SamplesPassed}}} {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void InvalidateRegion(CacheAddr addr, std::size_t size) {
 | 
					    void InvalidateRegion(VAddr addr, std::size_t size) {
 | 
				
			||||||
        std::unique_lock lock{mutex};
 | 
					        std::unique_lock lock{mutex};
 | 
				
			||||||
        FlushAndRemoveRegion(addr, size);
 | 
					        FlushAndRemoveRegion(addr, size);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void FlushRegion(CacheAddr addr, std::size_t size) {
 | 
					    void FlushRegion(VAddr addr, std::size_t size) {
 | 
				
			||||||
        std::unique_lock lock{mutex};
 | 
					        std::unique_lock lock{mutex};
 | 
				
			||||||
        FlushAndRemoveRegion(addr, size);
 | 
					        FlushAndRemoveRegion(addr, size);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -117,14 +117,16 @@ public:
 | 
				
			|||||||
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
 | 
					    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
 | 
				
			||||||
        std::unique_lock lock{mutex};
 | 
					        std::unique_lock lock{mutex};
 | 
				
			||||||
        auto& memory_manager = system.GPU().MemoryManager();
 | 
					        auto& memory_manager = system.GPU().MemoryManager();
 | 
				
			||||||
        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
 | 
					        const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
 | 
				
			||||||
 | 
					        ASSERT(cpu_addr_opt);
 | 
				
			||||||
 | 
					        VAddr cpu_addr = *cpu_addr_opt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
 | 
					        CachedQuery* query = TryGet(cpu_addr);
 | 
				
			||||||
        if (!query) {
 | 
					        if (!query) {
 | 
				
			||||||
            const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
 | 
					            ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
 | 
				
			||||||
            ASSERT_OR_EXECUTE(cpu_addr, return;);
 | 
					            const auto host_ptr = memory_manager.GetPointer(gpu_addr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
 | 
					            query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        query->BindCounter(Stream(type).Current(), timestamp);
 | 
					        query->BindCounter(Stream(type).Current(), timestamp);
 | 
				
			||||||
@ -173,11 +175,11 @@ protected:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
    /// Flushes a memory range to guest memory and removes it from the cache.
 | 
					    /// Flushes a memory range to guest memory and removes it from the cache.
 | 
				
			||||||
    void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
 | 
					    void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
 | 
				
			||||||
        const u64 addr_begin = static_cast<u64>(addr);
 | 
					        const u64 addr_begin = static_cast<u64>(addr);
 | 
				
			||||||
        const u64 addr_end = addr_begin + static_cast<u64>(size);
 | 
					        const u64 addr_end = addr_begin + static_cast<u64>(size);
 | 
				
			||||||
        const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
 | 
					        const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
 | 
				
			||||||
            const u64 cache_begin = query.GetCacheAddr();
 | 
					            const u64 cache_begin = query.GetCpuAddr();
 | 
				
			||||||
            const u64 cache_end = cache_begin + query.SizeInBytes();
 | 
					            const u64 cache_end = cache_begin + query.SizeInBytes();
 | 
				
			||||||
            return cache_begin < addr_end && addr_begin < cache_end;
 | 
					            return cache_begin < addr_end && addr_begin < cache_end;
 | 
				
			||||||
        };
 | 
					        };
 | 
				
			||||||
@ -193,7 +195,7 @@ private:
 | 
				
			|||||||
                if (!in_range(query)) {
 | 
					                if (!in_range(query)) {
 | 
				
			||||||
                    continue;
 | 
					                    continue;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
 | 
					                rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
 | 
				
			||||||
                query.Flush();
 | 
					                query.Flush();
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
 | 
					            contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
 | 
				
			||||||
@ -204,22 +206,21 @@ private:
 | 
				
			|||||||
    /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
 | 
					    /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
 | 
				
			||||||
    CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
 | 
					    CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
 | 
				
			||||||
        rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
 | 
					        rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
 | 
				
			||||||
        const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
 | 
					        const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
 | 
				
			||||||
        return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
 | 
					        return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
 | 
				
			||||||
                                                  host_ptr);
 | 
					                                                  host_ptr);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Tries to get a cached query. Returns nullptr on failure.
 | 
					    /// Tries to get a cached query. Returns nullptr on failure.
 | 
				
			||||||
    CachedQuery* TryGet(CacheAddr addr) {
 | 
					    CachedQuery* TryGet(VAddr addr) {
 | 
				
			||||||
        const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
 | 
					        const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
 | 
				
			||||||
        const auto it = cached_queries.find(page);
 | 
					        const auto it = cached_queries.find(page);
 | 
				
			||||||
        if (it == std::end(cached_queries)) {
 | 
					        if (it == std::end(cached_queries)) {
 | 
				
			||||||
            return nullptr;
 | 
					            return nullptr;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        auto& contents = it->second;
 | 
					        auto& contents = it->second;
 | 
				
			||||||
        const auto found =
 | 
					        const auto found = std::find_if(std::begin(contents), std::end(contents),
 | 
				
			||||||
            std::find_if(std::begin(contents), std::end(contents),
 | 
					                                        [addr](auto& query) { return query.GetCpuAddr() == addr; });
 | 
				
			||||||
                         [addr](auto& query) { return query.GetCacheAddr() == addr; });
 | 
					 | 
				
			||||||
        return found != std::end(contents) ? &*found : nullptr;
 | 
					        return found != std::end(contents) ? &*found : nullptr;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -323,14 +324,10 @@ public:
 | 
				
			|||||||
        timestamp = timestamp_;
 | 
					        timestamp = timestamp_;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    VAddr CpuAddr() const noexcept {
 | 
					    VAddr GetCpuAddr() const noexcept {
 | 
				
			||||||
        return cpu_addr;
 | 
					        return cpu_addr;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    CacheAddr GetCacheAddr() const noexcept {
 | 
					 | 
				
			||||||
        return ToCacheAddr(host_ptr);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    u64 SizeInBytes() const noexcept {
 | 
					    u64 SizeInBytes() const noexcept {
 | 
				
			||||||
        return SizeInBytes(timestamp.has_value());
 | 
					        return SizeInBytes(timestamp.has_value());
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
@ -18,22 +18,14 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class RasterizerCacheObject {
 | 
					class RasterizerCacheObject {
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
    explicit RasterizerCacheObject(const u8* host_ptr)
 | 
					    explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
 | 
				
			||||||
        : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    virtual ~RasterizerCacheObject();
 | 
					    virtual ~RasterizerCacheObject();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    CacheAddr GetCacheAddr() const {
 | 
					    VAddr GetCpuAddr() const {
 | 
				
			||||||
        return cache_addr;
 | 
					        return cpu_addr;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const u8* GetHostPtr() const {
 | 
					 | 
				
			||||||
        return host_ptr;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /// Gets the address of the shader in guest memory, required for cache management
 | 
					 | 
				
			||||||
    virtual VAddr GetCpuAddr() const = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /// Gets the size of the shader in guest memory, required for cache management
 | 
					    /// Gets the size of the shader in guest memory, required for cache management
 | 
				
			||||||
    virtual std::size_t GetSizeInBytes() const = 0;
 | 
					    virtual std::size_t GetSizeInBytes() const = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -68,8 +60,7 @@ private:
 | 
				
			|||||||
    bool is_registered{};      ///< Whether the object is currently registered with the cache
 | 
					    bool is_registered{};      ///< Whether the object is currently registered with the cache
 | 
				
			||||||
    bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
 | 
					    bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
 | 
				
			||||||
    u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
 | 
					    u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
 | 
				
			||||||
    const u8* host_ptr{};      ///< Pointer to the memory backing this cached region
 | 
					    VAddr cpu_addr{};          ///< Cpu address memory, unique from emulated virtual address space
 | 
				
			||||||
    CacheAddr cache_addr{};    ///< Cache address memory, unique from emulated virtual address space
 | 
					 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template <class T>
 | 
					template <class T>
 | 
				
			||||||
@ -80,7 +71,7 @@ public:
 | 
				
			|||||||
    explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
 | 
					    explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Write any cached resources overlapping the specified region back to memory
 | 
					    /// Write any cached resources overlapping the specified region back to memory
 | 
				
			||||||
    void FlushRegion(CacheAddr addr, std::size_t size) {
 | 
					    void FlushRegion(VAddr addr, std::size_t size) {
 | 
				
			||||||
        std::lock_guard lock{mutex};
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
 | 
					        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
 | 
				
			||||||
@ -90,7 +81,7 @@ public:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Mark the specified region as being invalidated
 | 
					    /// Mark the specified region as being invalidated
 | 
				
			||||||
    void InvalidateRegion(CacheAddr addr, u64 size) {
 | 
					    void InvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
        std::lock_guard lock{mutex};
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
 | 
					        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
 | 
				
			||||||
@ -114,27 +105,20 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
protected:
 | 
					protected:
 | 
				
			||||||
    /// Tries to get an object from the cache with the specified cache address
 | 
					    /// Tries to get an object from the cache with the specified cache address
 | 
				
			||||||
    T TryGet(CacheAddr addr) const {
 | 
					    T TryGet(VAddr addr) const {
 | 
				
			||||||
        const auto iter = map_cache.find(addr);
 | 
					        const auto iter = map_cache.find(addr);
 | 
				
			||||||
        if (iter != map_cache.end())
 | 
					        if (iter != map_cache.end())
 | 
				
			||||||
            return iter->second;
 | 
					            return iter->second;
 | 
				
			||||||
        return nullptr;
 | 
					        return nullptr;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    T TryGet(const void* addr) const {
 | 
					 | 
				
			||||||
        const auto iter = map_cache.find(ToCacheAddr(addr));
 | 
					 | 
				
			||||||
        if (iter != map_cache.end())
 | 
					 | 
				
			||||||
            return iter->second;
 | 
					 | 
				
			||||||
        return nullptr;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /// Register an object into the cache
 | 
					    /// Register an object into the cache
 | 
				
			||||||
    virtual void Register(const T& object) {
 | 
					    virtual void Register(const T& object) {
 | 
				
			||||||
        std::lock_guard lock{mutex};
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        object->SetIsRegistered(true);
 | 
					        object->SetIsRegistered(true);
 | 
				
			||||||
        interval_cache.add({GetInterval(object), ObjectSet{object}});
 | 
					        interval_cache.add({GetInterval(object), ObjectSet{object}});
 | 
				
			||||||
        map_cache.insert({object->GetCacheAddr(), object});
 | 
					        map_cache.insert({object->GetCpuAddr(), object});
 | 
				
			||||||
        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
 | 
					        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -144,7 +128,7 @@ protected:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        object->SetIsRegistered(false);
 | 
					        object->SetIsRegistered(false);
 | 
				
			||||||
        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
 | 
					        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
 | 
				
			||||||
        const CacheAddr addr = object->GetCacheAddr();
 | 
					        const VAddr addr = object->GetCpuAddr();
 | 
				
			||||||
        interval_cache.subtract({GetInterval(object), ObjectSet{object}});
 | 
					        interval_cache.subtract({GetInterval(object), ObjectSet{object}});
 | 
				
			||||||
        map_cache.erase(addr);
 | 
					        map_cache.erase(addr);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -173,7 +157,7 @@ protected:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
    /// Returns a list of cached objects from the specified memory region, ordered by access time
 | 
					    /// Returns a list of cached objects from the specified memory region, ordered by access time
 | 
				
			||||||
    std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
 | 
					    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
 | 
				
			||||||
        if (size == 0) {
 | 
					        if (size == 0) {
 | 
				
			||||||
            return {};
 | 
					            return {};
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@ -197,13 +181,13 @@ private:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    using ObjectSet = std::set<T>;
 | 
					    using ObjectSet = std::set<T>;
 | 
				
			||||||
    using ObjectCache = std::unordered_map<CacheAddr, T>;
 | 
					    using ObjectCache = std::unordered_map<VAddr, T>;
 | 
				
			||||||
    using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
 | 
					    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
 | 
				
			||||||
    using ObjectInterval = typename IntervalCache::interval_type;
 | 
					    using ObjectInterval = typename IntervalCache::interval_type;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    static auto GetInterval(const T& object) {
 | 
					    static auto GetInterval(const T& object) {
 | 
				
			||||||
        return ObjectInterval::right_open(object->GetCacheAddr(),
 | 
					        return ObjectInterval::right_open(object->GetCpuAddr(),
 | 
				
			||||||
                                          object->GetCacheAddr() + object->GetSizeInBytes());
 | 
					                                          object->GetCpuAddr() + object->GetSizeInBytes());
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ObjectCache map_cache;
 | 
					    ObjectCache map_cache;
 | 
				
			||||||
 | 
				
			|||||||
@ -53,14 +53,14 @@ public:
 | 
				
			|||||||
    virtual void FlushAll() = 0;
 | 
					    virtual void FlushAll() = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | 
					    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | 
				
			||||||
    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
 | 
					    virtual void FlushRegion(VAddr addr, u64 size) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify rasterizer that any caches of the specified region should be invalidated
 | 
					    /// Notify rasterizer that any caches of the specified region should be invalidated
 | 
				
			||||||
    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
 | 
					    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | 
					    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | 
				
			||||||
    /// and invalidated
 | 
					    /// and invalidated
 | 
				
			||||||
    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
 | 
					    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Notify the rasterizer to send all written commands to the host GPU.
 | 
					    /// Notify the rasterizer to send all written commands to the host GPU.
 | 
				
			||||||
    virtual void FlushCommands() = 0;
 | 
					    virtual void FlushCommands() = 0;
 | 
				
			||||||
 | 
				
			|||||||
@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
 | 
					MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
 | 
					CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
 | 
				
			||||||
    : VideoCommon::BufferBlock{cache_addr, size} {
 | 
					    : VideoCommon::BufferBlock{cpu_addr, size} {
 | 
				
			||||||
    gl_buffer.Create();
 | 
					    gl_buffer.Create();
 | 
				
			||||||
    glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
 | 
					    glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() {
 | 
				
			|||||||
    glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
 | 
					    glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
 | 
					Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
 | 
				
			||||||
    return std::make_shared<CachedBufferBlock>(cache_addr, size);
 | 
					    return std::make_shared<CachedBufferBlock>(cpu_addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void OGLBufferCache::WriteBarrier() {
 | 
					void OGLBufferCache::WriteBarrier() {
 | 
				
			||||||
 | 
				
			|||||||
@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class CachedBufferBlock : public VideoCommon::BufferBlock {
 | 
					class CachedBufferBlock : public VideoCommon::BufferBlock {
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
    explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size);
 | 
					    explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
 | 
				
			||||||
    ~CachedBufferBlock();
 | 
					    ~CachedBufferBlock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const GLuint* GetHandle() const {
 | 
					    const GLuint* GetHandle() const {
 | 
				
			||||||
@ -55,7 +55,7 @@ public:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
protected:
 | 
					protected:
 | 
				
			||||||
    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
 | 
					    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void WriteBarrier() override;
 | 
					    void WriteBarrier() override;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -656,9 +656,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void RasterizerOpenGL::FlushAll() {}
 | 
					void RasterizerOpenGL::FlushAll() {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
 | 
					void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 | 
					    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 | 
				
			||||||
    if (!addr || !size) {
 | 
					    if (addr == 0 || size == 0) {
 | 
				
			||||||
        return;
 | 
					        return;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    texture_cache.FlushRegion(addr, size);
 | 
					    texture_cache.FlushRegion(addr, size);
 | 
				
			||||||
@ -666,9 +666,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
 | 
				
			|||||||
    query_cache.FlushRegion(addr, size);
 | 
					    query_cache.FlushRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
 | 
					void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 | 
					    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 | 
				
			||||||
    if (!addr || !size) {
 | 
					    if (addr == 0 || size == 0) {
 | 
				
			||||||
        return;
 | 
					        return;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    texture_cache.InvalidateRegion(addr, size);
 | 
					    texture_cache.InvalidateRegion(addr, size);
 | 
				
			||||||
@ -677,7 +677,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
 | 
				
			|||||||
    query_cache.InvalidateRegion(addr, size);
 | 
					    query_cache.InvalidateRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
 | 
					void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    if (Settings::values.use_accurate_gpu_emulation) {
 | 
					    if (Settings::values.use_accurate_gpu_emulation) {
 | 
				
			||||||
        FlushRegion(addr, size);
 | 
					        FlushRegion(addr, size);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -716,8 +716,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 | 
					    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const auto surface{
 | 
					    const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
 | 
				
			||||||
        texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))};
 | 
					 | 
				
			||||||
    if (!surface) {
 | 
					    if (!surface) {
 | 
				
			||||||
        return {};
 | 
					        return {};
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
@ -65,9 +65,9 @@ public:
 | 
				
			|||||||
    void ResetCounter(VideoCore::QueryType type) override;
 | 
					    void ResetCounter(VideoCore::QueryType type) override;
 | 
				
			||||||
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
 | 
					    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
 | 
				
			||||||
    void FlushAll() override;
 | 
					    void FlushAll() override;
 | 
				
			||||||
    void FlushRegion(CacheAddr addr, u64 size) override;
 | 
					    void FlushRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void InvalidateRegion(CacheAddr addr, u64 size) override;
 | 
					    void InvalidateRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
 | 
					    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void FlushCommands() override;
 | 
					    void FlushCommands() override;
 | 
				
			||||||
    void TickFrame() override;
 | 
					    void TickFrame() override;
 | 
				
			||||||
    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
 | 
					    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
 | 
				
			||||||
 | 
				
			|||||||
@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
} // Anonymous namespace
 | 
					} // Anonymous namespace
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
 | 
					CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
 | 
				
			||||||
                           std::shared_ptr<VideoCommon::Shader::Registry> registry,
 | 
					                           std::shared_ptr<VideoCommon::Shader::Registry> registry,
 | 
				
			||||||
                           ShaderEntries entries, std::shared_ptr<OGLProgram> program)
 | 
					                           ShaderEntries entries, std::shared_ptr<OGLProgram> program)
 | 
				
			||||||
    : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)},
 | 
					    : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
 | 
				
			||||||
      cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {}
 | 
					      size_in_bytes{size_in_bytes}, program{std::move(program)} {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CachedShader::~CachedShader() = default;
 | 
					CachedShader::~CachedShader() = default;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
 | 
				
			|||||||
    entry.bindless_samplers = registry->GetBindlessSamplers();
 | 
					    entry.bindless_samplers = registry->GetBindlessSamplers();
 | 
				
			||||||
    params.disk_cache.SaveEntry(std::move(entry));
 | 
					    params.disk_cache.SaveEntry(std::move(entry));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
 | 
					    return std::shared_ptr<CachedShader>(new CachedShader(
 | 
				
			||||||
                                                          size_in_bytes, std::move(registry),
 | 
					        params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
 | 
				
			||||||
                                                          MakeEntries(ir), std::move(program)));
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
 | 
					Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
 | 
				
			||||||
@ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
 | 
				
			|||||||
    entry.bindless_samplers = registry->GetBindlessSamplers();
 | 
					    entry.bindless_samplers = registry->GetBindlessSamplers();
 | 
				
			||||||
    params.disk_cache.SaveEntry(std::move(entry));
 | 
					    params.disk_cache.SaveEntry(std::move(entry));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
 | 
					    return std::shared_ptr<CachedShader>(new CachedShader(
 | 
				
			||||||
                                                          size_in_bytes, std::move(registry),
 | 
					        params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
 | 
				
			||||||
                                                          MakeEntries(ir), std::move(program)));
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
 | 
					Shader CachedShader::CreateFromCache(const ShaderParameters& params,
 | 
				
			||||||
                                     const PrecompiledShader& precompiled_shader,
 | 
					                                     const PrecompiledShader& precompiled_shader,
 | 
				
			||||||
                                     std::size_t size_in_bytes) {
 | 
					                                     std::size_t size_in_bytes) {
 | 
				
			||||||
    return std::shared_ptr<CachedShader>(new CachedShader(
 | 
					    return std::shared_ptr<CachedShader>(
 | 
				
			||||||
        params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry,
 | 
					        new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
 | 
				
			||||||
        precompiled_shader.entries, precompiled_shader.program));
 | 
					                         precompiled_shader.entries, precompiled_shader.program));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
 | 
					ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
 | 
				
			||||||
@ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 | 
				
			|||||||
    const GPUVAddr address{GetShaderAddress(system, program)};
 | 
					    const GPUVAddr address{GetShaderAddress(system, program)};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Look up shader in the cache based on address
 | 
					    // Look up shader in the cache based on address
 | 
				
			||||||
    const auto host_ptr{memory_manager.GetPointer(address)};
 | 
					    const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
 | 
				
			||||||
    Shader shader{TryGet(host_ptr)};
 | 
					    Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
 | 
				
			||||||
    if (shader) {
 | 
					    if (shader) {
 | 
				
			||||||
        return last_shaders[static_cast<std::size_t>(program)] = shader;
 | 
					        return last_shaders[static_cast<std::size_t>(program)] = shader;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const auto host_ptr{memory_manager.GetPointer(address)};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // No shader found - create a new one
 | 
					    // No shader found - create a new one
 | 
				
			||||||
    ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
 | 
					    ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
 | 
				
			||||||
    ProgramCode code_b;
 | 
					    ProgramCode code_b;
 | 
				
			||||||
@ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    const auto unique_identifier = GetUniqueIdentifier(
 | 
					    const auto unique_identifier = GetUniqueIdentifier(
 | 
				
			||||||
        GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
 | 
					        GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
 | 
				
			||||||
    const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
 | 
					
 | 
				
			||||||
    const ShaderParameters params{system,   disk_cache, device,
 | 
					    const ShaderParameters params{system,    disk_cache, device,
 | 
				
			||||||
                                  cpu_addr, host_ptr,   unique_identifier};
 | 
					                                  *cpu_addr, host_ptr,   unique_identifier};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const auto found = runtime_cache.find(unique_identifier);
 | 
					    const auto found = runtime_cache.find(unique_identifier);
 | 
				
			||||||
    if (found == runtime_cache.end()) {
 | 
					    if (found == runtime_cache.end()) {
 | 
				
			||||||
@ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
 | 
					Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
 | 
				
			||||||
    auto& memory_manager{system.GPU().MemoryManager()};
 | 
					    auto& memory_manager{system.GPU().MemoryManager()};
 | 
				
			||||||
    const auto host_ptr{memory_manager.GetPointer(code_addr)};
 | 
					    const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
 | 
				
			||||||
    auto kernel = TryGet(host_ptr);
 | 
					
 | 
				
			||||||
 | 
					    auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
 | 
				
			||||||
    if (kernel) {
 | 
					    if (kernel) {
 | 
				
			||||||
        return kernel;
 | 
					        return kernel;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const auto host_ptr{memory_manager.GetPointer(code_addr)};
 | 
				
			||||||
    // No kernel found, create a new one
 | 
					    // No kernel found, create a new one
 | 
				
			||||||
    auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
 | 
					    auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
 | 
				
			||||||
    const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
 | 
					    const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
 | 
				
			||||||
    const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
 | 
					
 | 
				
			||||||
    const ShaderParameters params{system,   disk_cache, device,
 | 
					    const ShaderParameters params{system,    disk_cache, device,
 | 
				
			||||||
                                  cpu_addr, host_ptr,   unique_identifier};
 | 
					                                  *cpu_addr, host_ptr,   unique_identifier};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const auto found = runtime_cache.find(unique_identifier);
 | 
					    const auto found = runtime_cache.find(unique_identifier);
 | 
				
			||||||
    if (found == runtime_cache.end()) {
 | 
					    if (found == runtime_cache.end()) {
 | 
				
			||||||
 | 
				
			|||||||
@ -65,11 +65,6 @@ public:
 | 
				
			|||||||
    /// Gets the GL program handle for the shader
 | 
					    /// Gets the GL program handle for the shader
 | 
				
			||||||
    GLuint GetHandle() const;
 | 
					    GLuint GetHandle() const;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Returns the guest CPU address of the shader
 | 
					 | 
				
			||||||
    VAddr GetCpuAddr() const override {
 | 
					 | 
				
			||||||
        return cpu_addr;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /// Returns the size in bytes of the shader
 | 
					    /// Returns the size in bytes of the shader
 | 
				
			||||||
    std::size_t GetSizeInBytes() const override {
 | 
					    std::size_t GetSizeInBytes() const override {
 | 
				
			||||||
        return size_in_bytes;
 | 
					        return size_in_bytes;
 | 
				
			||||||
@ -90,13 +85,12 @@ public:
 | 
				
			|||||||
                                  std::size_t size_in_bytes);
 | 
					                                  std::size_t size_in_bytes);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
    explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
 | 
					    explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
 | 
				
			||||||
                          std::shared_ptr<VideoCommon::Shader::Registry> registry,
 | 
					                          std::shared_ptr<VideoCommon::Shader::Registry> registry,
 | 
				
			||||||
                          ShaderEntries entries, std::shared_ptr<OGLProgram> program);
 | 
					                          ShaderEntries entries, std::shared_ptr<OGLProgram> program);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::shared_ptr<VideoCommon::Shader::Registry> registry;
 | 
					    std::shared_ptr<VideoCommon::Shader::Registry> registry;
 | 
				
			||||||
    ShaderEntries entries;
 | 
					    ShaderEntries entries;
 | 
				
			||||||
    VAddr cpu_addr = 0;
 | 
					 | 
				
			||||||
    std::size_t size_in_bytes = 0;
 | 
					    std::size_t size_in_bytes = 0;
 | 
				
			||||||
    std::shared_ptr<OGLProgram> program;
 | 
					    std::shared_ptr<OGLProgram> program;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
				
			|||||||
@ -42,8 +42,8 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
 | 
				
			|||||||
} // Anonymous namespace
 | 
					} // Anonymous namespace
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
 | 
					CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
 | 
				
			||||||
                                     CacheAddr cache_addr, std::size_t size)
 | 
					                                     VAddr cpu_addr, std::size_t size)
 | 
				
			||||||
    : VideoCommon::BufferBlock{cache_addr, size} {
 | 
					    : VideoCommon::BufferBlock{cpu_addr, size} {
 | 
				
			||||||
    const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
 | 
					    const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
 | 
				
			||||||
                                         BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
 | 
					                                         BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
 | 
				
			||||||
                                             vk::BufferUsageFlagBits::eTransferDst,
 | 
					                                             vk::BufferUsageFlagBits::eTransferDst,
 | 
				
			||||||
@ -68,8 +68,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
VKBufferCache::~VKBufferCache() = default;
 | 
					VKBufferCache::~VKBufferCache() = default;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
 | 
					Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
 | 
				
			||||||
    return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
 | 
					    return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
 | 
					const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
 | 
				
			||||||
 | 
				
			|||||||
@ -30,7 +30,7 @@ class VKScheduler;
 | 
				
			|||||||
class CachedBufferBlock final : public VideoCommon::BufferBlock {
 | 
					class CachedBufferBlock final : public VideoCommon::BufferBlock {
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
    explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
 | 
					    explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
 | 
				
			||||||
                               CacheAddr cache_addr, std::size_t size);
 | 
					                               VAddr cpu_addr, std::size_t size);
 | 
				
			||||||
    ~CachedBufferBlock();
 | 
					    ~CachedBufferBlock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const vk::Buffer* GetHandle() const {
 | 
					    const vk::Buffer* GetHandle() const {
 | 
				
			||||||
@ -55,7 +55,7 @@ public:
 | 
				
			|||||||
protected:
 | 
					protected:
 | 
				
			||||||
    void WriteBarrier() override {}
 | 
					    void WriteBarrier() override {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
 | 
					    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const vk::Buffer* ToHandle(const Buffer& buffer) override;
 | 
					    const vk::Buffer* ToHandle(const Buffer& buffer) override;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
 | 
				
			|||||||
} // Anonymous namespace
 | 
					} // Anonymous namespace
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
 | 
					CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
 | 
				
			||||||
                           GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
 | 
					                           GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
 | 
				
			||||||
                           ProgramCode program_code, u32 main_offset)
 | 
					                           u32 main_offset)
 | 
				
			||||||
    : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
 | 
					    : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
 | 
				
			||||||
      program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
 | 
					      registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
 | 
				
			||||||
      shader_ir{this->program_code, main_offset, compiler_settings, registry},
 | 
					                                                           compiler_settings, registry},
 | 
				
			||||||
      entries{GenerateShaderEntries(shader_ir)} {}
 | 
					      entries{GenerateShaderEntries(shader_ir)} {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CachedShader::~CachedShader() = default;
 | 
					CachedShader::~CachedShader() = default;
 | 
				
			||||||
@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        auto& memory_manager{system.GPU().MemoryManager()};
 | 
					        auto& memory_manager{system.GPU().MemoryManager()};
 | 
				
			||||||
        const GPUVAddr program_addr{GetShaderAddress(system, program)};
 | 
					        const GPUVAddr program_addr{GetShaderAddress(system, program)};
 | 
				
			||||||
        const auto host_ptr{memory_manager.GetPointer(program_addr)};
 | 
					        const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
 | 
				
			||||||
        auto shader = TryGet(host_ptr);
 | 
					        ASSERT(cpu_addr);
 | 
				
			||||||
 | 
					        auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
 | 
				
			||||||
        if (!shader) {
 | 
					        if (!shader) {
 | 
				
			||||||
 | 
					            const auto host_ptr{memory_manager.GetPointer(program_addr)};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // No shader found - create a new one
 | 
					            // No shader found - create a new one
 | 
				
			||||||
            constexpr u32 stage_offset = 10;
 | 
					            constexpr u32 stage_offset = 10;
 | 
				
			||||||
            const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
 | 
					            const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
 | 
				
			||||||
            auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
 | 
					            auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
 | 
					 | 
				
			||||||
            ASSERT(cpu_addr);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
 | 
					            shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
 | 
				
			||||||
                                                    host_ptr, std::move(code), stage_offset);
 | 
					                                                    std::move(code), stage_offset);
 | 
				
			||||||
            Register(shader);
 | 
					            Register(shader);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        shaders[index] = std::move(shader);
 | 
					        shaders[index] = std::move(shader);
 | 
				
			||||||
@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    auto& memory_manager = system.GPU().MemoryManager();
 | 
					    auto& memory_manager = system.GPU().MemoryManager();
 | 
				
			||||||
    const auto program_addr = key.shader;
 | 
					    const auto program_addr = key.shader;
 | 
				
			||||||
    const auto host_ptr = memory_manager.GetPointer(program_addr);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    auto shader = TryGet(host_ptr);
 | 
					    const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
 | 
				
			||||||
 | 
					    ASSERT(cpu_addr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
 | 
				
			||||||
    if (!shader) {
 | 
					    if (!shader) {
 | 
				
			||||||
        // No shader found - create a new one
 | 
					        // No shader found - create a new one
 | 
				
			||||||
        const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
 | 
					        const auto host_ptr = memory_manager.GetPointer(program_addr);
 | 
				
			||||||
        ASSERT(cpu_addr);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
 | 
					        auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
 | 
				
			||||||
        constexpr u32 kernel_main_offset = 0;
 | 
					        constexpr u32 kernel_main_offset = 0;
 | 
				
			||||||
        shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
 | 
					        shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
 | 
				
			||||||
                                                program_addr, *cpu_addr, host_ptr, std::move(code),
 | 
					                                                program_addr, *cpu_addr, std::move(code),
 | 
				
			||||||
                                                kernel_main_offset);
 | 
					                                                kernel_main_offset);
 | 
				
			||||||
        Register(shader);
 | 
					        Register(shader);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
 | 
					        const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
 | 
				
			||||||
        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
 | 
					        const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
 | 
				
			||||||
        const auto shader = TryGet(host_ptr);
 | 
					        ASSERT(cpu_addr);
 | 
				
			||||||
 | 
					        const auto shader = TryGet(*cpu_addr);
 | 
				
			||||||
        ASSERT(shader);
 | 
					        ASSERT(shader);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
 | 
					        const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
 | 
				
			||||||
 | 
				
			|||||||
@ -113,17 +113,13 @@ namespace Vulkan {
 | 
				
			|||||||
class CachedShader final : public RasterizerCacheObject {
 | 
					class CachedShader final : public RasterizerCacheObject {
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
    explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
 | 
					    explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
 | 
				
			||||||
                          VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
 | 
					                          VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
 | 
				
			||||||
    ~CachedShader();
 | 
					    ~CachedShader();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    GPUVAddr GetGpuAddr() const {
 | 
					    GPUVAddr GetGpuAddr() const {
 | 
				
			||||||
        return gpu_addr;
 | 
					        return gpu_addr;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    VAddr GetCpuAddr() const override {
 | 
					 | 
				
			||||||
        return cpu_addr;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    std::size_t GetSizeInBytes() const override {
 | 
					    std::size_t GetSizeInBytes() const override {
 | 
				
			||||||
        return program_code.size() * sizeof(u64);
 | 
					        return program_code.size() * sizeof(u64);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -149,7 +145,6 @@ private:
 | 
				
			|||||||
                                                                 Tegra::Engines::ShaderType stage);
 | 
					                                                                 Tegra::Engines::ShaderType stage);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    GPUVAddr gpu_addr{};
 | 
					    GPUVAddr gpu_addr{};
 | 
				
			||||||
    VAddr cpu_addr{};
 | 
					 | 
				
			||||||
    ProgramCode program_code;
 | 
					    ProgramCode program_code;
 | 
				
			||||||
    VideoCommon::Shader::Registry registry;
 | 
					    VideoCommon::Shader::Registry registry;
 | 
				
			||||||
    VideoCommon::Shader::ShaderIR shader_ir;
 | 
					    VideoCommon::Shader::ShaderIR shader_ir;
 | 
				
			||||||
 | 
				
			|||||||
@ -495,20 +495,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void RasterizerVulkan::FlushAll() {}
 | 
					void RasterizerVulkan::FlushAll() {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
 | 
					void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
 | 
				
			||||||
 | 
					    if (addr == 0 || size == 0) {
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
    texture_cache.FlushRegion(addr, size);
 | 
					    texture_cache.FlushRegion(addr, size);
 | 
				
			||||||
    buffer_cache.FlushRegion(addr, size);
 | 
					    buffer_cache.FlushRegion(addr, size);
 | 
				
			||||||
    query_cache.FlushRegion(addr, size);
 | 
					    query_cache.FlushRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
 | 
					void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
 | 
					    if (addr == 0 || size == 0) {
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
    texture_cache.InvalidateRegion(addr, size);
 | 
					    texture_cache.InvalidateRegion(addr, size);
 | 
				
			||||||
    pipeline_cache.InvalidateRegion(addr, size);
 | 
					    pipeline_cache.InvalidateRegion(addr, size);
 | 
				
			||||||
    buffer_cache.InvalidateRegion(addr, size);
 | 
					    buffer_cache.InvalidateRegion(addr, size);
 | 
				
			||||||
    query_cache.InvalidateRegion(addr, size);
 | 
					    query_cache.InvalidateRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
 | 
					void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 | 
				
			||||||
    FlushRegion(addr, size);
 | 
					    FlushRegion(addr, size);
 | 
				
			||||||
    InvalidateRegion(addr, size);
 | 
					    InvalidateRegion(addr, size);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -540,8 +546,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
 | 
				
			|||||||
        return false;
 | 
					        return false;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)};
 | 
					    const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
 | 
				
			||||||
    const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
 | 
					 | 
				
			||||||
    if (!surface) {
 | 
					    if (!surface) {
 | 
				
			||||||
        return false;
 | 
					        return false;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
@ -118,9 +118,9 @@ public:
 | 
				
			|||||||
    void ResetCounter(VideoCore::QueryType type) override;
 | 
					    void ResetCounter(VideoCore::QueryType type) override;
 | 
				
			||||||
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
 | 
					    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
 | 
				
			||||||
    void FlushAll() override;
 | 
					    void FlushAll() override;
 | 
				
			||||||
    void FlushRegion(CacheAddr addr, u64 size) override;
 | 
					    void FlushRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void InvalidateRegion(CacheAddr addr, u64 size) override;
 | 
					    void InvalidateRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
 | 
					    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
 | 
				
			||||||
    void FlushCommands() override;
 | 
					    void FlushCommands() override;
 | 
				
			||||||
    void TickFrame() override;
 | 
					    void TickFrame() override;
 | 
				
			||||||
    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
 | 
					    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
 | 
				
			||||||
 | 
				
			|||||||
@ -190,22 +190,11 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
 | 
				
			|||||||
    MICROPROFILE_SCOPE(GPU_Load_Texture);
 | 
					    MICROPROFILE_SCOPE(GPU_Load_Texture);
 | 
				
			||||||
    auto& staging_buffer = staging_cache.GetBuffer(0);
 | 
					    auto& staging_buffer = staging_cache.GetBuffer(0);
 | 
				
			||||||
    u8* host_ptr;
 | 
					    u8* host_ptr;
 | 
				
			||||||
    is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size);
 | 
					    // Use an extra temporal buffer
 | 
				
			||||||
 | 
					    auto& tmp_buffer = staging_cache.GetBuffer(1);
 | 
				
			||||||
    // Handle continuouty
 | 
					    tmp_buffer.resize(guest_memory_size);
 | 
				
			||||||
    if (is_continuous) {
 | 
					    host_ptr = tmp_buffer.data();
 | 
				
			||||||
        // Use physical memory directly
 | 
					    memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
 | 
				
			||||||
        host_ptr = memory_manager.GetPointer(gpu_addr);
 | 
					 | 
				
			||||||
        if (!host_ptr) {
 | 
					 | 
				
			||||||
            return;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        // Use an extra temporal buffer
 | 
					 | 
				
			||||||
        auto& tmp_buffer = staging_cache.GetBuffer(1);
 | 
					 | 
				
			||||||
        tmp_buffer.resize(guest_memory_size);
 | 
					 | 
				
			||||||
        host_ptr = tmp_buffer.data();
 | 
					 | 
				
			||||||
        memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (params.is_tiled) {
 | 
					    if (params.is_tiled) {
 | 
				
			||||||
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
 | 
					        ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
 | 
				
			||||||
@ -257,19 +246,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
 | 
				
			|||||||
    auto& staging_buffer = staging_cache.GetBuffer(0);
 | 
					    auto& staging_buffer = staging_cache.GetBuffer(0);
 | 
				
			||||||
    u8* host_ptr;
 | 
					    u8* host_ptr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Handle continuouty
 | 
					    // Use an extra temporal buffer
 | 
				
			||||||
    if (is_continuous) {
 | 
					    auto& tmp_buffer = staging_cache.GetBuffer(1);
 | 
				
			||||||
        // Use physical memory directly
 | 
					    tmp_buffer.resize(guest_memory_size);
 | 
				
			||||||
        host_ptr = memory_manager.GetPointer(gpu_addr);
 | 
					    host_ptr = tmp_buffer.data();
 | 
				
			||||||
        if (!host_ptr) {
 | 
					 | 
				
			||||||
            return;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        // Use an extra temporal buffer
 | 
					 | 
				
			||||||
        auto& tmp_buffer = staging_cache.GetBuffer(1);
 | 
					 | 
				
			||||||
        tmp_buffer.resize(guest_memory_size);
 | 
					 | 
				
			||||||
        host_ptr = tmp_buffer.data();
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (params.is_tiled) {
 | 
					    if (params.is_tiled) {
 | 
				
			||||||
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
 | 
					        ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
 | 
				
			||||||
@ -300,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
 | 
				
			|||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    if (!is_continuous) {
 | 
					    memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
 | 
				
			||||||
        memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // namespace VideoCommon
 | 
					} // namespace VideoCommon
 | 
				
			||||||
 | 
				
			|||||||
@ -68,8 +68,8 @@ public:
 | 
				
			|||||||
        return gpu_addr;
 | 
					        return gpu_addr;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
 | 
					    bool Overlaps(const VAddr start, const VAddr end) const {
 | 
				
			||||||
        return (cache_addr < end) && (cache_addr_end > start);
 | 
					        return (cpu_addr < end) && (cpu_addr_end > start);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
 | 
					    bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
 | 
				
			||||||
@ -86,21 +86,13 @@ public:
 | 
				
			|||||||
        return cpu_addr;
 | 
					        return cpu_addr;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    VAddr GetCpuAddrEnd() const {
 | 
				
			||||||
 | 
					        return cpu_addr_end;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void SetCpuAddr(const VAddr new_addr) {
 | 
					    void SetCpuAddr(const VAddr new_addr) {
 | 
				
			||||||
        cpu_addr = new_addr;
 | 
					        cpu_addr = new_addr;
 | 
				
			||||||
    }
 | 
					        cpu_addr_end = new_addr + guest_memory_size;
 | 
				
			||||||
 | 
					 | 
				
			||||||
    CacheAddr GetCacheAddr() const {
 | 
					 | 
				
			||||||
        return cache_addr;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    CacheAddr GetCacheAddrEnd() const {
 | 
					 | 
				
			||||||
        return cache_addr_end;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    void SetCacheAddr(const CacheAddr new_addr) {
 | 
					 | 
				
			||||||
        cache_addr = new_addr;
 | 
					 | 
				
			||||||
        cache_addr_end = new_addr + guest_memory_size;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const SurfaceParams& GetSurfaceParams() const {
 | 
					    const SurfaceParams& GetSurfaceParams() const {
 | 
				
			||||||
@ -119,14 +111,6 @@ public:
 | 
				
			|||||||
        return mipmap_sizes[level];
 | 
					        return mipmap_sizes[level];
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void MarkAsContinuous(const bool is_continuous) {
 | 
					 | 
				
			||||||
        this->is_continuous = is_continuous;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    bool IsContinuous() const {
 | 
					 | 
				
			||||||
        return is_continuous;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    bool IsLinear() const {
 | 
					    bool IsLinear() const {
 | 
				
			||||||
        return !params.is_tiled;
 | 
					        return !params.is_tiled;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -175,10 +159,8 @@ protected:
 | 
				
			|||||||
    std::size_t guest_memory_size;
 | 
					    std::size_t guest_memory_size;
 | 
				
			||||||
    std::size_t host_memory_size;
 | 
					    std::size_t host_memory_size;
 | 
				
			||||||
    GPUVAddr gpu_addr{};
 | 
					    GPUVAddr gpu_addr{};
 | 
				
			||||||
    CacheAddr cache_addr{};
 | 
					 | 
				
			||||||
    CacheAddr cache_addr_end{};
 | 
					 | 
				
			||||||
    VAddr cpu_addr{};
 | 
					    VAddr cpu_addr{};
 | 
				
			||||||
    bool is_continuous{};
 | 
					    VAddr cpu_addr_end{};
 | 
				
			||||||
    bool is_converted{};
 | 
					    bool is_converted{};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::vector<std::size_t> mipmap_sizes;
 | 
					    std::vector<std::size_t> mipmap_sizes;
 | 
				
			||||||
 | 
				
			|||||||
@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template <typename TSurface, typename TView>
 | 
					template <typename TSurface, typename TView>
 | 
				
			||||||
class TextureCache {
 | 
					class TextureCache {
 | 
				
			||||||
    using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
 | 
					 | 
				
			||||||
    using IntervalType = typename IntervalMap::interval_type;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
    void InvalidateRegion(CacheAddr addr, std::size_t size) {
 | 
					    void InvalidateRegion(VAddr addr, std::size_t size) {
 | 
				
			||||||
        std::lock_guard lock{mutex};
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for (const auto& surface : GetSurfacesInRegion(addr, size)) {
 | 
					        for (const auto& surface : GetSurfacesInRegion(addr, size)) {
 | 
				
			||||||
@ -76,7 +74,7 @@ public:
 | 
				
			|||||||
        guard_samplers = new_guard;
 | 
					        guard_samplers = new_guard;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void FlushRegion(CacheAddr addr, std::size_t size) {
 | 
					    void FlushRegion(VAddr addr, std::size_t size) {
 | 
				
			||||||
        std::lock_guard lock{mutex};
 | 
					        std::lock_guard lock{mutex};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        auto surfaces = GetSurfacesInRegion(addr, size);
 | 
					        auto surfaces = GetSurfacesInRegion(addr, size);
 | 
				
			||||||
@ -99,9 +97,9 @@ public:
 | 
				
			|||||||
            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
 | 
					            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
 | 
					        const std::optional<VAddr> cpu_addr =
 | 
				
			||||||
        const auto cache_addr{ToCacheAddr(host_ptr)};
 | 
					            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
 | 
				
			||||||
        if (!cache_addr) {
 | 
					        if (!cpu_addr) {
 | 
				
			||||||
            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
 | 
					            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -110,7 +108,7 @@ public:
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
 | 
					        const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
 | 
				
			||||||
        const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
 | 
					        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
 | 
				
			||||||
        if (guard_samplers) {
 | 
					        if (guard_samplers) {
 | 
				
			||||||
            sampled_textures.push_back(surface);
 | 
					            sampled_textures.push_back(surface);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@ -124,13 +122,13 @@ public:
 | 
				
			|||||||
        if (!gpu_addr) {
 | 
					        if (!gpu_addr) {
 | 
				
			||||||
            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
 | 
					            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
 | 
					        const std::optional<VAddr> cpu_addr =
 | 
				
			||||||
        const auto cache_addr{ToCacheAddr(host_ptr)};
 | 
					            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
 | 
				
			||||||
        if (!cache_addr) {
 | 
					        if (!cpu_addr) {
 | 
				
			||||||
            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
 | 
					            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
 | 
					        const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
 | 
				
			||||||
        const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
 | 
					        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
 | 
				
			||||||
        if (guard_samplers) {
 | 
					        if (guard_samplers) {
 | 
				
			||||||
            sampled_textures.push_back(surface);
 | 
					            sampled_textures.push_back(surface);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@ -159,14 +157,14 @@ public:
 | 
				
			|||||||
            SetEmptyDepthBuffer();
 | 
					            SetEmptyDepthBuffer();
 | 
				
			||||||
            return {};
 | 
					            return {};
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
 | 
					        const std::optional<VAddr> cpu_addr =
 | 
				
			||||||
        const auto cache_addr{ToCacheAddr(host_ptr)};
 | 
					            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
 | 
				
			||||||
        if (!cache_addr) {
 | 
					        if (!cpu_addr) {
 | 
				
			||||||
            SetEmptyDepthBuffer();
 | 
					            SetEmptyDepthBuffer();
 | 
				
			||||||
            return {};
 | 
					            return {};
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
 | 
					        const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
 | 
				
			||||||
        auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true);
 | 
					        auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
 | 
				
			||||||
        if (depth_buffer.target)
 | 
					        if (depth_buffer.target)
 | 
				
			||||||
            depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
 | 
					            depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
 | 
				
			||||||
        depth_buffer.target = surface_view.first;
 | 
					        depth_buffer.target = surface_view.first;
 | 
				
			||||||
@ -199,15 +197,15 @@ public:
 | 
				
			|||||||
            return {};
 | 
					            return {};
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
 | 
					        const std::optional<VAddr> cpu_addr =
 | 
				
			||||||
        const auto cache_addr{ToCacheAddr(host_ptr)};
 | 
					            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
 | 
				
			||||||
        if (!cache_addr) {
 | 
					        if (!cpu_addr) {
 | 
				
			||||||
            SetEmptyColorBuffer(index);
 | 
					            SetEmptyColorBuffer(index);
 | 
				
			||||||
            return {};
 | 
					            return {};
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        auto surface_view =
 | 
					        auto surface_view =
 | 
				
			||||||
            GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index),
 | 
					            GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
 | 
				
			||||||
                       preserve_contents, true);
 | 
					                       preserve_contents, true);
 | 
				
			||||||
        if (render_targets[index].target)
 | 
					        if (render_targets[index].target)
 | 
				
			||||||
            render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
 | 
					            render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
 | 
				
			||||||
@ -257,27 +255,26 @@ public:
 | 
				
			|||||||
        const GPUVAddr src_gpu_addr = src_config.Address();
 | 
					        const GPUVAddr src_gpu_addr = src_config.Address();
 | 
				
			||||||
        const GPUVAddr dst_gpu_addr = dst_config.Address();
 | 
					        const GPUVAddr dst_gpu_addr = dst_config.Address();
 | 
				
			||||||
        DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
 | 
					        DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
 | 
				
			||||||
        const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)};
 | 
					        const std::optional<VAddr> dst_cpu_addr =
 | 
				
			||||||
        const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)};
 | 
					            system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
 | 
				
			||||||
        const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)};
 | 
					        const std::optional<VAddr> src_cpu_addr =
 | 
				
			||||||
        const auto src_cache_addr{ToCacheAddr(src_host_ptr)};
 | 
					            system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
 | 
				
			||||||
        std::pair<TSurface, TView> dst_surface =
 | 
					        std::pair<TSurface, TView> dst_surface =
 | 
				
			||||||
            GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false);
 | 
					            GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
 | 
				
			||||||
        std::pair<TSurface, TView> src_surface =
 | 
					        std::pair<TSurface, TView> src_surface =
 | 
				
			||||||
            GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false);
 | 
					            GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
 | 
				
			||||||
        ImageBlit(src_surface.second, dst_surface.second, copy_config);
 | 
					        ImageBlit(src_surface.second, dst_surface.second, copy_config);
 | 
				
			||||||
        dst_surface.first->MarkAsModified(true, Tick());
 | 
					        dst_surface.first->MarkAsModified(true, Tick());
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    TSurface TryFindFramebufferSurface(const u8* host_ptr) {
 | 
					    TSurface TryFindFramebufferSurface(VAddr addr) {
 | 
				
			||||||
        const CacheAddr cache_addr = ToCacheAddr(host_ptr);
 | 
					        if (!addr) {
 | 
				
			||||||
        if (!cache_addr) {
 | 
					 | 
				
			||||||
            return nullptr;
 | 
					            return nullptr;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        const CacheAddr page = cache_addr >> registry_page_bits;
 | 
					        const VAddr page = addr >> registry_page_bits;
 | 
				
			||||||
        std::vector<TSurface>& list = registry[page];
 | 
					        std::vector<TSurface>& list = registry[page];
 | 
				
			||||||
        for (auto& surface : list) {
 | 
					        for (auto& surface : list) {
 | 
				
			||||||
            if (surface->GetCacheAddr() == cache_addr) {
 | 
					            if (surface->GetCpuAddr() == addr) {
 | 
				
			||||||
                return surface;
 | 
					                return surface;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@ -338,18 +335,14 @@ protected:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    void Register(TSurface surface) {
 | 
					    void Register(TSurface surface) {
 | 
				
			||||||
        const GPUVAddr gpu_addr = surface->GetGpuAddr();
 | 
					        const GPUVAddr gpu_addr = surface->GetGpuAddr();
 | 
				
			||||||
        const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
 | 
					 | 
				
			||||||
        const std::size_t size = surface->GetSizeInBytes();
 | 
					        const std::size_t size = surface->GetSizeInBytes();
 | 
				
			||||||
        const std::optional<VAddr> cpu_addr =
 | 
					        const std::optional<VAddr> cpu_addr =
 | 
				
			||||||
            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
 | 
					            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
 | 
				
			||||||
        if (!cache_ptr || !cpu_addr) {
 | 
					        if (!cpu_addr) {
 | 
				
			||||||
            LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
 | 
					            LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
 | 
				
			||||||
                         gpu_addr);
 | 
					                         gpu_addr);
 | 
				
			||||||
            return;
 | 
					            return;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
 | 
					 | 
				
			||||||
        surface->MarkAsContinuous(continuous);
 | 
					 | 
				
			||||||
        surface->SetCacheAddr(cache_ptr);
 | 
					 | 
				
			||||||
        surface->SetCpuAddr(*cpu_addr);
 | 
					        surface->SetCpuAddr(*cpu_addr);
 | 
				
			||||||
        RegisterInnerCache(surface);
 | 
					        RegisterInnerCache(surface);
 | 
				
			||||||
        surface->MarkAsRegistered(true);
 | 
					        surface->MarkAsRegistered(true);
 | 
				
			||||||
@ -634,7 +627,7 @@ private:
 | 
				
			|||||||
    std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
 | 
					    std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
 | 
				
			||||||
                                                               const SurfaceParams& params,
 | 
					                                                               const SurfaceParams& params,
 | 
				
			||||||
                                                               const GPUVAddr gpu_addr,
 | 
					                                                               const GPUVAddr gpu_addr,
 | 
				
			||||||
                                                               const CacheAddr cache_addr,
 | 
					                                                               const VAddr cpu_addr,
 | 
				
			||||||
                                                               bool preserve_contents) {
 | 
					                                                               bool preserve_contents) {
 | 
				
			||||||
        if (params.target == SurfaceTarget::Texture3D) {
 | 
					        if (params.target == SurfaceTarget::Texture3D) {
 | 
				
			||||||
            bool failed = false;
 | 
					            bool failed = false;
 | 
				
			||||||
@ -659,7 +652,7 @@ private:
 | 
				
			|||||||
                    failed = true;
 | 
					                    failed = true;
 | 
				
			||||||
                    break;
 | 
					                    break;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr);
 | 
					                const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
 | 
				
			||||||
                const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
 | 
					                const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
 | 
				
			||||||
                modified |= surface->IsModified();
 | 
					                modified |= surface->IsModified();
 | 
				
			||||||
                const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
 | 
					                const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
 | 
				
			||||||
@ -679,7 +672,7 @@ private:
 | 
				
			|||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            for (const auto& surface : overlaps) {
 | 
					            for (const auto& surface : overlaps) {
 | 
				
			||||||
                if (!surface->MatchTarget(params.target)) {
 | 
					                if (!surface->MatchTarget(params.target)) {
 | 
				
			||||||
                    if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) {
 | 
					                    if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
 | 
				
			||||||
                        if (Settings::values.use_accurate_gpu_emulation) {
 | 
					                        if (Settings::values.use_accurate_gpu_emulation) {
 | 
				
			||||||
                            return std::nullopt;
 | 
					                            return std::nullopt;
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
@ -688,7 +681,7 @@ private:
 | 
				
			|||||||
                    }
 | 
					                    }
 | 
				
			||||||
                    return std::nullopt;
 | 
					                    return std::nullopt;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                if (surface->GetCacheAddr() != cache_addr) {
 | 
					                if (surface->GetCpuAddr() != cpu_addr) {
 | 
				
			||||||
                    continue;
 | 
					                    continue;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
 | 
					                if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
 | 
				
			||||||
@ -722,13 +715,13 @@ private:
 | 
				
			|||||||
     *                          left blank.
 | 
					     *                          left blank.
 | 
				
			||||||
     * @param is_render         Whether or not the surface is a render target.
 | 
					     * @param is_render         Whether or not the surface is a render target.
 | 
				
			||||||
     **/
 | 
					     **/
 | 
				
			||||||
    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr,
 | 
					    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
 | 
				
			||||||
                                          const SurfaceParams& params, bool preserve_contents,
 | 
					                                          const SurfaceParams& params, bool preserve_contents,
 | 
				
			||||||
                                          bool is_render) {
 | 
					                                          bool is_render) {
 | 
				
			||||||
        // Step 1
 | 
					        // Step 1
 | 
				
			||||||
        // Check Level 1 Cache for a fast structural match. If candidate surface
 | 
					        // Check Level 1 Cache for a fast structural match. If candidate surface
 | 
				
			||||||
        // matches at certain level we are pretty much done.
 | 
					        // matches at certain level we are pretty much done.
 | 
				
			||||||
        if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
 | 
					        if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
 | 
				
			||||||
            TSurface& current_surface = iter->second;
 | 
					            TSurface& current_surface = iter->second;
 | 
				
			||||||
            const auto topological_result = current_surface->MatchesTopology(params);
 | 
					            const auto topological_result = current_surface->MatchesTopology(params);
 | 
				
			||||||
            if (topological_result != MatchTopologyResult::FullMatch) {
 | 
					            if (topological_result != MatchTopologyResult::FullMatch) {
 | 
				
			||||||
@ -755,7 +748,7 @@ private:
 | 
				
			|||||||
        // Step 2
 | 
					        // Step 2
 | 
				
			||||||
        // Obtain all possible overlaps in the memory region
 | 
					        // Obtain all possible overlaps in the memory region
 | 
				
			||||||
        const std::size_t candidate_size = params.GetGuestSizeInBytes();
 | 
					        const std::size_t candidate_size = params.GetGuestSizeInBytes();
 | 
				
			||||||
        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
 | 
					        auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // If none are found, we are done. we just load the surface and create it.
 | 
					        // If none are found, we are done. we just load the surface and create it.
 | 
				
			||||||
        if (overlaps.empty()) {
 | 
					        if (overlaps.empty()) {
 | 
				
			||||||
@ -777,7 +770,7 @@ private:
 | 
				
			|||||||
        // Check if it's a 3D texture
 | 
					        // Check if it's a 3D texture
 | 
				
			||||||
        if (params.block_depth > 0) {
 | 
					        if (params.block_depth > 0) {
 | 
				
			||||||
            auto surface =
 | 
					            auto surface =
 | 
				
			||||||
                Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents);
 | 
					                Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
 | 
				
			||||||
            if (surface) {
 | 
					            if (surface) {
 | 
				
			||||||
                return *surface;
 | 
					                return *surface;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
@ -852,16 +845,16 @@ private:
 | 
				
			|||||||
     * @param params   The parameters on the candidate surface.
 | 
					     * @param params   The parameters on the candidate surface.
 | 
				
			||||||
     **/
 | 
					     **/
 | 
				
			||||||
    Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
 | 
					    Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
 | 
				
			||||||
        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
 | 
					        const std::optional<VAddr> cpu_addr =
 | 
				
			||||||
        const auto cache_addr{ToCacheAddr(host_ptr)};
 | 
					            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (!cache_addr) {
 | 
					        if (!cpu_addr) {
 | 
				
			||||||
            Deduction result{};
 | 
					            Deduction result{};
 | 
				
			||||||
            result.type = DeductionType::DeductionFailed;
 | 
					            result.type = DeductionType::DeductionFailed;
 | 
				
			||||||
            return result;
 | 
					            return result;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
 | 
					        if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
 | 
				
			||||||
            TSurface& current_surface = iter->second;
 | 
					            TSurface& current_surface = iter->second;
 | 
				
			||||||
            const auto topological_result = current_surface->MatchesTopology(params);
 | 
					            const auto topological_result = current_surface->MatchesTopology(params);
 | 
				
			||||||
            if (topological_result != MatchTopologyResult::FullMatch) {
 | 
					            if (topological_result != MatchTopologyResult::FullMatch) {
 | 
				
			||||||
@ -880,7 +873,7 @@ private:
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        const std::size_t candidate_size = params.GetGuestSizeInBytes();
 | 
					        const std::size_t candidate_size = params.GetGuestSizeInBytes();
 | 
				
			||||||
        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
 | 
					        auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (overlaps.empty()) {
 | 
					        if (overlaps.empty()) {
 | 
				
			||||||
            Deduction result{};
 | 
					            Deduction result{};
 | 
				
			||||||
@ -1024,10 +1017,10 @@ private:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void RegisterInnerCache(TSurface& surface) {
 | 
					    void RegisterInnerCache(TSurface& surface) {
 | 
				
			||||||
        const CacheAddr cache_addr = surface->GetCacheAddr();
 | 
					        const VAddr cpu_addr = surface->GetCpuAddr();
 | 
				
			||||||
        CacheAddr start = cache_addr >> registry_page_bits;
 | 
					        VAddr start = cpu_addr >> registry_page_bits;
 | 
				
			||||||
        const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
 | 
					        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
 | 
				
			||||||
        l1_cache[cache_addr] = surface;
 | 
					        l1_cache[cpu_addr] = surface;
 | 
				
			||||||
        while (start <= end) {
 | 
					        while (start <= end) {
 | 
				
			||||||
            registry[start].push_back(surface);
 | 
					            registry[start].push_back(surface);
 | 
				
			||||||
            start++;
 | 
					            start++;
 | 
				
			||||||
@ -1035,10 +1028,10 @@ private:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void UnregisterInnerCache(TSurface& surface) {
 | 
					    void UnregisterInnerCache(TSurface& surface) {
 | 
				
			||||||
        const CacheAddr cache_addr = surface->GetCacheAddr();
 | 
					        const VAddr cpu_addr = surface->GetCpuAddr();
 | 
				
			||||||
        CacheAddr start = cache_addr >> registry_page_bits;
 | 
					        VAddr start = cpu_addr >> registry_page_bits;
 | 
				
			||||||
        const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
 | 
					        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
 | 
				
			||||||
        l1_cache.erase(cache_addr);
 | 
					        l1_cache.erase(cpu_addr);
 | 
				
			||||||
        while (start <= end) {
 | 
					        while (start <= end) {
 | 
				
			||||||
            auto& reg{registry[start]};
 | 
					            auto& reg{registry[start]};
 | 
				
			||||||
            reg.erase(std::find(reg.begin(), reg.end(), surface));
 | 
					            reg.erase(std::find(reg.begin(), reg.end(), surface));
 | 
				
			||||||
@ -1046,18 +1039,18 @@ private:
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) {
 | 
					    std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
 | 
				
			||||||
        if (size == 0) {
 | 
					        if (size == 0) {
 | 
				
			||||||
            return {};
 | 
					            return {};
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        const CacheAddr cache_addr_end = cache_addr + size;
 | 
					        const VAddr cpu_addr_end = cpu_addr + size;
 | 
				
			||||||
        CacheAddr start = cache_addr >> registry_page_bits;
 | 
					        VAddr start = cpu_addr >> registry_page_bits;
 | 
				
			||||||
        const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits;
 | 
					        const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
 | 
				
			||||||
        std::vector<TSurface> surfaces;
 | 
					        std::vector<TSurface> surfaces;
 | 
				
			||||||
        while (start <= end) {
 | 
					        while (start <= end) {
 | 
				
			||||||
            std::vector<TSurface>& list = registry[start];
 | 
					            std::vector<TSurface>& list = registry[start];
 | 
				
			||||||
            for (auto& surface : list) {
 | 
					            for (auto& surface : list) {
 | 
				
			||||||
                if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) {
 | 
					                if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
 | 
				
			||||||
                    surface->MarkAsPicked(true);
 | 
					                    surface->MarkAsPicked(true);
 | 
				
			||||||
                    surfaces.push_back(surface);
 | 
					                    surfaces.push_back(surface);
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
@ -1146,14 +1139,14 @@ private:
 | 
				
			|||||||
    // large in size.
 | 
					    // large in size.
 | 
				
			||||||
    static constexpr u64 registry_page_bits{20};
 | 
					    static constexpr u64 registry_page_bits{20};
 | 
				
			||||||
    static constexpr u64 registry_page_size{1 << registry_page_bits};
 | 
					    static constexpr u64 registry_page_size{1 << registry_page_bits};
 | 
				
			||||||
    std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
 | 
					    std::unordered_map<VAddr, std::vector<TSurface>> registry;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    static constexpr u32 DEPTH_RT = 8;
 | 
					    static constexpr u32 DEPTH_RT = 8;
 | 
				
			||||||
    static constexpr u32 NO_RT = 0xFFFFFFFF;
 | 
					    static constexpr u32 NO_RT = 0xFFFFFFFF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // The L1 Cache is used for fast texture lookup before checking the overlaps
 | 
					    // The L1 Cache is used for fast texture lookup before checking the overlaps
 | 
				
			||||||
    // This avoids calculating size and other stuffs.
 | 
					    // This avoids calculating size and other stuffs.
 | 
				
			||||||
    std::unordered_map<CacheAddr, TSurface> l1_cache;
 | 
					    std::unordered_map<VAddr, TSurface> l1_cache;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
 | 
					    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
 | 
				
			||||||
    /// previously been used. This is to prevent surfaces from being constantly created and
 | 
					    /// previously been used. This is to prevent surfaces from being constantly created and
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user