mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu.git
				synced 2025-05-12 00:45:25 +00:00 
			
		
		
		
	GPU: Partially implemented the Maxwell DMA engine.
Only tiled->linear and linear->tiled copies without offsets are supported for now. Queries are not supported. Swizzled copies are not supported.
This commit is contained in:
		
							parent
							
								
									281fd881a0
								
							
						
					
					
						commit
						987a170665
					
				@ -9,6 +9,8 @@ add_library(video_core STATIC
 | 
				
			|||||||
    engines/maxwell_3d.h
 | 
					    engines/maxwell_3d.h
 | 
				
			||||||
    engines/maxwell_compute.cpp
 | 
					    engines/maxwell_compute.cpp
 | 
				
			||||||
    engines/maxwell_compute.h
 | 
					    engines/maxwell_compute.h
 | 
				
			||||||
 | 
					    engines/maxwell_dma.cpp
 | 
				
			||||||
 | 
					    engines/maxwell_dma.h
 | 
				
			||||||
    engines/shader_bytecode.h
 | 
					    engines/shader_bytecode.h
 | 
				
			||||||
    gpu.cpp
 | 
					    gpu.cpp
 | 
				
			||||||
    gpu.h
 | 
					    gpu.h
 | 
				
			||||||
 | 
				
			|||||||
@ -16,6 +16,7 @@
 | 
				
			|||||||
#include "video_core/engines/fermi_2d.h"
 | 
					#include "video_core/engines/fermi_2d.h"
 | 
				
			||||||
#include "video_core/engines/maxwell_3d.h"
 | 
					#include "video_core/engines/maxwell_3d.h"
 | 
				
			||||||
#include "video_core/engines/maxwell_compute.h"
 | 
					#include "video_core/engines/maxwell_compute.h"
 | 
				
			||||||
 | 
					#include "video_core/engines/maxwell_dma.h"
 | 
				
			||||||
#include "video_core/gpu.h"
 | 
					#include "video_core/gpu.h"
 | 
				
			||||||
#include "video_core/renderer_base.h"
 | 
					#include "video_core/renderer_base.h"
 | 
				
			||||||
#include "video_core/video_core.h"
 | 
					#include "video_core/video_core.h"
 | 
				
			||||||
@ -60,8 +61,11 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
 | 
				
			|||||||
    case EngineID::MAXWELL_COMPUTE_B:
 | 
					    case EngineID::MAXWELL_COMPUTE_B:
 | 
				
			||||||
        maxwell_compute->WriteReg(method, value);
 | 
					        maxwell_compute->WriteReg(method, value);
 | 
				
			||||||
        break;
 | 
					        break;
 | 
				
			||||||
 | 
					    case EngineID::MAXWELL_DMA_COPY_A:
 | 
				
			||||||
 | 
					        maxwell_dma->WriteReg(method, value);
 | 
				
			||||||
 | 
					        break;
 | 
				
			||||||
    default:
 | 
					    default:
 | 
				
			||||||
        UNIMPLEMENTED();
 | 
					        UNIMPLEMENTED_MSG("Unimplemented engine");
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -47,6 +47,7 @@ void Fermi2D::HandleSurfaceCopy() {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    if (regs.src.linear == regs.dst.linear) {
 | 
					    if (regs.src.linear == regs.dst.linear) {
 | 
				
			||||||
        // If the input layout and the output layout are the same, just perform a raw copy.
 | 
					        // If the input layout and the output layout are the same, just perform a raw copy.
 | 
				
			||||||
 | 
					        ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
 | 
				
			||||||
        Memory::CopyBlock(dest_cpu, source_cpu,
 | 
					        Memory::CopyBlock(dest_cpu, source_cpu,
 | 
				
			||||||
                          src_bytes_per_pixel * regs.dst.width * regs.dst.height);
 | 
					                          src_bytes_per_pixel * regs.dst.width * regs.dst.height);
 | 
				
			||||||
        return;
 | 
					        return;
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										69
									
								
								src/video_core/engines/maxwell_dma.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								src/video_core/engines/maxwell_dma.cpp
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,69 @@
 | 
				
			|||||||
 | 
					// Copyright 2018 yuzu Emulator Project
 | 
				
			||||||
 | 
					// Licensed under GPLv2 or any later version
 | 
				
			||||||
 | 
					// Refer to the license.txt file included.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "core/memory.h"
 | 
				
			||||||
 | 
					#include "video_core/engines/maxwell_dma.h"
 | 
				
			||||||
 | 
					#include "video_core/textures/decoders.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Tegra {
 | 
				
			||||||
 | 
					namespace Engines {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					MaxwellDMA::MaxwellDMA(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void MaxwellDMA::WriteReg(u32 method, u32 value) {
 | 
				
			||||||
 | 
					    ASSERT_MSG(method < Regs::NUM_REGS,
 | 
				
			||||||
 | 
					               "Invalid MaxwellDMA register, increase the size of the Regs structure");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    regs.reg_array[method] = value;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define MAXWELLDMA_REG_INDEX(field_name)                                                           \
 | 
				
			||||||
 | 
					    (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    switch (method) {
 | 
				
			||||||
 | 
					    case MAXWELLDMA_REG_INDEX(exec): {
 | 
				
			||||||
 | 
					        HandleCopy();
 | 
				
			||||||
 | 
					        break;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#undef MAXWELLDMA_REG_INDEX
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void MaxwellDMA::HandleCopy() {
 | 
				
			||||||
 | 
					    NGLOG_WARNING(HW_GPU, "Requested a DMA copy");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const GPUVAddr source = regs.src_address.Address();
 | 
				
			||||||
 | 
					    const GPUVAddr dest = regs.dst_address.Address();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
 | 
				
			||||||
 | 
					    const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // TODO(Subv): Perform more research and implement all features of this engine.
 | 
				
			||||||
 | 
					    ASSERT(regs.exec.enable_swizzle == 0);
 | 
				
			||||||
 | 
					    ASSERT(regs.exec.enable_2d == 1);
 | 
				
			||||||
 | 
					    ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
 | 
				
			||||||
 | 
					    ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
 | 
				
			||||||
 | 
					    ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
 | 
				
			||||||
 | 
					    ASSERT(regs.src_params.pos_x == 0);
 | 
				
			||||||
 | 
					    ASSERT(regs.src_params.pos_y == 0);
 | 
				
			||||||
 | 
					    ASSERT(regs.dst_params.pos_x == 0);
 | 
				
			||||||
 | 
					    ASSERT(regs.dst_params.pos_y == 0);
 | 
				
			||||||
 | 
					    ASSERT(regs.exec.is_dst_linear != regs.exec.is_src_linear);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    u8* src_buffer = Memory::GetPointer(source_cpu);
 | 
				
			||||||
 | 
					    u8* dst_buffer = Memory::GetPointer(dest_cpu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
 | 
				
			||||||
 | 
					        // If the input is tiled and the output is linear, deswizzle the input and copy it over.
 | 
				
			||||||
 | 
					        Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y, 1, 1, src_buffer,
 | 
				
			||||||
 | 
					                                  dst_buffer, true, regs.src_params.BlockHeight());
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        // If the input is linear and the output is tiled, swizzle the input and copy it over.
 | 
				
			||||||
 | 
					        Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y, 1, 1, dst_buffer,
 | 
				
			||||||
 | 
					                                  src_buffer, false, regs.dst_params.BlockHeight());
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					} // namespace Engines
 | 
				
			||||||
 | 
					} // namespace Tegra
 | 
				
			||||||
							
								
								
									
										155
									
								
								src/video_core/engines/maxwell_dma.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										155
									
								
								src/video_core/engines/maxwell_dma.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,155 @@
 | 
				
			|||||||
 | 
					// Copyright 2018 yuzu Emulator Project
 | 
				
			||||||
 | 
					// Licensed under GPLv2 or any later version
 | 
				
			||||||
 | 
					// Refer to the license.txt file included.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <array>
 | 
				
			||||||
 | 
					#include "common/assert.h"
 | 
				
			||||||
 | 
					#include "common/bit_field.h"
 | 
				
			||||||
 | 
					#include "common/common_funcs.h"
 | 
				
			||||||
 | 
					#include "common/common_types.h"
 | 
				
			||||||
 | 
					#include "video_core/gpu.h"
 | 
				
			||||||
 | 
					#include "video_core/memory_manager.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Tegra {
 | 
				
			||||||
 | 
					namespace Engines {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class MaxwellDMA final {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					    explicit MaxwellDMA(MemoryManager& memory_manager);
 | 
				
			||||||
 | 
					    ~MaxwellDMA() = default;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Write the value to the register identified by method.
 | 
				
			||||||
 | 
					    void WriteReg(u32 method, u32 value);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    struct Regs {
 | 
				
			||||||
 | 
					        static constexpr size_t NUM_REGS = 0x1D6;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        struct Parameters {
 | 
				
			||||||
 | 
					            union {
 | 
				
			||||||
 | 
					                BitField<0, 4, u32> block_depth;
 | 
				
			||||||
 | 
					                BitField<4, 4, u32> block_height;
 | 
				
			||||||
 | 
					                BitField<8, 4, u32> block_width;
 | 
				
			||||||
 | 
					            };
 | 
				
			||||||
 | 
					            u32 size_x;
 | 
				
			||||||
 | 
					            u32 size_y;
 | 
				
			||||||
 | 
					            u32 size_z;
 | 
				
			||||||
 | 
					            u32 pos_z;
 | 
				
			||||||
 | 
					            union {
 | 
				
			||||||
 | 
					                BitField<0, 16, u32> pos_x;
 | 
				
			||||||
 | 
					                BitField<16, 16, u32> pos_y;
 | 
				
			||||||
 | 
					            };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            u32 BlockHeight() const {
 | 
				
			||||||
 | 
					                return 1 << block_height;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        enum class CopyMode : u32 {
 | 
				
			||||||
 | 
					            None = 0,
 | 
				
			||||||
 | 
					            Unk1 = 1,
 | 
				
			||||||
 | 
					            Unk2 = 2,
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        enum class QueryMode : u32 {
 | 
				
			||||||
 | 
					            None = 0,
 | 
				
			||||||
 | 
					            Short = 1,
 | 
				
			||||||
 | 
					            Long = 2,
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        enum class QueryIntr : u32 {
 | 
				
			||||||
 | 
					            None = 0,
 | 
				
			||||||
 | 
					            Block = 1,
 | 
				
			||||||
 | 
					            NonBlock = 2,
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        union {
 | 
				
			||||||
 | 
					            struct {
 | 
				
			||||||
 | 
					                INSERT_PADDING_WORDS(0xC0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                struct {
 | 
				
			||||||
 | 
					                    union {
 | 
				
			||||||
 | 
					                        BitField<0, 2, CopyMode> copy_mode;
 | 
				
			||||||
 | 
					                        BitField<2, 1, u32> flush;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        BitField<3, 2, QueryMode> query_mode;
 | 
				
			||||||
 | 
					                        BitField<5, 2, QueryIntr> query_intr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        BitField<7, 1, u32> is_src_linear;
 | 
				
			||||||
 | 
					                        BitField<8, 1, u32> is_dst_linear;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        BitField<9, 1, u32> enable_2d;
 | 
				
			||||||
 | 
					                        BitField<10, 1, u32> enable_swizzle;
 | 
				
			||||||
 | 
					                    };
 | 
				
			||||||
 | 
					                } exec;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                INSERT_PADDING_WORDS(0x3F);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                struct {
 | 
				
			||||||
 | 
					                    u32 address_high;
 | 
				
			||||||
 | 
					                    u32 address_low;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    GPUVAddr Address() const {
 | 
				
			||||||
 | 
					                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
 | 
				
			||||||
 | 
					                                                     address_low);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                } src_address;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                struct {
 | 
				
			||||||
 | 
					                    u32 address_high;
 | 
				
			||||||
 | 
					                    u32 address_low;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    GPUVAddr Address() const {
 | 
				
			||||||
 | 
					                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
 | 
				
			||||||
 | 
					                                                     address_low);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                } dst_address;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                u32 src_pitch;
 | 
				
			||||||
 | 
					                u32 dst_pitch;
 | 
				
			||||||
 | 
					                u32 x_count;
 | 
				
			||||||
 | 
					                u32 y_count;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                INSERT_PADDING_WORDS(0xBB);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                Parameters dst_params;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                INSERT_PADDING_WORDS(1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                Parameters src_params;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                INSERT_PADDING_WORDS(0x13);
 | 
				
			||||||
 | 
					            };
 | 
				
			||||||
 | 
					            std::array<u32, NUM_REGS> reg_array;
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					    } regs{};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MemoryManager& memory_manager;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					private:
 | 
				
			||||||
 | 
					    /// Performs the copy from the source buffer to the destination buffer as configured in the
 | 
				
			||||||
 | 
					    /// registers.
 | 
				
			||||||
 | 
					    void HandleCopy();
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define ASSERT_REG_POSITION(field_name, position)                                                  \
 | 
				
			||||||
 | 
					    static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4,                          \
 | 
				
			||||||
 | 
					                  "Field " #field_name " has invalid position")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ASSERT_REG_POSITION(exec, 0xC0);
 | 
				
			||||||
 | 
					ASSERT_REG_POSITION(src_address, 0x100);
 | 
				
			||||||
 | 
					ASSERT_REG_POSITION(dst_address, 0x102);
 | 
				
			||||||
 | 
					ASSERT_REG_POSITION(src_pitch, 0x104);
 | 
				
			||||||
 | 
					ASSERT_REG_POSITION(dst_pitch, 0x105);
 | 
				
			||||||
 | 
					ASSERT_REG_POSITION(x_count, 0x106);
 | 
				
			||||||
 | 
					ASSERT_REG_POSITION(y_count, 0x107);
 | 
				
			||||||
 | 
					ASSERT_REG_POSITION(dst_params, 0x1C3);
 | 
				
			||||||
 | 
					ASSERT_REG_POSITION(src_params, 0x1CA);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#undef ASSERT_REG_POSITION
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					} // namespace Engines
 | 
				
			||||||
 | 
					} // namespace Tegra
 | 
				
			||||||
@ -5,6 +5,7 @@
 | 
				
			|||||||
#include "video_core/engines/fermi_2d.h"
 | 
					#include "video_core/engines/fermi_2d.h"
 | 
				
			||||||
#include "video_core/engines/maxwell_3d.h"
 | 
					#include "video_core/engines/maxwell_3d.h"
 | 
				
			||||||
#include "video_core/engines/maxwell_compute.h"
 | 
					#include "video_core/engines/maxwell_compute.h"
 | 
				
			||||||
 | 
					#include "video_core/engines/maxwell_dma.h"
 | 
				
			||||||
#include "video_core/gpu.h"
 | 
					#include "video_core/gpu.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Tegra {
 | 
					namespace Tegra {
 | 
				
			||||||
@ -14,6 +15,7 @@ GPU::GPU() {
 | 
				
			|||||||
    maxwell_3d = std::make_unique<Engines::Maxwell3D>(*memory_manager);
 | 
					    maxwell_3d = std::make_unique<Engines::Maxwell3D>(*memory_manager);
 | 
				
			||||||
    fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
 | 
					    fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
 | 
				
			||||||
    maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
 | 
					    maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
 | 
				
			||||||
 | 
					    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
GPU::~GPU() = default;
 | 
					GPU::~GPU() = default;
 | 
				
			||||||
 | 
				
			|||||||
@ -63,6 +63,7 @@ namespace Engines {
 | 
				
			|||||||
class Fermi2D;
 | 
					class Fermi2D;
 | 
				
			||||||
class Maxwell3D;
 | 
					class Maxwell3D;
 | 
				
			||||||
class MaxwellCompute;
 | 
					class MaxwellCompute;
 | 
				
			||||||
 | 
					class MaxwellDMA;
 | 
				
			||||||
} // namespace Engines
 | 
					} // namespace Engines
 | 
				
			||||||
 | 
					
 | 
				
			||||||
enum class EngineID {
 | 
					enum class EngineID {
 | 
				
			||||||
@ -103,6 +104,8 @@ private:
 | 
				
			|||||||
    std::unique_ptr<Engines::Fermi2D> fermi_2d;
 | 
					    std::unique_ptr<Engines::Fermi2D> fermi_2d;
 | 
				
			||||||
    /// Compute engine
 | 
					    /// Compute engine
 | 
				
			||||||
    std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
 | 
					    std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
 | 
				
			||||||
 | 
					    /// DMA engine
 | 
				
			||||||
 | 
					    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // namespace Tegra
 | 
					} // namespace Tegra
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user