mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu.git
				synced 2025-05-12 00:45:25 +00:00 
			
		
		
		
	Merge pull request #379 from Subv/multi_buffers
GPU: Support multiple enabled vertex arrays.
This commit is contained in:
		
						commit
						07dc0bbf3e
					
				| @ -500,6 +500,11 @@ public: | |||||||
|                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) | |                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) | | ||||||
|                                                      start_low); |                                                      start_low); | ||||||
|                     } |                     } | ||||||
|  | 
 | ||||||
|  |                     bool IsEnabled() const { | ||||||
|  |                         return enable != 0 && StartAddress() != 0; | ||||||
|  |                     } | ||||||
|  | 
 | ||||||
|                 } vertex_array[NumVertexArrays]; |                 } vertex_array[NumVertexArrays]; | ||||||
| 
 | 
 | ||||||
|                 Blend blend; |                 Blend blend; | ||||||
|  | |||||||
| @ -127,7 +127,8 @@ RasterizerOpenGL::~RasterizerOpenGL() { | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { | std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, | ||||||
|  |                                                              GLintptr buffer_offset) { | ||||||
|     MICROPROFILE_SCOPE(OpenGL_VAO); |     MICROPROFILE_SCOPE(OpenGL_VAO); | ||||||
|     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; |     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; | ||||||
|     const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; |     const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; | ||||||
| @ -136,43 +137,59 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { | |||||||
|     state.draw.vertex_buffer = stream_buffer->GetHandle(); |     state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||||||
|     state.Apply(); |     state.Apply(); | ||||||
| 
 | 
 | ||||||
|     // TODO(bunnei): Add support for 1+ vertex arrays
 |     // Upload all guest vertex arrays sequentially to our buffer
 | ||||||
|     const auto& vertex_array{regs.vertex_array[0]}; |     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||||||
|     const auto& vertex_array_limit{regs.vertex_array_limit[0]}; |         const auto& vertex_array = regs.vertex_array[index]; | ||||||
|     ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?"); |         if (!vertex_array.IsEnabled()) | ||||||
|     ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!"); |             continue; | ||||||
|     for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) { | 
 | ||||||
|         ASSERT_MSG(!regs.vertex_array[index].enable, "vertex array %d is unimplemented!", index); |         const Tegra::GPUVAddr start = vertex_array.StartAddress(); | ||||||
|  |         const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | ||||||
|  | 
 | ||||||
|  |         ASSERT(end > start); | ||||||
|  |         u64 size = end - start + 1; | ||||||
|  | 
 | ||||||
|  |         // Copy vertex array data
 | ||||||
|  |         const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(start)}; | ||||||
|  |         res_cache.FlushRegion(data_addr, size, nullptr); | ||||||
|  |         Memory::ReadBlock(data_addr, array_ptr, size); | ||||||
|  | 
 | ||||||
|  |         // Bind the vertex array to the buffer at the current offset.
 | ||||||
|  |         glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); | ||||||
|  | 
 | ||||||
|  |         ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); | ||||||
|  | 
 | ||||||
|  |         array_ptr += size; | ||||||
|  |         buffer_offset += size; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
 |     // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
 | ||||||
|     // Enables the first 16 vertex attributes always, as we don't know which ones are actually used
 |     // Enables the first 16 vertex attributes always, as we don't know which ones are actually used
 | ||||||
|     // until shader time. Note, Tegra technically supports 32, but we're cappinig this to 16 for now
 |     // until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now
 | ||||||
|     // to avoid OpenGL errors.
 |     // to avoid OpenGL errors.
 | ||||||
|  |     // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
 | ||||||
|  |     // assume every shader uses them all.
 | ||||||
|     for (unsigned index = 0; index < 16; ++index) { |     for (unsigned index = 0; index < 16; ++index) { | ||||||
|         auto& attrib = regs.vertex_attrib_format[index]; |         auto& attrib = regs.vertex_attrib_format[index]; | ||||||
|         NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", |         NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", | ||||||
|                     index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), |                     index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), | ||||||
|                     attrib.offset.Value(), attrib.IsNormalized()); |                     attrib.offset.Value(), attrib.IsNormalized()); | ||||||
| 
 | 
 | ||||||
|         glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), |         auto& buffer = regs.vertex_array[attrib.buffer]; | ||||||
|                               attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride, |         ASSERT(buffer.IsEnabled()); | ||||||
|                               reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset)); | 
 | ||||||
|         glEnableVertexAttribArray(index); |         glEnableVertexAttribArray(index); | ||||||
|  |         glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), | ||||||
|  |                              attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); | ||||||
|  |         glVertexAttribBinding(index, attrib.buffer); | ||||||
|  | 
 | ||||||
|         hw_vao_enabled_attributes[index] = true; |         hw_vao_enabled_attributes[index] = true; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // Copy vertex array data
 |     return {array_ptr, buffer_offset}; | ||||||
|     const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1}; |  | ||||||
|     const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())}; |  | ||||||
|     res_cache.FlushRegion(data_addr, data_size, nullptr); |  | ||||||
|     Memory::ReadBlock(data_addr, array_ptr, data_size); |  | ||||||
| 
 |  | ||||||
|     array_ptr += data_size; |  | ||||||
|     buffer_offset += data_size; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) { | void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | ||||||
|     // Helper function for uploading uniform data
 |     // Helper function for uploading uniform data
 | ||||||
|     const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { |     const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { | ||||||
|         if (has_ARB_direct_state_access) { |         if (has_ARB_direct_state_access) { | ||||||
| @ -190,8 +207,6 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size | |||||||
|     u32 current_constbuffer_bindpoint = 0; |     u32 current_constbuffer_bindpoint = 0; | ||||||
| 
 | 
 | ||||||
|     for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { |     for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { | ||||||
|         ptr_pos += sizeof(GLShader::MaxwellUniformData); |  | ||||||
| 
 |  | ||||||
|         auto& shader_config = gpu.regs.shader_config[index]; |         auto& shader_config = gpu.regs.shader_config[index]; | ||||||
|         const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; |         const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; | ||||||
| 
 | 
 | ||||||
| @ -205,13 +220,16 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size | |||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         // Upload uniform data as one UBO per stage
 |         // Upload uniform data as one UBO per stage
 | ||||||
|         const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); |         const GLintptr ubo_offset = buffer_offset; | ||||||
|         copy_buffer(uniform_buffers[stage].handle, ubo_offset, |         copy_buffer(uniform_buffers[stage].handle, ubo_offset, | ||||||
|                     sizeof(GLShader::MaxwellUniformData)); |                     sizeof(GLShader::MaxwellUniformData)); | ||||||
|         GLShader::MaxwellUniformData* ub_ptr = |         GLShader::MaxwellUniformData* ub_ptr = | ||||||
|             reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]); |             reinterpret_cast<GLShader::MaxwellUniformData*>(buffer_ptr); | ||||||
|         ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); |         ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); | ||||||
| 
 | 
 | ||||||
|  |         buffer_ptr += sizeof(GLShader::MaxwellUniformData); | ||||||
|  |         buffer_offset += sizeof(GLShader::MaxwellUniformData); | ||||||
|  | 
 | ||||||
|         // Fetch program code from memory
 |         // Fetch program code from memory
 | ||||||
|         GLShader::ProgramCode program_code; |         GLShader::ProgramCode program_code; | ||||||
|         const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; |         const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; | ||||||
| @ -252,6 +270,24 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size | |||||||
|     shader_program_manager->UseTrivialGeometryShader(); |     shader_program_manager->UseTrivialGeometryShader(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | ||||||
|  |     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; | ||||||
|  | 
 | ||||||
|  |     size_t size = 0; | ||||||
|  |     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||||||
|  |         if (!regs.vertex_array[index].IsEnabled()) | ||||||
|  |             continue; | ||||||
|  | 
 | ||||||
|  |         const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress(); | ||||||
|  |         const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | ||||||
|  | 
 | ||||||
|  |         ASSERT(end > start); | ||||||
|  |         size += end - start + 1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return size; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | ||||||
|     accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; |     accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; | ||||||
|     DrawArrays(); |     DrawArrays(); | ||||||
| @ -329,44 +365,49 @@ void RasterizerOpenGL::DrawArrays() { | |||||||
|     const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; |     const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; | ||||||
|     const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; |     const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; | ||||||
| 
 | 
 | ||||||
|     // TODO(bunnei): Add support for 1+ vertex arrays
 |  | ||||||
|     vs_input_size = vertex_num * regs.vertex_array[0].stride; |  | ||||||
| 
 |  | ||||||
|     state.draw.vertex_buffer = stream_buffer->GetHandle(); |     state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||||||
|     state.Apply(); |     state.Apply(); | ||||||
| 
 | 
 | ||||||
|     size_t buffer_size = static_cast<size_t>(vs_input_size); |     size_t buffer_size = CalculateVertexArraysSize(); | ||||||
|  | 
 | ||||||
|     if (is_indexed) { |     if (is_indexed) { | ||||||
|         buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size; |         buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // Uniform space for the 5 shader stages
 |     // Uniform space for the 5 shader stages
 | ||||||
|     buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; |     buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + | ||||||
|  |                   sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; | ||||||
| 
 | 
 | ||||||
|     size_t ptr_pos = 0; |  | ||||||
|     u8* buffer_ptr; |     u8* buffer_ptr; | ||||||
|     GLintptr buffer_offset; |     GLintptr buffer_offset; | ||||||
|     std::tie(buffer_ptr, buffer_offset) = |     std::tie(buffer_ptr, buffer_offset) = | ||||||
|         stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); |         stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); | ||||||
| 
 | 
 | ||||||
|     SetupVertexArray(buffer_ptr, buffer_offset); |     u8* offseted_buffer; | ||||||
|     ptr_pos += vs_input_size; |     std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); | ||||||
|  | 
 | ||||||
|  |     offseted_buffer = | ||||||
|  |         reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); | ||||||
|  |     buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); | ||||||
| 
 | 
 | ||||||
|     // If indexed mode, copy the index buffer
 |     // If indexed mode, copy the index buffer
 | ||||||
|     GLintptr index_buffer_offset = 0; |     GLintptr index_buffer_offset = 0; | ||||||
|     if (is_indexed) { |     if (is_indexed) { | ||||||
|         ptr_pos = Common::AlignUp(ptr_pos, 4); |  | ||||||
| 
 |  | ||||||
|         const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; |         const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; | ||||||
|         const VAddr index_data_addr{ |         const VAddr index_data_addr{ | ||||||
|             memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())}; |             memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())}; | ||||||
|         Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size); |         Memory::ReadBlock(index_data_addr, offseted_buffer, index_buffer_size); | ||||||
| 
 | 
 | ||||||
|         index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); |         index_buffer_offset = buffer_offset; | ||||||
|         ptr_pos += index_buffer_size; |         offseted_buffer += index_buffer_size; | ||||||
|  |         buffer_offset += index_buffer_size; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     SetupShaders(buffer_ptr, buffer_offset, ptr_pos); |     offseted_buffer = | ||||||
|  |         reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); | ||||||
|  |     buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); | ||||||
|  | 
 | ||||||
|  |     SetupShaders(offseted_buffer, buffer_offset); | ||||||
| 
 | 
 | ||||||
|     stream_buffer->Unmap(); |     stream_buffer->Unmap(); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -148,13 +148,13 @@ private: | |||||||
|     static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024; |     static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024; | ||||||
|     std::unique_ptr<OGLStreamBuffer> stream_buffer; |     std::unique_ptr<OGLStreamBuffer> stream_buffer; | ||||||
| 
 | 
 | ||||||
|     GLsizeiptr vs_input_size; |     size_t CalculateVertexArraysSize() const; | ||||||
| 
 | 
 | ||||||
|     void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); |     std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); | ||||||
| 
 | 
 | ||||||
|     std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; |     std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; | ||||||
| 
 | 
 | ||||||
|     void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos); |     void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); | ||||||
| 
 | 
 | ||||||
|     enum class AccelDraw { Disabled, Arrays, Indexed }; |     enum class AccelDraw { Disabled, Arrays, Indexed }; | ||||||
|     AccelDraw accelerate_draw; |     AccelDraw accelerate_draw; | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 bunnei
						bunnei