mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu.git
				synced 2025-05-12 00:45:25 +00:00 
			
		
		
		
	Merge pull request #1006 from degasus/stream_buffer
GL renderer: Pick the streambuffer from citra and use them.
This commit is contained in:
		
						commit
						b8c1dca62f
					
				| @ -36,30 +36,21 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); | ||||
| MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | ||||
| 
 | ||||
| RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_window{window} { | ||||
| RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) | ||||
|     : emu_window{window}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) { | ||||
|     // Create sampler objects
 | ||||
|     for (size_t i = 0; i < texture_samplers.size(); ++i) { | ||||
|         texture_samplers[i].Create(); | ||||
|         state.texture_units[i].sampler = texture_samplers[i].sampler.handle; | ||||
|     } | ||||
| 
 | ||||
|     // Create SSBOs
 | ||||
|     for (size_t stage = 0; stage < ssbos.size(); ++stage) { | ||||
|         for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) { | ||||
|             ssbos[stage][buffer].Create(); | ||||
|             state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     GLint ext_num; | ||||
|     glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num); | ||||
|     for (GLint i = 0; i < ext_num; i++) { | ||||
|         const std::string_view extension{ | ||||
|             reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; | ||||
| 
 | ||||
|         if (extension == "GL_ARB_buffer_storage") { | ||||
|             has_ARB_buffer_storage = true; | ||||
|         } else if (extension == "GL_ARB_direct_state_access") { | ||||
|         if (extension == "GL_ARB_direct_state_access") { | ||||
|             has_ARB_direct_state_access = true; | ||||
|         } else if (extension == "GL_ARB_separate_shader_objects") { | ||||
|             has_ARB_separate_shader_objects = true; | ||||
| @ -86,47 +77,31 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind | ||||
| 
 | ||||
|     hw_vao.Create(); | ||||
| 
 | ||||
|     stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); | ||||
|     stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); | ||||
|     state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||||
|     state.draw.vertex_buffer = stream_buffer.GetHandle(); | ||||
| 
 | ||||
|     shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | ||||
|     state.draw.shader_program = 0; | ||||
|     state.draw.vertex_array = hw_vao.handle; | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); | ||||
| 
 | ||||
|     for (unsigned index = 0; index < uniform_buffers.size(); ++index) { | ||||
|         auto& buffer = uniform_buffers[index]; | ||||
|         buffer.Create(); | ||||
|         glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle); | ||||
|         glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr, | ||||
|                      GL_STREAM_COPY); | ||||
|         glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle); | ||||
|     } | ||||
|     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle()); | ||||
| 
 | ||||
|     glEnable(GL_BLEND); | ||||
| 
 | ||||
|     glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); | ||||
| 
 | ||||
|     LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); | ||||
| } | ||||
| 
 | ||||
| RasterizerOpenGL::~RasterizerOpenGL() { | ||||
|     if (stream_buffer != nullptr) { | ||||
|         state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||||
|         state.Apply(); | ||||
|         stream_buffer->Release(); | ||||
|     } | ||||
| } | ||||
// Destructor with an empty body: nothing is released explicitly here. stream_buffer is declared
// as a plain OGLStreamBuffer member, so tearing down the GL buffer is left to that member's own
// destructor.
| RasterizerOpenGL::~RasterizerOpenGL() {} | ||||
| 
 | ||||
| std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, | ||||
|                                                              GLintptr buffer_offset) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_VAO); | ||||
|     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | ||||
|     const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; | ||||
| 
 | ||||
|     state.draw.vertex_array = hw_vao.handle; | ||||
|     state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||||
|     state.draw.vertex_buffer = stream_buffer.GetHandle(); | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     // Upload all guest vertex arrays sequentially to our buffer
 | ||||
| @ -141,16 +116,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, | ||||
|         ASSERT(end > start); | ||||
|         u64 size = end - start + 1; | ||||
| 
 | ||||
|         // Copy vertex array data
 | ||||
|         Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); | ||||
|         GLintptr vertex_buffer_offset; | ||||
|         std::tie(array_ptr, buffer_offset, vertex_buffer_offset) = | ||||
|             UploadMemory(array_ptr, buffer_offset, start, size); | ||||
| 
 | ||||
|         // Bind the vertex array to the buffer at the current offset.
 | ||||
|         glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); | ||||
|         glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset, | ||||
|                            vertex_array.stride); | ||||
| 
 | ||||
|         ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); | ||||
| 
 | ||||
|         array_ptr += size; | ||||
|         buffer_offset += size; | ||||
|     } | ||||
| 
 | ||||
|     // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
 | ||||
| @ -201,22 +175,12 @@ static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program | ||||
|     return program_code; | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | ||||
|     // Helper function for uploading uniform data
 | ||||
|     const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { | ||||
|         if (has_ARB_direct_state_access) { | ||||
|             glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); | ||||
|         } else { | ||||
|             glBindBuffer(GL_COPY_WRITE_BUFFER, handle); | ||||
|             glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
| std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | ||||
|     auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||||
| 
 | ||||
|     // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
 | ||||
|     // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
 | ||||
|     u32 current_constbuffer_bindpoint = static_cast<u32>(uniform_buffers.size()); | ||||
|     u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; | ||||
|     u32 current_texture_bindpoint = 0; | ||||
| 
 | ||||
|     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||||
| @ -228,22 +192,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         std::tie(buffer_ptr, buffer_offset) = | ||||
|             AlignBuffer(buffer_ptr, buffer_offset, static_cast<size_t>(uniform_buffer_alignment)); | ||||
| 
 | ||||
|         const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
 | ||||
| 
 | ||||
|         GLShader::MaxwellUniformData ubo{}; | ||||
|         ubo.SetFromRegs(gpu.state.shader_stages[stage]); | ||||
|         std::memcpy(buffer_ptr, &ubo, sizeof(ubo)); | ||||
| 
 | ||||
|         // Flush the buffer so that the GPU can see the data we just wrote.
 | ||||
|         glFlushMappedBufferRange(GL_ARRAY_BUFFER, buffer_offset, sizeof(ubo)); | ||||
|         // Bind the buffer
 | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, stage, stream_buffer.GetHandle(), buffer_offset, | ||||
|                           sizeof(ubo)); | ||||
| 
 | ||||
|         // Upload uniform data as one UBO per stage
 | ||||
|         const GLintptr ubo_offset = buffer_offset; | ||||
|         copy_buffer(uniform_buffers[stage].handle, ubo_offset, | ||||
|                     sizeof(GLShader::MaxwellUniformData)); | ||||
| 
 | ||||
|         buffer_ptr += sizeof(GLShader::MaxwellUniformData); | ||||
|         buffer_offset += sizeof(GLShader::MaxwellUniformData); | ||||
|         buffer_ptr += sizeof(ubo); | ||||
|         buffer_offset += sizeof(ubo); | ||||
| 
 | ||||
|         GLShader::ShaderSetup setup{GetShaderProgramCode(program)}; | ||||
|         GLShader::ShaderEntries shader_resources; | ||||
| @ -282,9 +245,9 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | ||||
|             static_cast<Maxwell::ShaderStage>(stage)); | ||||
| 
 | ||||
|         // Configure the const buffers for this shader stage.
 | ||||
|         current_constbuffer_bindpoint = | ||||
|             SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, | ||||
|                               current_constbuffer_bindpoint, shader_resources.const_buffer_entries); | ||||
|         std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = SetupConstBuffers( | ||||
|             buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, | ||||
|             current_constbuffer_bindpoint, shader_resources.const_buffer_entries); | ||||
| 
 | ||||
|         // Configure the textures for this shader stage.
 | ||||
|         current_texture_bindpoint = | ||||
| @ -299,6 +262,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | ||||
|     } | ||||
| 
 | ||||
|     shader_program_manager->UseTrivialGeometryShader(); | ||||
| 
 | ||||
|     return {buffer_ptr, buffer_offset}; | ||||
| } | ||||
| 
 | ||||
| size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | ||||
| @ -432,6 +397,31 @@ void RasterizerOpenGL::Clear() { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
// Advances a (pointer, offset) pair so that the offset becomes a multiple of `alignment`.
// The offset is rounded up with Common::AlignUp and the pointer is moved forward by the same
// number of bytes, so both keep describing the same position.
// @param buffer_ptr    Pointer that corresponds to buffer_offset.
// @param buffer_offset Offset to be aligned.
// @param alignment     Alignment to round the offset up to.
// @returns The advanced pointer and the aligned offset.
| std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, | ||||
|                                                        size_t alignment) { | ||||
|     // Align the offset, not the mapped pointer
 | ||||
|     GLintptr offset_aligned = | ||||
|         static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment)); | ||||
|     return {buffer_ptr + (offset_aligned - buffer_offset), offset_aligned}; | ||||
| } | ||||
| 
 | ||||
// Copies `size` bytes of guest memory, addressed by a GPU virtual address, to buffer_ptr after
// aligning the (pointer, offset) pair with AlignBuffer.
// @param buffer_ptr    Destination pointer that corresponds to buffer_offset.
// @param buffer_offset Offset that corresponds to buffer_ptr.
// @param gpu_addr      GPU virtual address to read from; translated with GpuToCpuAddress.
// @param size          Number of bytes passed to Memory::ReadBlock.
// @param alignment     Alignment applied to the offset before the copy.
// @returns The pointer and offset advanced past the copied data, followed by the aligned offset
//          at which the data was written.
| std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_ptr, | ||||
|                                                                    GLintptr buffer_offset, | ||||
|                                                                    Tegra::GPUVAddr gpu_addr, | ||||
|                                                                    size_t size, size_t alignment) { | ||||
|     std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment); | ||||
// Remember where this upload starts; buffer_offset is advanced past the data below.
|     GLintptr uploaded_offset = buffer_offset; | ||||
| 
 | ||||
|     const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; | ||||
|     const boost::optional<VAddr> cpu_addr{memory_manager->GpuToCpuAddress(gpu_addr)}; | ||||
// NOTE(review): cpu_addr is dereferenced without first checking that it holds a value --
// confirm that callers only pass GPU addresses for which the translation succeeds.
|     Memory::ReadBlock(*cpu_addr, buffer_ptr, size); | ||||
| 
 | ||||
|     buffer_ptr += size; | ||||
|     buffer_offset += size; | ||||
| 
 | ||||
|     return {buffer_ptr, buffer_offset, uploaded_offset}; | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::DrawArrays() { | ||||
|     if (accelerate_draw == AccelDraw::Disabled) | ||||
|         return; | ||||
| @ -456,7 +446,7 @@ void RasterizerOpenGL::DrawArrays() { | ||||
|     const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; | ||||
|     const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; | ||||
| 
 | ||||
|     state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||||
|     state.draw.vertex_buffer = stream_buffer.GetHandle(); | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     size_t buffer_size = CalculateVertexArraysSize(); | ||||
| @ -466,41 +456,31 @@ void RasterizerOpenGL::DrawArrays() { | ||||
|     } | ||||
| 
 | ||||
|     // Uniform space for the 5 shader stages
 | ||||
|     buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + | ||||
|                   sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; | ||||
|     buffer_size = | ||||
|         Common::AlignUp<size_t>(buffer_size, 4) + | ||||
|         (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage; | ||||
| 
 | ||||
|     // Add space for at least 18 constant buffers
 | ||||
|     buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); | ||||
| 
 | ||||
|     u8* buffer_ptr; | ||||
|     GLintptr buffer_offset; | ||||
|     std::tie(buffer_ptr, buffer_offset) = | ||||
|         stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); | ||||
|     std::tie(buffer_ptr, buffer_offset, std::ignore) = | ||||
|         stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4); | ||||
|     u8* buffer_ptr_base = buffer_ptr; | ||||
| 
 | ||||
|     u8* offseted_buffer; | ||||
|     std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); | ||||
| 
 | ||||
|     offseted_buffer = | ||||
|         reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); | ||||
|     buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); | ||||
|     std::tie(buffer_ptr, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); | ||||
| 
 | ||||
|     // If indexed mode, copy the index buffer
 | ||||
|     GLintptr index_buffer_offset = 0; | ||||
|     if (is_indexed) { | ||||
|         const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; | ||||
|         const boost::optional<VAddr> index_data_addr{ | ||||
|             memory_manager->GpuToCpuAddress(regs.index_array.StartAddress())}; | ||||
|         Memory::ReadBlock(*index_data_addr, offseted_buffer, index_buffer_size); | ||||
| 
 | ||||
|         index_buffer_offset = buffer_offset; | ||||
|         offseted_buffer += index_buffer_size; | ||||
|         buffer_offset += index_buffer_size; | ||||
|         std::tie(buffer_ptr, buffer_offset, index_buffer_offset) = UploadMemory( | ||||
|             buffer_ptr, buffer_offset, regs.index_array.StartAddress(), index_buffer_size); | ||||
|     } | ||||
| 
 | ||||
|     offseted_buffer = | ||||
|         reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); | ||||
|     buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); | ||||
|     std::tie(buffer_ptr, buffer_offset) = SetupShaders(buffer_ptr, buffer_offset); | ||||
| 
 | ||||
|     SetupShaders(offseted_buffer, buffer_offset); | ||||
| 
 | ||||
|     stream_buffer->Unmap(); | ||||
|     stream_buffer.Unmap(buffer_ptr - buffer_ptr_base); | ||||
| 
 | ||||
|     shader_program_manager->ApplyTo(state); | ||||
|     state.Apply(); | ||||
| @ -647,36 +627,23 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint program, | ||||
|                                         u32 current_bindpoint, | ||||
|                                         const std::vector<GLShader::ConstBufferEntry>& entries) { | ||||
| std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers( | ||||
|     u8* buffer_ptr, GLintptr buffer_offset, Maxwell::ShaderStage stage, GLuint program, | ||||
|     u32 current_bindpoint, const std::vector<GLShader::ConstBufferEntry>& entries) { | ||||
|     const auto& gpu = Core::System::GetInstance().GPU(); | ||||
|     const auto& maxwell3d = gpu.Maxwell3D(); | ||||
| 
 | ||||
|     // Reset all buffer draw state for this stage.
 | ||||
|     for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) { | ||||
|         buffer.bindpoint = 0; | ||||
|         buffer.enabled = false; | ||||
|     } | ||||
| 
 | ||||
|     // Upload only the enabled buffers from the 16 constbuffers of each shader stage
 | ||||
|     const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; | ||||
| 
 | ||||
|     for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | ||||
|         const auto& used_buffer = entries[bindpoint]; | ||||
|         const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; | ||||
|         auto& buffer_draw_state = | ||||
|             state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()]; | ||||
| 
 | ||||
|         if (!buffer.enabled) { | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         buffer_draw_state.enabled = true; | ||||
|         buffer_draw_state.bindpoint = current_bindpoint + bindpoint; | ||||
| 
 | ||||
|         boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); | ||||
| 
 | ||||
|         size_t size = 0; | ||||
| 
 | ||||
|         if (used_buffer.IsIndirect()) { | ||||
| @ -698,25 +665,26 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr | ||||
|         size = Common::AlignUp(size, sizeof(GLvec4)); | ||||
|         ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); | ||||
| 
 | ||||
|         std::vector<u8> data(size); | ||||
|         Memory::ReadBlock(*addr, data.data(), data.size()); | ||||
|         GLintptr const_buffer_offset; | ||||
|         std::tie(buffer_ptr, buffer_offset, const_buffer_offset) = | ||||
|             UploadMemory(buffer_ptr, buffer_offset, buffer.address, size, | ||||
|                          static_cast<size_t>(uniform_buffer_alignment)); | ||||
| 
 | ||||
|         glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo); | ||||
|         glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); | ||||
|         glBindBuffer(GL_UNIFORM_BUFFER, 0); | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint, | ||||
|                           stream_buffer.GetHandle(), const_buffer_offset, size); | ||||
| 
 | ||||
|         // Now configure the bindpoint of the buffer inside the shader
 | ||||
|         const std::string buffer_name = used_buffer.GetName(); | ||||
|         const GLuint index = | ||||
|             glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str()); | ||||
|         if (index != GL_INVALID_INDEX) { | ||||
|             glUniformBlockBinding(program, index, buffer_draw_state.bindpoint); | ||||
|             glUniformBlockBinding(program, index, current_bindpoint + bindpoint); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     return current_bindpoint + static_cast<u32>(entries.size()); | ||||
|     return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())}; | ||||
| } | ||||
| 
 | ||||
| u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit, | ||||
|  | ||||
| @ -7,6 +7,7 @@ | ||||
| #include <array> | ||||
| #include <cstddef> | ||||
| #include <memory> | ||||
| #include <tuple> | ||||
| #include <utility> | ||||
| #include <vector> | ||||
| #include <glad/glad.h> | ||||
| @ -100,9 +101,10 @@ private: | ||||
|      * @param entries Vector describing the buffers that are actually used in the guest shader. | ||||
|      * @returns The next available bindpoint for use in the next shader stage. | ||||
|      */ | ||||
|     u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program, | ||||
|                           u32 current_bindpoint, | ||||
|                           const std::vector<GLShader::ConstBufferEntry>& entries); | ||||
|     std::tuple<u8*, GLintptr, u32> SetupConstBuffers( | ||||
|         u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||||
|         GLuint program, u32 current_bindpoint, | ||||
|         const std::vector<GLShader::ConstBufferEntry>& entries); | ||||
| 
 | ||||
|     /*
 | ||||
|      * Configures the current textures to use for the draw command. | ||||
| @ -139,7 +141,6 @@ private: | ||||
|     /// Syncs the blend state to match the guest state
 | ||||
|     void SyncBlendState(); | ||||
| 
 | ||||
|     bool has_ARB_buffer_storage = false; | ||||
|     bool has_ARB_direct_state_access = false; | ||||
|     bool has_ARB_separate_shader_objects = false; | ||||
|     bool has_ARB_vertex_attrib_binding = false; | ||||
| @ -155,22 +156,24 @@ private: | ||||
|     OGLVertexArray hw_vao; | ||||
| 
 | ||||
|     std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; | ||||
|     std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>, | ||||
|                Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> | ||||
|         ssbos; | ||||
| 
 | ||||
|     static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | ||||
|     std::unique_ptr<OGLStreamBuffer> stream_buffer; | ||||
|     OGLStreamBuffer stream_buffer; | ||||
|     OGLBuffer uniform_buffer; | ||||
|     OGLFramebuffer framebuffer; | ||||
|     GLint uniform_buffer_alignment; | ||||
| 
 | ||||
|     size_t CalculateVertexArraysSize() const; | ||||
| 
 | ||||
|     std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); | ||||
| 
 | ||||
|     std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; | ||||
|     std::pair<u8*, GLintptr> SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); | ||||
| 
 | ||||
|     void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); | ||||
|     std::pair<u8*, GLintptr> AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, size_t alignment); | ||||
| 
 | ||||
|     std::tuple<u8*, GLintptr, GLintptr> UploadMemory(u8* buffer_ptr, GLintptr buffer_offset, | ||||
|                                                      Tegra::GPUVAddr gpu_addr, size_t size, | ||||
|                                                      size_t alignment = 4); | ||||
| 
 | ||||
|     enum class AccelDraw { Disabled, Arrays, Indexed }; | ||||
|     AccelDraw accelerate_draw = AccelDraw::Disabled; | ||||
|  | ||||
| @ -203,21 +203,6 @@ void OpenGLState::Apply() const { | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // Constbuffers
 | ||||
|     for (std::size_t stage = 0; stage < draw.const_buffers.size(); ++stage) { | ||||
|         for (std::size_t buffer_id = 0; buffer_id < draw.const_buffers[stage].size(); ++buffer_id) { | ||||
|             const auto& current = cur_state.draw.const_buffers[stage][buffer_id]; | ||||
|             const auto& new_state = draw.const_buffers[stage][buffer_id]; | ||||
| 
 | ||||
|             if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint || | ||||
|                 current.ssbo != new_state.ssbo) { | ||||
|                 if (new_state.enabled) { | ||||
|                     glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // Framebuffer
 | ||||
|     if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { | ||||
|         glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); | ||||
|  | ||||
| @ -119,12 +119,6 @@ public: | ||||
|         GLuint uniform_buffer;   // GL_UNIFORM_BUFFER_BINDING
 | ||||
|         GLuint shader_program;   // GL_CURRENT_PROGRAM
 | ||||
|         GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
 | ||||
|         struct ConstBufferConfig { | ||||
|             bool enabled = false; | ||||
|             GLuint bindpoint; | ||||
|             GLuint ssbo; | ||||
|         }; | ||||
|         std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers; | ||||
|     } draw; | ||||
| 
 | ||||
|     struct { | ||||
|  | ||||
| @ -9,174 +9,91 @@ | ||||
| #include "video_core/renderer_opengl/gl_state.h" | ||||
| #include "video_core/renderer_opengl/gl_stream_buffer.h" | ||||
| 
 | ||||
| class OrphanBuffer : public OGLStreamBuffer { | ||||
| public: | ||||
|     explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {} | ||||
|     ~OrphanBuffer() override; | ||||
| OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent) | ||||
|     : gl_target(target), buffer_size(size) { | ||||
|     gl_buffer.Create(); | ||||
|     glBindBuffer(gl_target, gl_buffer.handle); | ||||
| 
 | ||||
| private: | ||||
|     void Create(size_t size, size_t sync_subdivide) override; | ||||
|     void Release() override; | ||||
|     GLsizeiptr allocate_size = size; | ||||
|     if (target == GL_ARRAY_BUFFER) { | ||||
|         // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
 | ||||
|         // read position is near the end and is an out-of-bound access to the vertex buffer. This is
 | ||||
|         // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
 | ||||
|         // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
 | ||||
|         // crash.
 | ||||
|         allocate_size *= 2; | ||||
|     } | ||||
| 
 | ||||
|     std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; | ||||
|     void Unmap() override; | ||||
|     if (GLAD_GL_ARB_buffer_storage) { | ||||
|         persistent = true; | ||||
|         coherent = prefer_coherent; | ||||
|         GLbitfield flags = | ||||
|             GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); | ||||
|         glBufferStorage(gl_target, allocate_size, nullptr, flags); | ||||
|         mapped_ptr = static_cast<u8*>(glMapBufferRange( | ||||
|             gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); | ||||
|     } else { | ||||
|         glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|     std::vector<u8> data; | ||||
| }; | ||||
| 
 | ||||
| class StorageBuffer : public OGLStreamBuffer { | ||||
| public: | ||||
|     explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {} | ||||
|     ~StorageBuffer() override; | ||||
| 
 | ||||
| private: | ||||
|     void Create(size_t size, size_t sync_subdivide) override; | ||||
|     void Release() override; | ||||
| 
 | ||||
|     std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; | ||||
|     void Unmap() override; | ||||
| 
 | ||||
|     struct Fence { | ||||
|         OGLSync sync; | ||||
|         size_t offset; | ||||
|     }; | ||||
|     std::deque<Fence> head; | ||||
|     std::deque<Fence> tail; | ||||
| 
 | ||||
|     u8* mapped_ptr; | ||||
| }; | ||||
| 
 | ||||
| OGLStreamBuffer::OGLStreamBuffer(GLenum target) { | ||||
|     gl_target = target; | ||||
// Tears down the stream buffer: when the `persistent` flag is set, the buffer is bound to
// gl_target and unmapped first; gl_buffer is then released in either case.
| OGLStreamBuffer::~OGLStreamBuffer() { | ||||
|     if (persistent) { | ||||
// glUnmapBuffer operates on the buffer bound to gl_target, so bind ours before unmapping.
|         glBindBuffer(gl_target, gl_buffer.handle); | ||||
|         glUnmapBuffer(gl_target); | ||||
|     } | ||||
|     gl_buffer.Release(); | ||||
| } | ||||
| 
 | ||||
// Returns the handle stored in gl_buffer, i.e. the value callers pass to glBindBuffer,
// glBindBufferRange and glBindVertexBuffer.
| GLuint OGLStreamBuffer::GetHandle() const { | ||||
|     return gl_buffer.handle; | ||||
| } | ||||
| 
 | ||||
| std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) { | ||||
|     if (storage_buffer) { | ||||
|         return std::make_unique<StorageBuffer>(target); | ||||
|     } | ||||
|     return std::make_unique<OrphanBuffer>(target); | ||||
// Returns buffer_size, which the constructor initializes from its `size` argument. This is not
// necessarily the amount of storage allocated: the constructor doubles the allocation for
// GL_ARRAY_BUFFER targets.
| GLsizeiptr OGLStreamBuffer::GetSize() const { | ||||
|     return buffer_size; | ||||
| } | ||||
| 
 | ||||
| OrphanBuffer::~OrphanBuffer() { | ||||
|     Release(); | ||||
| } | ||||
| 
 | ||||
| void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) { | ||||
|     buffer_pos = 0; | ||||
|     buffer_size = size; | ||||
|     data.resize(buffer_size); | ||||
| 
 | ||||
|     if (gl_buffer.handle == 0) { | ||||
|         gl_buffer.Create(); | ||||
|         glBindBuffer(gl_target, gl_buffer.handle); | ||||
|     } | ||||
| 
 | ||||
|     glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW); | ||||
| } | ||||
| 
 | ||||
| void OrphanBuffer::Release() { | ||||
|     gl_buffer.Release(); | ||||
| } | ||||
| 
 | ||||
| std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) { | ||||
|     buffer_pos = Common::AlignUp(buffer_pos, alignment); | ||||
| 
 | ||||
|     if (buffer_pos + size > buffer_size) { | ||||
|         Create(std::max(buffer_size, size), 0); | ||||
|     } | ||||
| 
 | ||||
|     mapped_size = size; | ||||
|     return std::make_pair(&data[buffer_pos], static_cast<GLintptr>(buffer_pos)); | ||||
| } | ||||
| 
 | ||||
| void OrphanBuffer::Unmap() { | ||||
|     glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos), | ||||
|                     static_cast<GLsizeiptr>(mapped_size), &data[buffer_pos]); | ||||
|     buffer_pos += mapped_size; | ||||
| } | ||||
| 
 | ||||
| StorageBuffer::~StorageBuffer() { | ||||
|     Release(); | ||||
| } | ||||
| 
 | ||||
| void StorageBuffer::Create(size_t size, size_t sync_subdivide) { | ||||
|     if (gl_buffer.handle != 0) | ||||
|         return; | ||||
| 
 | ||||
|     buffer_pos = 0; | ||||
|     buffer_size = size; | ||||
|     buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1); | ||||
| 
 | ||||
|     gl_buffer.Create(); | ||||
|     glBindBuffer(gl_target, gl_buffer.handle); | ||||
| 
 | ||||
|     glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, | ||||
|                     GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); | ||||
|     mapped_ptr = reinterpret_cast<u8*>( | ||||
|         glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size), | ||||
|                          GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); | ||||
| } | ||||
| 
 | ||||
| void StorageBuffer::Release() { | ||||
|     if (gl_buffer.handle == 0) | ||||
|         return; | ||||
| 
 | ||||
|     glUnmapBuffer(gl_target); | ||||
| 
 | ||||
|     gl_buffer.Release(); | ||||
|     head.clear(); | ||||
|     tail.clear(); | ||||
| } | ||||
| 
 | ||||
// NOTE(review): this span is a flattened diff view. Two signatures follow each
// other (StorageBuffer::Map returning a pair, OGLStreamBuffer::Map returning a
// tuple) and rows of both bodies are mixed without +/- markers. The comments
// below describe each group of rows; which side a row belongs to is inferred
// from the return statement it leads to - confirm against the real commit.
| std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) { | ||||
| std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { | ||||
      // A single request may not exceed the whole buffer.
|     ASSERT(size <= buffer_size); | ||||
| 
 | ||||
      // --- Rows leading to the pair-returning return statement ---
      // `sync` collects the sync object (if any) that must be waited on
      // before the requested range is handed out.
|     OGLSync sync; | ||||
| 
 | ||||
      // Presumably AlignUp/AlignDown round to a multiple of their second
      // argument - confirm. effective_offset is the write position rounded
      // down with buffer_sync_subdivide.
|     buffer_pos = Common::AlignUp(buffer_pos, alignment); | ||||
|     size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide); | ||||
| 
 | ||||
      // Create a sync object on the newest head entry when the write position
      // has moved past that entry's offset or the request does not fit before
      // the end of the buffer. The entry must not have a sync object yet.
|     if (!head.empty() && | ||||
|         (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) { | ||||
|         ASSERT(head.back().sync.handle == 0); | ||||
|         head.back().sync.Create(); | ||||
|     } | ||||
| 
 | ||||
      // The request does not fit: take the sync of the last tail entry (if
      // any), drop the tail, turn the current head into the new tail, and
      // restart at offset 0.
|     if (buffer_pos + size > buffer_size) { | ||||
|         if (!tail.empty()) { | ||||
|             std::swap(sync, tail.back().sync); | ||||
|             tail.clear(); | ||||
|         } | ||||
|         std::swap(tail, head); | ||||
|         buffer_pos = 0; | ||||
|         effective_offset = 0; | ||||
|     } | ||||
| 
 | ||||
      // Pop every tail entry whose offset lies before the end of the requested
      // range. Each swap moves that entry's sync into `sync`; the value `sync`
      // held before goes into the entry that is popped.
|     while (!tail.empty() && buffer_pos + size > tail.front().offset) { | ||||
|         std::swap(sync, tail.front().sync); | ||||
|         tail.pop_front(); | ||||
|     } | ||||
| 
 | ||||
      // If a sync object was collected, block on it (GL_TIMEOUT_IGNORED is
      // passed as the timeout) and then release it.
|     if (sync.handle != 0) { | ||||
|         glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); | ||||
|         sync.Release(); | ||||
|     } | ||||
| 
 | ||||
      // Add a head entry when there is none, or when effective_offset is past
      // the newest entry's offset.
|     if (head.empty() || effective_offset > head.back().offset) { | ||||
|         head.emplace_back(); | ||||
|         head.back().offset = effective_offset; | ||||
|     } | ||||
| 
 | ||||
|     ASSERT(alignment <= buffer_size); | ||||
      // Remember the requested size; Unmap reads mapped_size.
|     mapped_size = size; | ||||
      // Pair result: pointer into the mapping at buffer_pos, and that offset.
|     return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos)); | ||||
| 
 | ||||
      // --- Rows leading to the tuple-returning return statement ---
      // In this flattened view they follow the return above; they presumably
      // form the body of OGLStreamBuffer::Map - confirm.
      // An alignment of 0 means "no alignment requirement" and is skipped.
|     if (alignment > 0) { | ||||
|         buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment); | ||||
|     } | ||||
| 
 | ||||
      // When the request does not fit, wrap to offset 0 and flag the
      // invalidation. A persistent mapping is unmapped here and re-created
      // by the glMapBufferRange call below.
|     bool invalidate = false; | ||||
|     if (buffer_pos + size > buffer_size) { | ||||
|         buffer_pos = 0; | ||||
|         invalidate = true; | ||||
| 
 | ||||
|         if (persistent) { | ||||
|             glUnmapBuffer(gl_target); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
      // `|` is the bitwise OR on two bools here; it gives the same truth value
      // as `||` but evaluates both operands.
      // A (re)map is needed after a wrap, and on every call when the buffer is
      // not persistently mapped.
|     if (invalidate | !persistent) { | ||||
          // Flags: always write; persistent bit if persistent; coherent bit or
          // explicit flushing; invalidate the buffer on a wrap, otherwise map
          // unsynchronized.
|         GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | | ||||
|                            (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | | ||||
|                            (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); | ||||
          // Map from the current position to the end of the buffer and record
          // where the mapping starts.
|         mapped_ptr = static_cast<u8*>( | ||||
|             glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags)); | ||||
|         mapped_offset = buffer_pos; | ||||
|     } | ||||
| 
 | ||||
      // mapped_ptr corresponds to buffer offset mapped_offset, so subtract it
      // to get the pointer for buffer_pos. Tuple result: pointer, offset,
      // invalidation flag.
|     return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); | ||||
| } | ||||
| 
 | ||||
// NOTE(review): flattened diff view. The StorageBuffer::Unmap rows have no
// closing brace of their own before the OGLStreamBuffer::Unmap signature;
// presumably both sides shared the final brace in the original diff - confirm.
//
// StorageBuffer::Unmap rows: flush mapped_size bytes starting at buffer_pos,
// then advance the write position by mapped_size.
| void StorageBuffer::Unmap() { | ||||
|     glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos), | ||||
|                              static_cast<GLsizeiptr>(mapped_size)); | ||||
|     buffer_pos += mapped_size; | ||||
// OGLStreamBuffer::Unmap rows: finish a chunk of which `size` bytes were
// actually used. `size` must not exceed mapped_size.
| void OGLStreamBuffer::Unmap(GLsizeiptr size) { | ||||
|     ASSERT(size <= mapped_size); | ||||
| 
 | ||||
      // Flush only when the mapping is not coherent and something was used.
      // The flush offset is buffer_pos minus mapped_offset, i.e. relative to
      // the start of the mapped range.
|     if (!coherent && size > 0) { | ||||
|         glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size); | ||||
|     } | ||||
| 
 | ||||
      // A non-persistent mapping is unmapped after every chunk.
|     if (!persistent) { | ||||
|         glUnmapBuffer(gl_target); | ||||
|     } | ||||
| 
 | ||||
      // Advance the write position by the number of bytes used.
|     buffer_pos += size; | ||||
| } | ||||
|  | ||||
| @ -2,35 +2,41 @@ | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <memory> | ||||
| #include <tuple> | ||||
| #include <glad/glad.h> | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| 
 | ||||
// Declaration of OGLStreamBuffer, a non-copyable class (it derives privately
// from NonCopyable) that hands out chunks of a GL buffer via Map/Unmap.
// NOTE(review): flattened diff view. Rows from two versions of the class are
// mixed without +/- markers: there are two constructors, two destructors, two
// Map/Unmap declarations and duplicated data members. Access-specifier rows
// may belong to only one version - confirm against the real commit.
| class OGLStreamBuffer : private NonCopyable { | ||||
| public: | ||||
      // Version with a virtual interface: constructed from the target only.
|     explicit OGLStreamBuffer(GLenum target); | ||||
|     virtual ~OGLStreamBuffer() = default; | ||||
| 
 | ||||
| public: | ||||
      // Factory returning an owning pointer; presumably selects an
      // implementation based on storage_buffer - confirm.
|     static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target); | ||||
| 
 | ||||
      // Pure virtual creation hook and an empty default Release().
|     virtual void Create(size_t size, size_t sync_subdivide) = 0; | ||||
|     virtual void Release() {} | ||||
      // Version without virtuals: target and size are passed at construction;
      // prefer_coherent defaults to false.
|     explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false); | ||||
|     ~OGLStreamBuffer(); | ||||
| 
 | ||||
      // Accessors; presumably the GL buffer handle and the buffer size - confirm.
|     GLuint GetHandle() const; | ||||
|     GLsizeiptr GetSize() const; | ||||
| 
 | ||||
      // Virtual-interface Map/Unmap: Map returns a pointer and an offset.
|     virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0; | ||||
|     virtual void Unmap() = 0; | ||||
|     /*
 | ||||
|      * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes | ||||
|      * and the optional alignment requirement. | ||||
|      * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. | ||||
|      * The return values are the pointer to the new chunk, the offset within the buffer, | ||||
|      * and the invalidation flag for previous chunks. | ||||
|      * The actual used size must be specified on unmapping the chunk. | ||||
|      */ | ||||
|     std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0); | ||||
| 
 | ||||
| protected: | ||||
      // Ends the current chunk; `size` is the number of bytes actually used.
|     void Unmap(GLsizeiptr size); | ||||
| 
 | ||||
| private: | ||||
      // GL buffer object wrapper and the binding target used for GL calls.
|     OGLBuffer gl_buffer; | ||||
|     GLenum gl_target; | ||||
| 
 | ||||
      // size_t bookkeeping members. buffer_pos, buffer_size and mapped_size are
      // declared again below with GL integer types, which is an artifact of
      // the two versions being mixed.
|     size_t buffer_pos = 0; | ||||
|     size_t buffer_size = 0; | ||||
|     size_t buffer_sync_subdivide = 0; | ||||
|     size_t mapped_size = 0; | ||||
      // Mapping-mode flags, both false by default.
|     bool coherent = false; | ||||
|     bool persistent = false; | ||||
| 
 | ||||
      // Current write position and total size of the buffer.
|     GLintptr buffer_pos = 0; | ||||
|     GLsizeiptr buffer_size = 0; | ||||
      // Buffer offset at which the current mapping starts, and the size
      // requested by the last Map call.
|     GLintptr mapped_offset = 0; | ||||
|     GLsizeiptr mapped_size = 0; | ||||
      // CPU pointer to the start of the current mapping; nullptr by default.
|     u8* mapped_ptr = nullptr; | ||||
| }; | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 bunnei
						bunnei