mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu.git
				synced 2025-05-12 00:45:25 +00:00 
			
		
		
		
	Merge pull request #1730 from hrydgard/vertex-loader
* Remove late accesses to attribute_config * Refactor: Extract VertexLoader from command_processor.cpp. Preparation for a similar concept to Dolphin or PPSSPP. These can be JIT-ed and cached. * Move "&" to their proper place, add missing includes and make some properly relative. * Don't keep base_address in the loader, it doesn't belong there (with it, the loader can't be cached). * Optimize the vertex loader, nearly doubling its speed. * Debugger fix * Move and rename the MemoryAccesses class to MemoryAccessTracker.
This commit is contained in:
		
						commit
						90243c56fb
					
				@ -16,6 +16,7 @@ set(SRCS
 | 
				
			|||||||
            shader/shader_interpreter.cpp
 | 
					            shader/shader_interpreter.cpp
 | 
				
			||||||
            swrasterizer.cpp
 | 
					            swrasterizer.cpp
 | 
				
			||||||
            utils.cpp
 | 
					            utils.cpp
 | 
				
			||||||
 | 
					            vertex_loader.cpp
 | 
				
			||||||
            video_core.cpp
 | 
					            video_core.cpp
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -43,6 +44,7 @@ set(HEADERS
 | 
				
			|||||||
            shader/shader_interpreter.h
 | 
					            shader/shader_interpreter.h
 | 
				
			||||||
            swrasterizer.h
 | 
					            swrasterizer.h
 | 
				
			||||||
            utils.h
 | 
					            utils.h
 | 
				
			||||||
 | 
					            vertex_loader.h
 | 
				
			||||||
            video_core.h
 | 
					            video_core.h
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -21,6 +21,7 @@
 | 
				
			|||||||
#include "video_core/video_core.h"
 | 
					#include "video_core/video_core.h"
 | 
				
			||||||
#include "video_core/debug_utils/debug_utils.h"
 | 
					#include "video_core/debug_utils/debug_utils.h"
 | 
				
			||||||
#include "video_core/shader/shader_interpreter.h"
 | 
					#include "video_core/shader/shader_interpreter.h"
 | 
				
			||||||
 | 
					#include "video_core/vertex_loader.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Pica {
 | 
					namespace Pica {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -188,54 +189,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
 | 
				
			|||||||
#if PICA_LOG_TEV
 | 
					#if PICA_LOG_TEV
 | 
				
			||||||
            DebugUtils::DumpTevStageConfig(regs.GetTevStages());
 | 
					            DebugUtils::DumpTevStageConfig(regs.GetTevStages());
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					 | 
				
			||||||
            if (g_debug_context)
 | 
					            if (g_debug_context)
 | 
				
			||||||
                g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
 | 
					                g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            const auto& attribute_config = regs.vertex_attributes;
 | 
					            // Processes information about internal vertex attributes to figure out how a vertex is loaded.
 | 
				
			||||||
            const u32 base_address = attribute_config.GetPhysicalBaseAddress();
 | 
					            // Later, these can be compiled and cached.
 | 
				
			||||||
 | 
					            VertexLoader loader;
 | 
				
			||||||
            // Information about internal vertex attributes
 | 
					            const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress();
 | 
				
			||||||
            u32 vertex_attribute_sources[16];
 | 
					            loader.Setup(regs);
 | 
				
			||||||
            boost::fill(vertex_attribute_sources, 0xdeadbeef);
 | 
					 | 
				
			||||||
            u32 vertex_attribute_strides[16] = {};
 | 
					 | 
				
			||||||
            Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            u32 vertex_attribute_elements[16] = {};
 | 
					 | 
				
			||||||
            u32 vertex_attribute_element_size[16] = {};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            // Setup attribute data from loaders
 | 
					 | 
				
			||||||
            for (int loader = 0; loader < 12; ++loader) {
 | 
					 | 
				
			||||||
                const auto& loader_config = attribute_config.attribute_loaders[loader];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                u32 offset = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                // TODO: What happens if a loader overwrites a previous one's data?
 | 
					 | 
				
			||||||
                for (unsigned component = 0; component < loader_config.component_count; ++component) {
 | 
					 | 
				
			||||||
                    if (component >= 12) {
 | 
					 | 
				
			||||||
                        LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
 | 
					 | 
				
			||||||
                        continue;
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    u32 attribute_index = loader_config.GetComponent(component);
 | 
					 | 
				
			||||||
                    if (attribute_index < 12) {
 | 
					 | 
				
			||||||
                        int element_size = attribute_config.GetElementSizeInBytes(attribute_index);
 | 
					 | 
				
			||||||
                        offset = Common::AlignUp(offset, element_size);
 | 
					 | 
				
			||||||
                        vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset;
 | 
					 | 
				
			||||||
                        vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
 | 
					 | 
				
			||||||
                        vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
 | 
					 | 
				
			||||||
                        vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
 | 
					 | 
				
			||||||
                        vertex_attribute_element_size[attribute_index] = element_size;
 | 
					 | 
				
			||||||
                        offset += attribute_config.GetStride(attribute_index);
 | 
					 | 
				
			||||||
                    } else if (attribute_index < 16) {
 | 
					 | 
				
			||||||
                        // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
 | 
					 | 
				
			||||||
                        offset = Common::AlignUp(offset, 4);
 | 
					 | 
				
			||||||
                        offset += (attribute_index - 11) * 4;
 | 
					 | 
				
			||||||
                    } else {
 | 
					 | 
				
			||||||
                        UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // Load vertices
 | 
					            // Load vertices
 | 
				
			||||||
            bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
 | 
					            bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
 | 
				
			||||||
@ -259,32 +220,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
 | 
				
			|||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            class {
 | 
					            DebugUtils::MemoryAccessTracker memory_accesses;
 | 
				
			||||||
                /// Combine overlapping and close ranges
 | 
					 | 
				
			||||||
                void SimplifyRanges() {
 | 
					 | 
				
			||||||
                    for (auto it = ranges.begin(); it != ranges.end(); ++it) {
 | 
					 | 
				
			||||||
                        // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
 | 
					 | 
				
			||||||
                        auto it2 = std::next(it);
 | 
					 | 
				
			||||||
                        while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
 | 
					 | 
				
			||||||
                            it->second = std::max(it->second, it2->first + it2->second - it->first);
 | 
					 | 
				
			||||||
                            it2 = ranges.erase(it2);
 | 
					 | 
				
			||||||
                        }
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            public:
 | 
					 | 
				
			||||||
                /// Record a particular memory access in the list
 | 
					 | 
				
			||||||
                void AddAccess(u32 paddr, u32 size) {
 | 
					 | 
				
			||||||
                    // Create new range or extend existing one
 | 
					 | 
				
			||||||
                    ranges[paddr] = std::max(ranges[paddr], size);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    // Simplify ranges...
 | 
					 | 
				
			||||||
                    SimplifyRanges();
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                /// Map of accessed ranges (mapping start address to range size)
 | 
					 | 
				
			||||||
                std::map<u32, u32> ranges;
 | 
					 | 
				
			||||||
            } memory_accesses;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // Simple circular-replacement vertex cache
 | 
					            // Simple circular-replacement vertex cache
 | 
				
			||||||
            // The size has been tuned for optimal balance between hit-rate and the cost of lookup
 | 
					            // The size has been tuned for optimal balance between hit-rate and the cost of lookup
 | 
				
			||||||
@ -328,60 +264,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
 | 
				
			|||||||
                if (!vertex_cache_hit) {
 | 
					                if (!vertex_cache_hit) {
 | 
				
			||||||
                    // Initialize data for the current vertex
 | 
					                    // Initialize data for the current vertex
 | 
				
			||||||
                    Shader::InputVertex input;
 | 
					                    Shader::InputVertex input;
 | 
				
			||||||
 | 
					                    loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
 | 
				
			||||||
                    for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
 | 
					 | 
				
			||||||
                        if (vertex_attribute_elements[i] != 0) {
 | 
					 | 
				
			||||||
                            // Default attribute values set if array elements have < 4 components. This
 | 
					 | 
				
			||||||
                            // is *not* carried over from the default attribute settings even if they're
 | 
					 | 
				
			||||||
                            // enabled for this attribute.
 | 
					 | 
				
			||||||
                            static const float24 zero = float24::FromFloat32(0.0f);
 | 
					 | 
				
			||||||
                            static const float24 one = float24::FromFloat32(1.0f);
 | 
					 | 
				
			||||||
                            input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                            // Load per-vertex data from the loader arrays
 | 
					 | 
				
			||||||
                            for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
 | 
					 | 
				
			||||||
                                u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
 | 
					 | 
				
			||||||
                                const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                                if (g_debug_context && Pica::g_debug_context->recorder) {
 | 
					 | 
				
			||||||
                                    memory_accesses.AddAccess(source_addr,
 | 
					 | 
				
			||||||
                                        (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
 | 
					 | 
				
			||||||
                                        : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
 | 
					 | 
				
			||||||
                                }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                                const float srcval =
 | 
					 | 
				
			||||||
                                    (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE)  ? *reinterpret_cast<const s8*>(srcdata) :
 | 
					 | 
				
			||||||
                                    (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) :
 | 
					 | 
				
			||||||
                                    (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) :
 | 
					 | 
				
			||||||
                                    *reinterpret_cast<const float*>(srcdata);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                                input.attr[i][comp] = float24::FromFloat32(srcval);
 | 
					 | 
				
			||||||
                                LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
 | 
					 | 
				
			||||||
                                    comp, i, vertex, index,
 | 
					 | 
				
			||||||
                                    attribute_config.GetPhysicalBaseAddress(),
 | 
					 | 
				
			||||||
                                    vertex_attribute_sources[i] - base_address,
 | 
					 | 
				
			||||||
                                    vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
 | 
					 | 
				
			||||||
                                    input.attr[i][comp].ToFloat32());
 | 
					 | 
				
			||||||
                            }
 | 
					 | 
				
			||||||
                        } else if (attribute_config.IsDefaultAttribute(i)) {
 | 
					 | 
				
			||||||
                            // Load the default attribute if we're configured to do so
 | 
					 | 
				
			||||||
                            input.attr[i] = g_state.vs.default_attributes[i];
 | 
					 | 
				
			||||||
                            LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
 | 
					 | 
				
			||||||
                                      i, vertex, index,
 | 
					 | 
				
			||||||
                                      input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
 | 
					 | 
				
			||||||
                                      input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
 | 
					 | 
				
			||||||
                        } else {
 | 
					 | 
				
			||||||
                            // TODO(yuriks): In this case, no data gets loaded and the vertex
 | 
					 | 
				
			||||||
                            // remains with the last value it had. This isn't currently maintained
 | 
					 | 
				
			||||||
                            // as global state, however, and so won't work in Citra yet.
 | 
					 | 
				
			||||||
                        }
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    if (g_debug_context)
 | 
					                    if (g_debug_context)
 | 
				
			||||||
                        g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
 | 
					                        g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    // Send to vertex shader
 | 
					                    // Send to vertex shader
 | 
				
			||||||
                    output = Shader::Run(shader_unit, input, attribute_config.GetNumTotalAttributes());
 | 
					                    output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    if (is_indexed) {
 | 
					                    if (is_indexed) {
 | 
				
			||||||
                        vertex_cache[vertex_cache_pos] = output;
 | 
					                        vertex_cache[vertex_cache_pos] = output;
 | 
				
			||||||
 | 
				
			|||||||
@ -216,6 +216,36 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);
 | 
					void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Collects the physical memory ranges that were read, keeping them as a small set of
					 * merged ranges. Used in the vertex loader to merge access records.
					 * TODO: Investigate if actually useful.
					 */
					class MemoryAccessTracker {
					    using RangeIterator = std::map<u32, u32>::iterator;

					    /**
					     * Merge the range at `it` with its immediate neighbours where they overlap or are close.
					     * Every other entry was already merged by earlier AddAccess calls, so only the entry
					     * that just changed (and what it now reaches) has to be looked at - no full rescan.
					     */
					    void MergeAround(RangeIterator it) {
					        // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too

					        // If the preceding range reaches the changed one, let the preceding range absorb it
					        if (it != ranges.begin()) {
					            auto prev = std::prev(it);
					            if (prev->first + prev->second + 32 >= it->first)
					                it = prev;
					        }

					        // Swallow every following range that the (growing) current range reaches
					        auto next = std::next(it);
					        while (next != ranges.end() && it->first + it->second + 32 >= next->first) {
					            it->second = std::max(it->second, next->first + next->second - it->first);
					            next = ranges.erase(next);
					        }
					    }

					public:
					    /**
					     * Record a particular memory access in the list
					     * @param paddr Start address of the access
					     * @param size Number of bytes accessed
					     */
					    void AddAccess(u32 paddr, u32 size) {
					        // Create new range (size 0) or find the existing one, then extend it
					        auto it = ranges.emplace(paddr, 0u).first;
					        it->second = std::max(it->second, size);

					        // Simplify ranges around the one we just touched
					        MergeAround(it);
					    }

					    /// Map of accessed ranges (mapping start address to range size)
					    std::map<u32, u32> ranges;
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // namespace
 | 
					} // namespace
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // namespace
 | 
					} // namespace
 | 
				
			||||||
 | 
				
			|||||||
@ -25,7 +25,7 @@ namespace Pica {
 | 
				
			|||||||
namespace Shader {
 | 
					namespace Shader {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct InputVertex {
 | 
					struct InputVertex {
 | 
				
			||||||
    Math::Vec4<float24> attr[16];
 | 
					    alignas(16) Math::Vec4<float24> attr[16];
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct OutputVertex {
 | 
					struct OutputVertex {
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										140
									
								
								src/video_core/vertex_loader.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										140
									
								
								src/video_core/vertex_loader.cpp
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,140 @@
 | 
				
			|||||||
 | 
					#include <cmath>
 | 
				
			||||||
 | 
					#include <string>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "boost/range/algorithm/fill.hpp"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "common/assert.h"
 | 
				
			||||||
 | 
					#include "common/alignment.h"
 | 
				
			||||||
 | 
					#include "common/bit_field.h"
 | 
				
			||||||
 | 
					#include "common/common_funcs.h"
 | 
				
			||||||
 | 
					#include "common/common_types.h"
 | 
				
			||||||
 | 
					#include "common/logging/log.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "core/memory.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "video_core/debug_utils/debug_utils.h"
 | 
				
			||||||
 | 
					#include "video_core/pica.h"
 | 
				
			||||||
 | 
					#include "video_core/pica_state.h"
 | 
				
			||||||
 | 
					#include "video_core/pica_types.h"
 | 
				
			||||||
 | 
					#include "video_core/vertex_loader.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Pica {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Decodes the vertex attribute registers into this loader's per-attribute tables
					 * (source offset, stride, format, element count, default flag).
					 *
					 * The stored source offsets are relative to the vertex base address; the base address
					 * itself is supplied to LoadVertex, so nothing address-specific is kept here.
					 *
					 * @param regs Register state whose vertex_attributes configuration is read
					 */
					void VertexLoader::Setup(const Pica::Regs& regs) {
					    const auto& attribute_config = regs.vertex_attributes;
					    num_total_attributes = attribute_config.GetNumTotalAttributes();

					    // Reset every table so that calling Setup again on the same loader does not keep
					    // attributes from the previous configuration alive.
					    boost::fill(vertex_attribute_sources, 0xdeadbeef);
					    boost::fill(vertex_attribute_strides, 0);
					    boost::fill(vertex_attribute_formats, Regs::VertexAttributeFormat{});
					    boost::fill(vertex_attribute_elements, 0);

					    for (int i = 0; i < 16; i++) {
					        vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i);
					    }

					    // Setup attribute data from loaders
					    // (unsigned index so that it matches the %u used in the log message below)
					    for (unsigned loader = 0; loader < 12; ++loader) {
					        const auto& loader_config = attribute_config.attribute_loaders[loader];

					        // Running byte offset of the current component inside this loader's vertex record
					        u32 offset = 0;

					        // TODO: What happens if a loader overwrites a previous one's data?
					        for (unsigned component = 0; component < loader_config.component_count; ++component) {
					            if (component >= 12) {
					                LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
					                continue;
					            }

					            u32 attribute_index = loader_config.GetComponent(component);
					            if (attribute_index < 12) {
					                // Each attribute starts at an offset aligned to its element size
					                offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index));
					                vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset;
					                vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
					                vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
					                vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
					                offset += attribute_config.GetStride(attribute_index);
					            } else if (attribute_index < 16) {
					                // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
					                offset = Common::AlignUp(offset, 4);
					                offset += (attribute_index - 11) * 4;
					            } else {
					                UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
					            }
					        }
					    }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) {
 | 
				
			||||||
 | 
					    for (int i = 0; i < num_total_attributes; ++i) {
 | 
				
			||||||
 | 
					        if (vertex_attribute_elements[i] != 0) {
 | 
				
			||||||
 | 
					            // Load per-vertex data from the loader arrays
 | 
				
			||||||
 | 
					            u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if (g_debug_context && Pica::g_debug_context->recorder) {
 | 
				
			||||||
 | 
					                memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * (
 | 
				
			||||||
 | 
					                    (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
 | 
				
			||||||
 | 
					                    : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1));
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            switch (vertex_attribute_formats[i]) {
 | 
				
			||||||
 | 
					            case Regs::VertexAttributeFormat::BYTE:
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr));
 | 
				
			||||||
 | 
					                for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
 | 
				
			||||||
 | 
					                    input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            case Regs::VertexAttributeFormat::UBYTE:
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr));
 | 
				
			||||||
 | 
					                for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
 | 
				
			||||||
 | 
					                    input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            case Regs::VertexAttributeFormat::SHORT:
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr));
 | 
				
			||||||
 | 
					                for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
 | 
				
			||||||
 | 
					                    input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            case Regs::VertexAttributeFormat::FLOAT:
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr));
 | 
				
			||||||
 | 
					                for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
 | 
				
			||||||
 | 
					                    input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            // Default attribute values set if array elements have < 4 components. This
 | 
				
			||||||
 | 
					            // is *not* carried over from the default attribute settings even if they're
 | 
				
			||||||
 | 
					            // enabled for this attribute.
 | 
				
			||||||
 | 
					            for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) {
 | 
				
			||||||
 | 
					                input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f",
 | 
				
			||||||
 | 
					                vertex_attribute_elements[i], i, vertex, index,
 | 
				
			||||||
 | 
					                base_address,
 | 
				
			||||||
 | 
					                vertex_attribute_sources[i],
 | 
				
			||||||
 | 
					                vertex_attribute_strides[i] * vertex,
 | 
				
			||||||
 | 
					                input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
 | 
				
			||||||
 | 
					        } else if (vertex_attribute_is_default[i]) {
 | 
				
			||||||
 | 
					            // Load the default attribute if we're configured to do so
 | 
				
			||||||
 | 
					            input.attr[i] = g_state.vs.default_attributes[i];
 | 
				
			||||||
 | 
					            LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
 | 
				
			||||||
 | 
					                i, vertex, index,
 | 
				
			||||||
 | 
					                input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
 | 
				
			||||||
 | 
					                input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            // TODO(yuriks): In this case, no data gets loaded and the vertex
 | 
				
			||||||
 | 
					            // remains with the last value it had. This isn't currently maintained
 | 
				
			||||||
 | 
					            // as global state, however, and so won't work in Citra yet.
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace Pica
 | 
				
			||||||
							
								
								
									
										28
									
								
								src/video_core/vertex_loader.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								src/video_core/vertex_loader.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,28 @@
 | 
				
			|||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <iterator>
 | 
				
			||||||
 | 
					#include <algorithm>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "video_core/pica.h"
 | 
				
			||||||
 | 
					#include "video_core/shader/shader.h"
 | 
				
			||||||
 | 
					#include "video_core/debug_utils/debug_utils.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Pica {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class VertexLoader {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					    void Setup(const Pica::Regs& regs);
 | 
				
			||||||
 | 
					    void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int GetNumTotalAttributes() const { return num_total_attributes; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					private:
 | 
				
			||||||
 | 
					    u32 vertex_attribute_sources[16];
 | 
				
			||||||
 | 
					    u32 vertex_attribute_strides[16] = {};
 | 
				
			||||||
 | 
					    Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
 | 
				
			||||||
 | 
					    u32 vertex_attribute_elements[16] = {};
 | 
				
			||||||
 | 
					    bool vertex_attribute_is_default[16];
 | 
				
			||||||
 | 
					    int num_total_attributes;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace Pica
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user