mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu.git
				synced 2025-05-12 00:45:25 +00:00 
			
		
		
		
	Merge pull request #2476 from yuriks/shader-refactor3
Oh No! More shader changes!
This commit is contained in:
		
						commit
						97e06b0a0d
					
				@ -71,8 +71,8 @@ void GraphicsTracingWidget::StartRecording() {
 | 
				
			|||||||
    std::array<u32, 4 * 16> default_attributes;
 | 
					    std::array<u32, 4 * 16> default_attributes;
 | 
				
			||||||
    for (unsigned i = 0; i < 16; ++i) {
 | 
					    for (unsigned i = 0; i < 16; ++i) {
 | 
				
			||||||
        for (unsigned comp = 0; comp < 3; ++comp) {
 | 
					        for (unsigned comp = 0; comp < 3; ++comp) {
 | 
				
			||||||
            default_attributes[4 * i + comp] =
 | 
					            default_attributes[4 * i + comp] = nihstro::to_float24(
 | 
				
			||||||
                nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32());
 | 
					                Pica::g_state.input_default_attributes.attr[i][comp].ToFloat32());
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -511,7 +511,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
 | 
				
			|||||||
    auto& shader_config = Pica::g_state.regs.vs;
 | 
					    auto& shader_config = Pica::g_state.regs.vs;
 | 
				
			||||||
    for (auto instr : shader_setup.program_code)
 | 
					    for (auto instr : shader_setup.program_code)
 | 
				
			||||||
        info.code.push_back({instr});
 | 
					        info.code.push_back({instr});
 | 
				
			||||||
    int num_attributes = Pica::g_state.regs.vertex_attributes.GetNumTotalAttributes();
 | 
					    int num_attributes = shader_config.max_input_attribute_index + 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for (auto pattern : shader_setup.swizzle_data)
 | 
					    for (auto pattern : shader_setup.swizzle_data)
 | 
				
			||||||
        info.swizzle_info.push_back({pattern});
 | 
					        info.swizzle_info.push_back({pattern});
 | 
				
			||||||
@ -522,11 +522,11 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
 | 
				
			|||||||
    // Generate debug information
 | 
					    // Generate debug information
 | 
				
			||||||
    Pica::Shader::InterpreterEngine shader_engine;
 | 
					    Pica::Shader::InterpreterEngine shader_engine;
 | 
				
			||||||
    shader_engine.SetupBatch(shader_setup, entry_point);
 | 
					    shader_engine.SetupBatch(shader_setup, entry_point);
 | 
				
			||||||
    debug_data = shader_engine.ProduceDebugInfo(shader_setup, input_vertex, num_attributes);
 | 
					    debug_data = shader_engine.ProduceDebugInfo(shader_setup, input_vertex, shader_config);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Reload widget state
 | 
					    // Reload widget state
 | 
				
			||||||
    for (int attr = 0; attr < num_attributes; ++attr) {
 | 
					    for (int attr = 0; attr < num_attributes; ++attr) {
 | 
				
			||||||
        unsigned source_attr = shader_config.input_register_map.GetRegisterForAttribute(attr);
 | 
					        unsigned source_attr = shader_config.GetRegisterForAttribute(attr);
 | 
				
			||||||
        input_data_mapping[attr]->setText(QString("-> v%1").arg(source_attr));
 | 
					        input_data_mapping[attr]->setText(QString("-> v%1").arg(source_attr));
 | 
				
			||||||
        input_data_container[attr]->setVisible(true);
 | 
					        input_data_container[attr]->setVisible(true);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
@ -82,7 +82,7 @@ private:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    nihstro::ShaderInfo info;
 | 
					    nihstro::ShaderInfo info;
 | 
				
			||||||
    Pica::Shader::DebugData<true> debug_data;
 | 
					    Pica::Shader::DebugData<true> debug_data;
 | 
				
			||||||
    Pica::Shader::InputVertex input_vertex;
 | 
					    Pica::Shader::AttributeBuffer input_vertex;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    friend class GraphicsVertexShaderModel;
 | 
					    friend class GraphicsVertexShaderModel;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
				
			|||||||
@ -121,22 +121,19 @@ public:
 | 
				
			|||||||
    class Iterator {
 | 
					    class Iterator {
 | 
				
			||||||
    public:
 | 
					    public:
 | 
				
			||||||
        Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
 | 
					        Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
 | 
				
			||||||
        Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {}
 | 
					        Iterator(IntTy val) : m_val(val), m_bit(0) {}
 | 
				
			||||||
        Iterator& operator=(Iterator other) {
 | 
					        Iterator& operator=(Iterator other) {
 | 
				
			||||||
            new (this) Iterator(other);
 | 
					            new (this) Iterator(other);
 | 
				
			||||||
            return *this;
 | 
					            return *this;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        int operator*() {
 | 
					        int operator*() {
 | 
				
			||||||
            return m_bit;
 | 
					            return m_bit + ComputeLsb();
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        Iterator& operator++() {
 | 
					        Iterator& operator++() {
 | 
				
			||||||
            if (m_val == 0) {
 | 
					            int lsb = ComputeLsb();
 | 
				
			||||||
                m_bit = -1;
 | 
					            m_val >>= lsb + 1;
 | 
				
			||||||
            } else {
 | 
					            m_bit += lsb + 1;
 | 
				
			||||||
                int bit = LeastSignificantSetBit(m_val);
 | 
					            m_has_lsb = false;
 | 
				
			||||||
                m_val &= ~(1 << bit);
 | 
					 | 
				
			||||||
                m_bit = bit;
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            return *this;
 | 
					            return *this;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        Iterator operator++(int _) {
 | 
					        Iterator operator++(int _) {
 | 
				
			||||||
@ -145,15 +142,24 @@ public:
 | 
				
			|||||||
            return other;
 | 
					            return other;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        bool operator==(Iterator other) const {
 | 
					        bool operator==(Iterator other) const {
 | 
				
			||||||
            return m_bit == other.m_bit;
 | 
					            return m_val == other.m_val;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        bool operator!=(Iterator other) const {
 | 
					        bool operator!=(Iterator other) const {
 | 
				
			||||||
            return m_bit != other.m_bit;
 | 
					            return m_val != other.m_val;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    private:
 | 
					    private:
 | 
				
			||||||
 | 
					        int ComputeLsb() {
 | 
				
			||||||
 | 
					            if (!m_has_lsb) {
 | 
				
			||||||
 | 
					                m_lsb = LeastSignificantSetBit(m_val);
 | 
				
			||||||
 | 
					                m_has_lsb = true;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            return m_lsb;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
        IntTy m_val;
 | 
					        IntTy m_val;
 | 
				
			||||||
        int m_bit;
 | 
					        int m_bit;
 | 
				
			||||||
 | 
					        int m_lsb = -1;
 | 
				
			||||||
 | 
					        bool m_has_lsb = false;
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    BitSet() : m_val(0) {}
 | 
					    BitSet() : m_val(0) {}
 | 
				
			||||||
@ -221,11 +227,10 @@ public:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Iterator begin() const {
 | 
					    Iterator begin() const {
 | 
				
			||||||
        Iterator it(m_val, 0);
 | 
					        return Iterator(m_val);
 | 
				
			||||||
        return ++it;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    Iterator end() const {
 | 
					    Iterator end() const {
 | 
				
			||||||
        return Iterator(m_val, -1);
 | 
					        return Iterator(0);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    IntTy m_val;
 | 
					    IntTy m_val;
 | 
				
			||||||
 | 
				
			|||||||
@ -18,6 +18,8 @@
 | 
				
			|||||||
#include "video_core/rasterizer.h"
 | 
					#include "video_core/rasterizer.h"
 | 
				
			||||||
#include "video_core/shader/shader.h"
 | 
					#include "video_core/shader/shader.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					using Pica::Rasterizer::Vertex;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Pica {
 | 
					namespace Pica {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Clipper {
 | 
					namespace Clipper {
 | 
				
			||||||
@ -29,20 +31,20 @@ public:
 | 
				
			|||||||
                                                 float24::FromFloat32(0), float24::FromFloat32(0)))
 | 
					                                                 float24::FromFloat32(0), float24::FromFloat32(0)))
 | 
				
			||||||
        : coeffs(coeffs), bias(bias) {}
 | 
					        : coeffs(coeffs), bias(bias) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    bool IsInside(const OutputVertex& vertex) const {
 | 
					    bool IsInside(const Vertex& vertex) const {
 | 
				
			||||||
        return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);
 | 
					        return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    bool IsOutSide(const OutputVertex& vertex) const {
 | 
					    bool IsOutSide(const Vertex& vertex) const {
 | 
				
			||||||
        return !IsInside(vertex);
 | 
					        return !IsInside(vertex);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const {
 | 
					    Vertex GetIntersection(const Vertex& v0, const Vertex& v1) const {
 | 
				
			||||||
        float24 dp = Math::Dot(v0.pos + bias, coeffs);
 | 
					        float24 dp = Math::Dot(v0.pos + bias, coeffs);
 | 
				
			||||||
        float24 dp_prev = Math::Dot(v1.pos + bias, coeffs);
 | 
					        float24 dp_prev = Math::Dot(v1.pos + bias, coeffs);
 | 
				
			||||||
        float24 factor = dp_prev / (dp_prev - dp);
 | 
					        float24 factor = dp_prev / (dp_prev - dp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return OutputVertex::Lerp(factor, v0, v1);
 | 
					        return Vertex::Lerp(factor, v0, v1);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
@ -51,7 +53,7 @@ private:
 | 
				
			|||||||
    Math::Vec4<float24> bias;
 | 
					    Math::Vec4<float24> bias;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void InitScreenCoordinates(OutputVertex& vtx) {
 | 
					static void InitScreenCoordinates(Vertex& vtx) {
 | 
				
			||||||
    struct {
 | 
					    struct {
 | 
				
			||||||
        float24 halfsize_x;
 | 
					        float24 halfsize_x;
 | 
				
			||||||
        float24 offset_x;
 | 
					        float24 offset_x;
 | 
				
			||||||
@ -91,8 +93,8 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
 | 
				
			|||||||
    // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a
 | 
					    // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a
 | 
				
			||||||
    // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9.
 | 
					    // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9.
 | 
				
			||||||
    static const size_t MAX_VERTICES = 9;
 | 
					    static const size_t MAX_VERTICES = 9;
 | 
				
			||||||
    static_vector<OutputVertex, MAX_VERTICES> buffer_a = {v0, v1, v2};
 | 
					    static_vector<Vertex, MAX_VERTICES> buffer_a = {v0, v1, v2};
 | 
				
			||||||
    static_vector<OutputVertex, MAX_VERTICES> buffer_b;
 | 
					    static_vector<Vertex, MAX_VERTICES> buffer_b;
 | 
				
			||||||
    auto* output_list = &buffer_a;
 | 
					    auto* output_list = &buffer_a;
 | 
				
			||||||
    auto* input_list = &buffer_b;
 | 
					    auto* input_list = &buffer_b;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -123,7 +125,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
 | 
				
			|||||||
        std::swap(input_list, output_list);
 | 
					        std::swap(input_list, output_list);
 | 
				
			||||||
        output_list->clear();
 | 
					        output_list->clear();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        const OutputVertex* reference_vertex = &input_list->back();
 | 
					        const Vertex* reference_vertex = &input_list->back();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for (const auto& vertex : *input_list) {
 | 
					        for (const auto& vertex : *input_list) {
 | 
				
			||||||
            // NOTE: This algorithm changes vertex order in some cases!
 | 
					            // NOTE: This algorithm changes vertex order in some cases!
 | 
				
			||||||
@ -148,9 +150,9 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
 | 
				
			|||||||
    InitScreenCoordinates((*output_list)[1]);
 | 
					    InitScreenCoordinates((*output_list)[1]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for (size_t i = 0; i < output_list->size() - 2; i++) {
 | 
					    for (size_t i = 0; i < output_list->size() - 2; i++) {
 | 
				
			||||||
        OutputVertex& vtx0 = (*output_list)[0];
 | 
					        Vertex& vtx0 = (*output_list)[0];
 | 
				
			||||||
        OutputVertex& vtx1 = (*output_list)[i + 1];
 | 
					        Vertex& vtx1 = (*output_list)[i + 1];
 | 
				
			||||||
        OutputVertex& vtx2 = (*output_list)[i + 2];
 | 
					        Vertex& vtx2 = (*output_list)[i + 2];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        InitScreenCoordinates(vtx2);
 | 
					        InitScreenCoordinates(vtx2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -125,20 +125,21 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
            // TODO: Verify that this actually modifies the register!
 | 
					            // TODO: Verify that this actually modifies the register!
 | 
				
			||||||
            if (setup.index < 15) {
 | 
					            if (setup.index < 15) {
 | 
				
			||||||
                g_state.vs_default_attributes[setup.index] = attribute;
 | 
					                g_state.input_default_attributes.attr[setup.index] = attribute;
 | 
				
			||||||
                setup.index++;
 | 
					                setup.index++;
 | 
				
			||||||
            } else {
 | 
					            } else {
 | 
				
			||||||
                // Put each attribute into an immediate input buffer.
 | 
					                // Put each attribute into an immediate input buffer.  When all specified immediate
 | 
				
			||||||
                // When all specified immediate attributes are present, the Vertex Shader is invoked
 | 
					                // attributes are present, the Vertex Shader is invoked and everything is sent to
 | 
				
			||||||
                // and everything is
 | 
					                // the primitive assembler.
 | 
				
			||||||
                // sent to the primitive assembler.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                auto& immediate_input = g_state.immediate.input_vertex;
 | 
					                auto& immediate_input = g_state.immediate.input_vertex;
 | 
				
			||||||
                auto& immediate_attribute_id = g_state.immediate.current_attribute;
 | 
					                auto& immediate_attribute_id = g_state.immediate.current_attribute;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                immediate_input.attr[immediate_attribute_id++] = attribute;
 | 
					                immediate_input.attr[immediate_attribute_id] = attribute;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                if (immediate_attribute_id >= regs.vs.num_input_attributes + 1) {
 | 
					                if (immediate_attribute_id < regs.max_input_attrib_index) {
 | 
				
			||||||
 | 
					                    immediate_attribute_id += 1;
 | 
				
			||||||
 | 
					                } else {
 | 
				
			||||||
                    MICROPROFILE_SCOPE(GPU_Drawing);
 | 
					                    MICROPROFILE_SCOPE(GPU_Drawing);
 | 
				
			||||||
                    immediate_attribute_id = 0;
 | 
					                    immediate_attribute_id = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -150,10 +151,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
 | 
				
			|||||||
                        g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
 | 
					                        g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
 | 
				
			||||||
                                                 static_cast<void*>(&immediate_input));
 | 
					                                                 static_cast<void*>(&immediate_input));
 | 
				
			||||||
                    Shader::UnitState shader_unit;
 | 
					                    Shader::UnitState shader_unit;
 | 
				
			||||||
                    shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1);
 | 
					                    Shader::AttributeBuffer output{};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    shader_unit.LoadInput(regs.vs, immediate_input);
 | 
				
			||||||
                    shader_engine->Run(g_state.vs, shader_unit);
 | 
					                    shader_engine->Run(g_state.vs, shader_unit);
 | 
				
			||||||
                    auto output_vertex = Shader::OutputVertex::FromRegisters(
 | 
					                    shader_unit.WriteOutput(regs.vs, output);
 | 
				
			||||||
                        shader_unit.registers.output, regs, regs.vs.output_mask);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    // Send to renderer
 | 
					                    // Send to renderer
 | 
				
			||||||
                    using Pica::Shader::OutputVertex;
 | 
					                    using Pica::Shader::OutputVertex;
 | 
				
			||||||
@ -162,7 +164,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
 | 
				
			|||||||
                        VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
 | 
					                        VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
 | 
				
			||||||
                    };
 | 
					                    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
 | 
					                    g_state.primitive_assembler.SubmitVertex(
 | 
				
			||||||
 | 
					                        Shader::OutputVertex::FromAttributeBuffer(regs, output), AddTriangle);
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@ -280,19 +283,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
            if (!vertex_cache_hit) {
 | 
					            if (!vertex_cache_hit) {
 | 
				
			||||||
                // Initialize data for the current vertex
 | 
					                // Initialize data for the current vertex
 | 
				
			||||||
                Shader::InputVertex input;
 | 
					                Shader::AttributeBuffer input, output{};
 | 
				
			||||||
                loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
 | 
					                loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                // Send to vertex shader
 | 
					                // Send to vertex shader
 | 
				
			||||||
                if (g_debug_context)
 | 
					                if (g_debug_context)
 | 
				
			||||||
                    g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
 | 
					                    g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
 | 
				
			||||||
                                             (void*)&input);
 | 
					                                             (void*)&input);
 | 
				
			||||||
                shader_unit.LoadInputVertex(input, loader.GetNumTotalAttributes());
 | 
					                shader_unit.LoadInput(regs.vs, input);
 | 
				
			||||||
                shader_engine->Run(g_state.vs, shader_unit);
 | 
					                shader_engine->Run(g_state.vs, shader_unit);
 | 
				
			||||||
 | 
					                shader_unit.WriteOutput(regs.vs, output);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                // Retrieve vertex from register data
 | 
					                // Retrieve vertex from register data
 | 
				
			||||||
                output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output,
 | 
					                output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs, output);
 | 
				
			||||||
                                                                    regs, regs.vs.output_mask);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                if (is_indexed) {
 | 
					                if (is_indexed) {
 | 
				
			||||||
                    vertex_cache[vertex_cache_pos] = output_vertex;
 | 
					                    vertex_cache[vertex_cache_pos] = output_vertex;
 | 
				
			||||||
 | 
				
			|||||||
@ -99,7 +99,8 @@ struct Regs {
 | 
				
			|||||||
            TEXCOORD1_U = 14,
 | 
					            TEXCOORD1_U = 14,
 | 
				
			||||||
            TEXCOORD1_V = 15,
 | 
					            TEXCOORD1_V = 15,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // TODO: Not verified
 | 
					            TEXCOORD0_W = 16,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            VIEW_X = 18,
 | 
					            VIEW_X = 18,
 | 
				
			||||||
            VIEW_Y = 19,
 | 
					            VIEW_Y = 19,
 | 
				
			||||||
            VIEW_Z = 20,
 | 
					            VIEW_Z = 20,
 | 
				
			||||||
@ -871,7 +872,7 @@ struct Regs {
 | 
				
			|||||||
        LightSrc light[8];
 | 
					        LightSrc light[8];
 | 
				
			||||||
        LightColor global_ambient; // Emission + (material.ambient * lighting.ambient)
 | 
					        LightColor global_ambient; // Emission + (material.ambient * lighting.ambient)
 | 
				
			||||||
        INSERT_PADDING_WORDS(0x1);
 | 
					        INSERT_PADDING_WORDS(0x1);
 | 
				
			||||||
        BitField<0, 3, u32> num_lights; // Number of enabled lights - 1
 | 
					        BitField<0, 3, u32> max_light_index; // Number of enabled lights - 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        union {
 | 
					        union {
 | 
				
			||||||
            BitField<2, 2, LightingFresnelSelector> fresnel_selector;
 | 
					            BitField<2, 2, LightingFresnelSelector> fresnel_selector;
 | 
				
			||||||
@ -1048,7 +1049,7 @@ struct Regs {
 | 
				
			|||||||
            BitField<48, 12, u64> attribute_mask;
 | 
					            BitField<48, 12, u64> attribute_mask;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // number of total attributes minus 1
 | 
					            // number of total attributes minus 1
 | 
				
			||||||
            BitField<60, 4, u64> num_extra_attributes;
 | 
					            BitField<60, 4, u64> max_attribute_index;
 | 
				
			||||||
        };
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        inline VertexAttributeFormat GetFormat(int n) const {
 | 
					        inline VertexAttributeFormat GetFormat(int n) const {
 | 
				
			||||||
@ -1079,7 +1080,7 @@ struct Regs {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        inline int GetNumTotalAttributes() const {
 | 
					        inline int GetNumTotalAttributes() const {
 | 
				
			||||||
            return (int)num_extra_attributes + 1;
 | 
					            return (int)max_attribute_index + 1;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // Attribute loaders map the source vertex data to input attributes
 | 
					        // Attribute loaders map the source vertex data to input attributes
 | 
				
			||||||
@ -1179,7 +1180,12 @@ struct Regs {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    } command_buffer;
 | 
					    } command_buffer;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    INSERT_PADDING_WORDS(0x07);
 | 
					    INSERT_PADDING_WORDS(4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Number of input attributes to the vertex shader minus 1
 | 
				
			||||||
 | 
					    BitField<0, 4, u32> max_input_attrib_index;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    INSERT_PADDING_WORDS(2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    enum class GPUMode : u32 {
 | 
					    enum class GPUMode : u32 {
 | 
				
			||||||
        Drawing = 0,
 | 
					        Drawing = 0,
 | 
				
			||||||
@ -1217,42 +1223,21 @@ struct Regs {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        union {
 | 
					        union {
 | 
				
			||||||
            // Number of input attributes to shader unit - 1
 | 
					            // Number of input attributes to shader unit - 1
 | 
				
			||||||
            BitField<0, 4, u32> num_input_attributes;
 | 
					            BitField<0, 4, u32> max_input_attribute_index;
 | 
				
			||||||
        };
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // Offset to shader program entry point (in words)
 | 
					        // Offset to shader program entry point (in words)
 | 
				
			||||||
        BitField<0, 16, u32> main_offset;
 | 
					        BitField<0, 16, u32> main_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        union {
 | 
					        /// Maps input attributes to registers. 4-bits per attribute, specifying a register index
 | 
				
			||||||
            BitField<0, 4, u64> attribute0_register;
 | 
					        u32 input_attribute_to_register_map_low;
 | 
				
			||||||
            BitField<4, 4, u64> attribute1_register;
 | 
					        u32 input_attribute_to_register_map_high;
 | 
				
			||||||
            BitField<8, 4, u64> attribute2_register;
 | 
					 | 
				
			||||||
            BitField<12, 4, u64> attribute3_register;
 | 
					 | 
				
			||||||
            BitField<16, 4, u64> attribute4_register;
 | 
					 | 
				
			||||||
            BitField<20, 4, u64> attribute5_register;
 | 
					 | 
				
			||||||
            BitField<24, 4, u64> attribute6_register;
 | 
					 | 
				
			||||||
            BitField<28, 4, u64> attribute7_register;
 | 
					 | 
				
			||||||
            BitField<32, 4, u64> attribute8_register;
 | 
					 | 
				
			||||||
            BitField<36, 4, u64> attribute9_register;
 | 
					 | 
				
			||||||
            BitField<40, 4, u64> attribute10_register;
 | 
					 | 
				
			||||||
            BitField<44, 4, u64> attribute11_register;
 | 
					 | 
				
			||||||
            BitField<48, 4, u64> attribute12_register;
 | 
					 | 
				
			||||||
            BitField<52, 4, u64> attribute13_register;
 | 
					 | 
				
			||||||
            BitField<56, 4, u64> attribute14_register;
 | 
					 | 
				
			||||||
            BitField<60, 4, u64> attribute15_register;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            int GetRegisterForAttribute(int attribute_index) const {
 | 
					        unsigned int GetRegisterForAttribute(unsigned int attribute_index) const {
 | 
				
			||||||
                u64 fields[] = {
 | 
					            u64 map = ((u64)input_attribute_to_register_map_high << 32) |
 | 
				
			||||||
                    attribute0_register,  attribute1_register,  attribute2_register,
 | 
					                      (u64)input_attribute_to_register_map_low;
 | 
				
			||||||
                    attribute3_register,  attribute4_register,  attribute5_register,
 | 
					            return (map >> (attribute_index * 4)) & 0b1111;
 | 
				
			||||||
                    attribute6_register,  attribute7_register,  attribute8_register,
 | 
					        }
 | 
				
			||||||
                    attribute9_register,  attribute10_register, attribute11_register,
 | 
					 | 
				
			||||||
                    attribute12_register, attribute13_register, attribute14_register,
 | 
					 | 
				
			||||||
                    attribute15_register,
 | 
					 | 
				
			||||||
                };
 | 
					 | 
				
			||||||
                return (int)fields[attribute_index];
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        } input_register_map;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        BitField<0, 16, u32> output_mask;
 | 
					        BitField<0, 16, u32> output_mask;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -23,7 +23,7 @@ struct State {
 | 
				
			|||||||
    Shader::ShaderSetup vs;
 | 
					    Shader::ShaderSetup vs;
 | 
				
			||||||
    Shader::ShaderSetup gs;
 | 
					    Shader::ShaderSetup gs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::array<Math::Vec4<float24>, 16> vs_default_attributes;
 | 
					    Shader::AttributeBuffer input_default_attributes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    struct {
 | 
					    struct {
 | 
				
			||||||
        union LutEntry {
 | 
					        union LutEntry {
 | 
				
			||||||
@ -66,7 +66,7 @@ struct State {
 | 
				
			|||||||
    /// Struct used to describe immediate mode rendering state
 | 
					    /// Struct used to describe immediate mode rendering state
 | 
				
			||||||
    struct ImmediateModeState {
 | 
					    struct ImmediateModeState {
 | 
				
			||||||
        // Used to buffer partial vertices for immediate-mode rendering.
 | 
					        // Used to buffer partial vertices for immediate-mode rendering.
 | 
				
			||||||
        Shader::InputVertex input_vertex;
 | 
					        Shader::AttributeBuffer input_vertex;
 | 
				
			||||||
        // Index of the next attribute to be loaded into `input_vertex`.
 | 
					        // Index of the next attribute to be loaded into `input_vertex`.
 | 
				
			||||||
        u32 current_attribute = 0;
 | 
					        u32 current_attribute = 0;
 | 
				
			||||||
    } immediate;
 | 
					    } immediate;
 | 
				
			||||||
 | 
				
			|||||||
@ -14,7 +14,7 @@ PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topolo
 | 
				
			|||||||
    : topology(topology), buffer_index(0) {}
 | 
					    : topology(topology), buffer_index(0) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template <typename VertexType>
 | 
					template <typename VertexType>
 | 
				
			||||||
void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx,
 | 
					void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx,
 | 
				
			||||||
                                                  TriangleHandler triangle_handler) {
 | 
					                                                  TriangleHandler triangle_handler) {
 | 
				
			||||||
    switch (topology) {
 | 
					    switch (topology) {
 | 
				
			||||||
    // TODO: Figure out what's different with TriangleTopology::Shader.
 | 
					    // TODO: Figure out what's different with TriangleTopology::Shader.
 | 
				
			||||||
 | 
				
			|||||||
@ -15,7 +15,8 @@ namespace Pica {
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
template <typename VertexType>
 | 
					template <typename VertexType>
 | 
				
			||||||
struct PrimitiveAssembler {
 | 
					struct PrimitiveAssembler {
 | 
				
			||||||
    using TriangleHandler = std::function<void(VertexType& v0, VertexType& v1, VertexType& v2)>;
 | 
					    using TriangleHandler =
 | 
				
			||||||
 | 
					        std::function<void(const VertexType& v0, const VertexType& v1, const VertexType& v2)>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List);
 | 
					    PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -25,7 +26,7 @@ struct PrimitiveAssembler {
 | 
				
			|||||||
     * NOTE: We could specify the triangle handler in the constructor, but this way we can
 | 
					     * NOTE: We could specify the triangle handler in the constructor, but this way we can
 | 
				
			||||||
     * keep event and handler code next to each other.
 | 
					     * keep event and handler code next to each other.
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    void SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler);
 | 
					    void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * Resets the internal state of the PrimitiveAssembler.
 | 
					     * Resets the internal state of the PrimitiveAssembler.
 | 
				
			||||||
 | 
				
			|||||||
@ -308,8 +308,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24
 | 
				
			|||||||
 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
 | 
					 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
 | 
				
			||||||
 * culling via recursion.
 | 
					 * culling via recursion.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
 | 
					static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Vertex& v2,
 | 
				
			||||||
                                    const Shader::OutputVertex& v2, bool reversed = false) {
 | 
					                                    bool reversed = false) {
 | 
				
			||||||
    const auto& regs = g_state.regs;
 | 
					    const auto& regs = g_state.regs;
 | 
				
			||||||
    MICROPROFILE_SCOPE(GPU_Rasterization);
 | 
					    MICROPROFILE_SCOPE(GPU_Rasterization);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -1277,8 +1277,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
 | 
					void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2) {
 | 
				
			||||||
                     const Shader::OutputVertex& v2) {
 | 
					 | 
				
			||||||
    ProcessTriangleInternal(v0, v1, v2);
 | 
					    ProcessTriangleInternal(v0, v1, v2);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -4,16 +4,44 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#pragma once
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Pica {
 | 
					#include "video_core/shader/shader.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Shader {
 | 
					namespace Pica {
 | 
				
			||||||
struct OutputVertex;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Rasterizer {
 | 
					namespace Rasterizer {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
 | 
					struct Vertex : Shader::OutputVertex {
 | 
				
			||||||
                     const Shader::OutputVertex& v2);
 | 
					    Vertex(const OutputVertex& v) : OutputVertex(v) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Attributes used to store intermediate results
 | 
				
			||||||
 | 
					    // position after perspective divide
 | 
				
			||||||
 | 
					    Math::Vec3<float24> screenpos;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Linear interpolation
 | 
				
			||||||
 | 
					    // factor: 0=this, 1=vtx
 | 
				
			||||||
 | 
					    void Lerp(float24 factor, const Vertex& vtx) {
 | 
				
			||||||
 | 
					        pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // TODO: Should perform perspective correct interpolation here...
 | 
				
			||||||
 | 
					        tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
 | 
				
			||||||
 | 
					        tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor);
 | 
				
			||||||
 | 
					        tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Linear interpolation
 | 
				
			||||||
 | 
					    // factor: 0=v0, 1=v1
 | 
				
			||||||
 | 
					    static Vertex Lerp(float24 factor, const Vertex& v0, const Vertex& v1) {
 | 
				
			||||||
 | 
					        Vertex ret = v0;
 | 
				
			||||||
 | 
					        ret.Lerp(factor, v1);
 | 
				
			||||||
 | 
					        return ret;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // namespace Rasterizer
 | 
					} // namespace Rasterizer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -467,7 +467,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    // Fragment lighting switches
 | 
					    // Fragment lighting switches
 | 
				
			||||||
    case PICA_REG_INDEX(lighting.disable):
 | 
					    case PICA_REG_INDEX(lighting.disable):
 | 
				
			||||||
    case PICA_REG_INDEX(lighting.num_lights):
 | 
					    case PICA_REG_INDEX(lighting.max_light_index):
 | 
				
			||||||
    case PICA_REG_INDEX(lighting.config0):
 | 
					    case PICA_REG_INDEX(lighting.config0):
 | 
				
			||||||
    case PICA_REG_INDEX(lighting.config1):
 | 
					    case PICA_REG_INDEX(lighting.config1):
 | 
				
			||||||
    case PICA_REG_INDEX(lighting.abs_lut_input):
 | 
					    case PICA_REG_INDEX(lighting.abs_lut_input):
 | 
				
			||||||
 | 
				
			|||||||
@ -84,7 +84,7 @@ union PicaShaderConfig {
 | 
				
			|||||||
        // Fragment lighting
 | 
					        // Fragment lighting
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        state.lighting.enable = !regs.lighting.disable;
 | 
					        state.lighting.enable = !regs.lighting.disable;
 | 
				
			||||||
        state.lighting.src_num = regs.lighting.num_lights + 1;
 | 
					        state.lighting.src_num = regs.lighting.max_light_index + 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
 | 
					        for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
 | 
				
			||||||
            unsigned num = regs.lighting.light_enable.GetNum(light_index);
 | 
					            unsigned num = regs.lighting.light_enable.GetNum(light_index);
 | 
				
			||||||
 | 
				
			|||||||
@ -4,6 +4,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include <cmath>
 | 
					#include <cmath>
 | 
				
			||||||
#include <cstring>
 | 
					#include <cstring>
 | 
				
			||||||
 | 
					#include "common/bit_set.h"
 | 
				
			||||||
#include "common/logging/log.h"
 | 
					#include "common/logging/log.h"
 | 
				
			||||||
#include "common/microprofile.h"
 | 
					#include "common/microprofile.h"
 | 
				
			||||||
#include "video_core/pica.h"
 | 
					#include "video_core/pica.h"
 | 
				
			||||||
@ -19,38 +20,32 @@ namespace Pica {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
namespace Shader {
 | 
					namespace Shader {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs,
 | 
					OutputVertex OutputVertex::FromAttributeBuffer(const Regs& regs, AttributeBuffer& input) {
 | 
				
			||||||
                                         u32 output_mask) {
 | 
					 | 
				
			||||||
    // Setup output data
 | 
					    // Setup output data
 | 
				
			||||||
    OutputVertex ret;
 | 
					    union {
 | 
				
			||||||
    // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
 | 
					        OutputVertex ret{};
 | 
				
			||||||
    // figure out what those circumstances are and enable the remaining outputs then.
 | 
					        std::array<float24, 24> vertex_slots;
 | 
				
			||||||
    unsigned index = 0;
 | 
					    };
 | 
				
			||||||
    for (unsigned i = 0; i < 7; ++i) {
 | 
					    static_assert(sizeof(vertex_slots) == sizeof(ret), "Struct and array have different sizes.");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (index >= regs.vs_output_total)
 | 
					    unsigned int num_attributes = regs.vs_output_total;
 | 
				
			||||||
            break;
 | 
					    ASSERT(num_attributes <= 7);
 | 
				
			||||||
 | 
					    for (unsigned int i = 0; i < num_attributes; ++i) {
 | 
				
			||||||
 | 
					        const auto& output_register_map = regs.vs_output_attributes[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if ((output_mask & (1 << i)) == 0)
 | 
					        Regs::VSOutputAttributes::Semantic semantics[4] = {
 | 
				
			||||||
            continue;
 | 
					            output_register_map.map_x, output_register_map.map_y, output_register_map.map_z,
 | 
				
			||||||
 | 
					            output_register_map.map_w};
 | 
				
			||||||
        const auto& output_register_map = regs.vs_output_attributes[index];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y,
 | 
					 | 
				
			||||||
                            output_register_map.map_z, output_register_map.map_w};
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for (unsigned comp = 0; comp < 4; ++comp) {
 | 
					        for (unsigned comp = 0; comp < 4; ++comp) {
 | 
				
			||||||
            float24* out = ((float24*)&ret) + semantics[comp];
 | 
					            Regs::VSOutputAttributes::Semantic semantic = semantics[comp];
 | 
				
			||||||
            if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
 | 
					            float24* out = &vertex_slots[semantic];
 | 
				
			||||||
                *out = output_regs[i][comp];
 | 
					            if (semantic < vertex_slots.size()) {
 | 
				
			||||||
            } else {
 | 
					                *out = input.attr[i][comp];
 | 
				
			||||||
                // Zero output so that attributes which aren't output won't have denormals in them,
 | 
					            } else if (semantic != Regs::VSOutputAttributes::INVALID) {
 | 
				
			||||||
                // which would slow us down later.
 | 
					                LOG_ERROR(HW_GPU, "Invalid/unknown semantic id: %u", (unsigned int)semantic);
 | 
				
			||||||
                memset(out, 0, sizeof(*out));
 | 
					 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					 | 
				
			||||||
        index++;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // The hardware takes the absolute and saturates vertex colors like this, *before* doing
 | 
					    // The hardware takes the absolute and saturates vertex colors like this, *before* doing
 | 
				
			||||||
@ -71,12 +66,20 @@ OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], co
 | 
				
			|||||||
    return ret;
 | 
					    return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) {
 | 
					void UnitState::LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input) {
 | 
				
			||||||
    // Setup input register table
 | 
					    const unsigned max_attribute = config.max_input_attribute_index;
 | 
				
			||||||
    const auto& attribute_register_map = g_state.regs.vs.input_register_map;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for (int i = 0; i < num_attributes; i++)
 | 
					    for (unsigned attr = 0; attr <= max_attribute; ++attr) {
 | 
				
			||||||
        registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
 | 
					        unsigned reg = config.GetRegisterForAttribute(attr);
 | 
				
			||||||
 | 
					        registers.input[reg] = input.attr[attr];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void UnitState::WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output) {
 | 
				
			||||||
 | 
					    unsigned int output_i = 0;
 | 
				
			||||||
 | 
					    for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) {
 | 
				
			||||||
 | 
					        output.attr[output_i++] = registers.output[reg];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
 | 
					MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
 | 
				
			||||||
 | 
				
			|||||||
@ -23,14 +23,11 @@ namespace Pica {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
namespace Shader {
 | 
					namespace Shader {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct InputVertex {
 | 
					struct AttributeBuffer {
 | 
				
			||||||
    alignas(16) Math::Vec4<float24> attr[16];
 | 
					    alignas(16) Math::Vec4<float24> attr[16];
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct OutputVertex {
 | 
					struct OutputVertex {
 | 
				
			||||||
    OutputVertex() = default;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // VS output attributes
 | 
					 | 
				
			||||||
    Math::Vec4<float24> pos;
 | 
					    Math::Vec4<float24> pos;
 | 
				
			||||||
    Math::Vec4<float24> quat;
 | 
					    Math::Vec4<float24> quat;
 | 
				
			||||||
    Math::Vec4<float24> color;
 | 
					    Math::Vec4<float24> color;
 | 
				
			||||||
@ -42,43 +39,22 @@ struct OutputVertex {
 | 
				
			|||||||
    INSERT_PADDING_WORDS(1);
 | 
					    INSERT_PADDING_WORDS(1);
 | 
				
			||||||
    Math::Vec2<float24> tc2;
 | 
					    Math::Vec2<float24> tc2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Padding for optimal alignment
 | 
					    static OutputVertex FromAttributeBuffer(const Regs& regs, AttributeBuffer& output);
 | 
				
			||||||
    INSERT_PADDING_WORDS(4);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Attributes used to store intermediate results
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // position after perspective divide
 | 
					 | 
				
			||||||
    Math::Vec3<float24> screenpos;
 | 
					 | 
				
			||||||
    INSERT_PADDING_WORDS(1);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Linear interpolation
 | 
					 | 
				
			||||||
    // factor: 0=this, 1=vtx
 | 
					 | 
				
			||||||
    void Lerp(float24 factor, const OutputVertex& vtx) {
 | 
					 | 
				
			||||||
        pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        // TODO: Should perform perspective correct interpolation here...
 | 
					 | 
				
			||||||
        tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
 | 
					 | 
				
			||||||
        tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor);
 | 
					 | 
				
			||||||
        tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Linear interpolation
 | 
					 | 
				
			||||||
    // factor: 0=v0, 1=v1
 | 
					 | 
				
			||||||
    static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) {
 | 
					 | 
				
			||||||
        OutputVertex ret = v0;
 | 
					 | 
				
			||||||
        ret.Lerp(factor, v1);
 | 
					 | 
				
			||||||
        return ret;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    static OutputVertex FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs,
 | 
					 | 
				
			||||||
                                      u32 output_mask);
 | 
					 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					#define ASSERT_POS(var, pos)                                                                       \
 | 
				
			||||||
 | 
					    static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong "       \
 | 
				
			||||||
 | 
					                                                                        "offset.")
 | 
				
			||||||
 | 
					ASSERT_POS(pos, Regs::VSOutputAttributes::POSITION_X);
 | 
				
			||||||
 | 
					ASSERT_POS(quat, Regs::VSOutputAttributes::QUATERNION_X);
 | 
				
			||||||
 | 
					ASSERT_POS(color, Regs::VSOutputAttributes::COLOR_R);
 | 
				
			||||||
 | 
					ASSERT_POS(tc0, Regs::VSOutputAttributes::TEXCOORD0_U);
 | 
				
			||||||
 | 
					ASSERT_POS(tc1, Regs::VSOutputAttributes::TEXCOORD1_U);
 | 
				
			||||||
 | 
					ASSERT_POS(tc0_w, Regs::VSOutputAttributes::TEXCOORD0_W);
 | 
				
			||||||
 | 
					ASSERT_POS(view, Regs::VSOutputAttributes::VIEW_X);
 | 
				
			||||||
 | 
					ASSERT_POS(tc2, Regs::VSOutputAttributes::TEXCOORD2_U);
 | 
				
			||||||
 | 
					#undef ASSERT_POS
 | 
				
			||||||
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
 | 
					static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
 | 
				
			||||||
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
 | 
					static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * This structure contains the state information that needs to be unique for a shader unit. The 3DS
 | 
					 * This structure contains the state information that needs to be unique for a shader unit. The 3DS
 | 
				
			||||||
@ -137,10 +113,12 @@ struct UnitState {
 | 
				
			|||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * Loads the unit state with an input vertex.
 | 
					     * Loads the unit state with an input vertex.
 | 
				
			||||||
     *
 | 
					     *
 | 
				
			||||||
     * @param input Input vertex into the shader
 | 
					     * @param config Shader configuration registers corresponding to the unit.
 | 
				
			||||||
     * @param num_attributes The number of vertex shader attributes to load
 | 
					     * @param input Attribute buffer to load into the input registers.
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    void LoadInputVertex(const InputVertex& input, int num_attributes);
 | 
					    void LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct ShaderSetup {
 | 
					struct ShaderSetup {
 | 
				
			||||||
 | 
				
			|||||||
@ -668,14 +668,14 @@ void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup,
 | 
					DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup,
 | 
				
			||||||
                                                    const InputVertex& input,
 | 
					                                                    const AttributeBuffer& input,
 | 
				
			||||||
                                                    int num_attributes) const {
 | 
					                                                    const Regs::ShaderConfig& config) const {
 | 
				
			||||||
    UnitState state;
 | 
					    UnitState state;
 | 
				
			||||||
    DebugData<true> debug_data;
 | 
					    DebugData<true> debug_data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Setup input register table
 | 
					    // Setup input register table
 | 
				
			||||||
    boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero()));
 | 
					    boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero()));
 | 
				
			||||||
    state.LoadInputVertex(input, num_attributes);
 | 
					    state.LoadInput(config, input);
 | 
				
			||||||
    RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point);
 | 
					    RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point);
 | 
				
			||||||
    return debug_data;
 | 
					    return debug_data;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
@ -19,12 +19,11 @@ public:
 | 
				
			|||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * Produce debug information based on the given shader and input vertex
 | 
					     * Produce debug information based on the given shader and input vertex
 | 
				
			||||||
     * @param input Input vertex into the shader
 | 
					     * @param input Input vertex into the shader
 | 
				
			||||||
     * @param num_attributes The number of vertex shader attributes
 | 
					 | 
				
			||||||
     * @param config Configuration object for the shader pipeline
 | 
					     * @param config Configuration object for the shader pipeline
 | 
				
			||||||
     * @return Debug information for this shader with regards to the given vertex
 | 
					     * @return Debug information for this shader with regards to the given vertex
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const InputVertex& input,
 | 
					    DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const AttributeBuffer& input,
 | 
				
			||||||
                                     int num_attributes) const;
 | 
					                                     const Regs::ShaderConfig& config) const;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // namespace
 | 
					} // namespace
 | 
				
			||||||
 | 
				
			|||||||
@ -70,7 +70,8 @@ void VertexLoader::Setup(const Pica::Regs& regs) {
 | 
				
			|||||||
    is_setup = true;
 | 
					    is_setup = true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input,
 | 
					void VertexLoader::LoadVertex(u32 base_address, int index, int vertex,
 | 
				
			||||||
 | 
					                              Shader::AttributeBuffer& input,
 | 
				
			||||||
                              DebugUtils::MemoryAccessTracker& memory_accesses) {
 | 
					                              DebugUtils::MemoryAccessTracker& memory_accesses) {
 | 
				
			||||||
    ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices.");
 | 
					    ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices.");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -142,7 +143,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
 | 
				
			|||||||
                      input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
 | 
					                      input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
 | 
				
			||||||
        } else if (vertex_attribute_is_default[i]) {
 | 
					        } else if (vertex_attribute_is_default[i]) {
 | 
				
			||||||
            // Load the default attribute if we're configured to do so
 | 
					            // Load the default attribute if we're configured to do so
 | 
				
			||||||
            input.attr[i] = g_state.vs_default_attributes[i];
 | 
					            input.attr[i] = g_state.input_default_attributes.attr[i];
 | 
				
			||||||
            LOG_TRACE(HW_GPU,
 | 
					            LOG_TRACE(HW_GPU,
 | 
				
			||||||
                      "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i,
 | 
					                      "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i,
 | 
				
			||||||
                      vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
 | 
					                      vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
 | 
				
			||||||
 | 
				
			|||||||
@ -11,7 +11,7 @@ class MemoryAccessTracker;
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Shader {
 | 
					namespace Shader {
 | 
				
			||||||
struct InputVertex;
 | 
					struct AttributeBuffer;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class VertexLoader {
 | 
					class VertexLoader {
 | 
				
			||||||
@ -22,7 +22,7 @@ public:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void Setup(const Pica::Regs& regs);
 | 
					    void Setup(const Pica::Regs& regs);
 | 
				
			||||||
    void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input,
 | 
					    void LoadVertex(u32 base_address, int index, int vertex, Shader::AttributeBuffer& input,
 | 
				
			||||||
                    DebugUtils::MemoryAccessTracker& memory_accesses);
 | 
					                    DebugUtils::MemoryAccessTracker& memory_accesses);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int GetNumTotalAttributes() const {
 | 
					    int GetNumTotalAttributes() const {
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user