mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu-mainline.git
				synced 2025-03-21 01:53:15 +00:00 
			
		
		
		
	maxwell_3d: Restructure macro upload to use a single macro code memory.
- Fixes an issue where macros could be skipped.
- Fixes rendering of distant objects in Super Mario Odyssey.
This commit is contained in:
		
							parent
							
								
									d08457f879
								
							
						
					
					
						commit
						de0ab806df
					
				@ -43,15 +43,17 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
 | 
			
		||||
    // Reset the current macro.
 | 
			
		||||
    executing_macro = 0;
 | 
			
		||||
 | 
			
		||||
    // The requested macro must have been uploaded already.
 | 
			
		||||
    auto macro_code = uploaded_macros.find(method);
 | 
			
		||||
    if (macro_code == uploaded_macros.end()) {
 | 
			
		||||
        LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method);
 | 
			
		||||
    // Lookup the macro offset
 | 
			
		||||
    const u32 entry{(method - MacroRegistersStart) >> 1};
 | 
			
		||||
    const auto& search{macro_offsets.find(entry)};
 | 
			
		||||
    if (search == macro_offsets.end()) {
 | 
			
		||||
        LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
 | 
			
		||||
        UNREACHABLE();
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Execute the current macro.
 | 
			
		||||
    macro_interpreter.Execute(macro_code->second, std::move(parameters));
 | 
			
		||||
    macro_interpreter.Execute(search->second, std::move(parameters));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
 | 
			
		||||
@ -97,6 +99,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
 | 
			
		||||
        ProcessMacroUpload(value);
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case MAXWELL3D_REG_INDEX(macros.bind): {
 | 
			
		||||
        ProcessMacroBind(value);
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
 | 
			
		||||
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
 | 
			
		||||
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
 | 
			
		||||
@ -158,9 +164,13 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Maxwell3D::ProcessMacroUpload(u32 data) {
 | 
			
		||||
    // Store the uploaded macro code to interpret them when they're called.
 | 
			
		||||
    auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart];
 | 
			
		||||
    macro.push_back(data);
 | 
			
		||||
    ASSERT_MSG(regs.macros.upload_address < macro_memory.size(),
 | 
			
		||||
               "upload_address exceeded macro_memory size!");
 | 
			
		||||
    macro_memory[regs.macros.upload_address++] = data;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Maxwell3D::ProcessMacroBind(u32 data) {
 | 
			
		||||
    macro_offsets[regs.macros.entry] = data;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Maxwell3D::ProcessQueryGet() {
 | 
			
		||||
 | 
			
		||||
@ -475,12 +475,13 @@ public:
 | 
			
		||||
                INSERT_PADDING_WORDS(0x45);
 | 
			
		||||
 | 
			
		||||
                struct {
 | 
			
		||||
                    INSERT_PADDING_WORDS(1);
 | 
			
		||||
                    u32 upload_address;
 | 
			
		||||
                    u32 data;
 | 
			
		||||
                    u32 entry;
 | 
			
		||||
                    u32 bind;
 | 
			
		||||
                } macros;
 | 
			
		||||
 | 
			
		||||
                INSERT_PADDING_WORDS(0x189);
 | 
			
		||||
                INSERT_PADDING_WORDS(0x188);
 | 
			
		||||
 | 
			
		||||
                u32 tfb_enabled;
 | 
			
		||||
 | 
			
		||||
@ -994,12 +995,25 @@ public:
 | 
			
		||||
    /// Returns the texture information for a specific texture in a specific shader stage.
 | 
			
		||||
    Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
 | 
			
		||||
 | 
			
		||||
    /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
 | 
			
		||||
    /// we've seen used.
 | 
			
		||||
    using MacroMemory = std::array<u32, 0x40000>;
 | 
			
		||||
 | 
			
		||||
    /// Gets a reference to macro memory.
 | 
			
		||||
    const MacroMemory& GetMacroMemory() const {
 | 
			
		||||
        return macro_memory;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    void InitializeRegisterDefaults();
 | 
			
		||||
 | 
			
		||||
    VideoCore::RasterizerInterface& rasterizer;
 | 
			
		||||
 | 
			
		||||
    std::unordered_map<u32, std::vector<u32>> uploaded_macros;
 | 
			
		||||
    /// Start offsets of each macro in macro_memory
 | 
			
		||||
    std::unordered_map<u32, u32> macro_offsets;
 | 
			
		||||
 | 
			
		||||
    /// Memory for macro code
 | 
			
		||||
    MacroMemory macro_memory;
 | 
			
		||||
 | 
			
		||||
    /// Macro method that is currently being executed / being fed parameters.
 | 
			
		||||
    u32 executing_macro = 0;
 | 
			
		||||
@ -1022,9 +1036,12 @@ private:
 | 
			
		||||
     */
 | 
			
		||||
    void CallMacroMethod(u32 method, std::vector<u32> parameters);
 | 
			
		||||
 | 
			
		||||
    /// Handles writes to the macro uploading registers.
 | 
			
		||||
    /// Handles writes to the macro uploading register.
 | 
			
		||||
    void ProcessMacroUpload(u32 data);
 | 
			
		||||
 | 
			
		||||
    /// Handles writes to the macro bind register.
 | 
			
		||||
    void ProcessMacroBind(u32 data);
 | 
			
		||||
 | 
			
		||||
    /// Handles a write to the CLEAR_BUFFERS register.
 | 
			
		||||
    void ProcessClearBuffers();
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -11,7 +11,7 @@ namespace Tegra {
 | 
			
		||||
 | 
			
		||||
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
 | 
			
		||||
 | 
			
		||||
void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) {
 | 
			
		||||
void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
 | 
			
		||||
    Reset();
 | 
			
		||||
    registers[1] = parameters[0];
 | 
			
		||||
    this->parameters = std::move(parameters);
 | 
			
		||||
@ -19,7 +19,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa
 | 
			
		||||
    // Execute the code until we hit an exit condition.
 | 
			
		||||
    bool keep_executing = true;
 | 
			
		||||
    while (keep_executing) {
 | 
			
		||||
        keep_executing = Step(code, false);
 | 
			
		||||
        keep_executing = Step(offset, false);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Assert that the macro used all the input parameters
 | 
			
		||||
@ -37,10 +37,10 @@ void MacroInterpreter::Reset() {
 | 
			
		||||
    next_parameter_index = 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
 | 
			
		||||
bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
 | 
			
		||||
    u32 base_address = pc;
 | 
			
		||||
 | 
			
		||||
    Opcode opcode = GetOpcode(code);
 | 
			
		||||
    Opcode opcode = GetOpcode(offset);
 | 
			
		||||
    pc += 4;
 | 
			
		||||
 | 
			
		||||
    // Update the program counter if we were delayed
 | 
			
		||||
@ -108,7 +108,7 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
 | 
			
		||||
 | 
			
		||||
            delayed_pc = base_address + opcode.GetBranchTarget();
 | 
			
		||||
            // Execute one more instruction due to the delay slot.
 | 
			
		||||
            return Step(code, true);
 | 
			
		||||
            return Step(offset, true);
 | 
			
		||||
        }
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
@ -121,17 +121,18 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
 | 
			
		||||
        // Exit has a delay slot, execute the next instruction
 | 
			
		||||
        // Note: Executing an exit during a branch delay slot will cause the instruction at the
 | 
			
		||||
        // branch target to be executed before exiting.
 | 
			
		||||
        Step(code, true);
 | 
			
		||||
        Step(offset, true);
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const {
 | 
			
		||||
MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const {
 | 
			
		||||
    const auto& macro_memory{maxwell3d.GetMacroMemory()};
 | 
			
		||||
    ASSERT((pc % sizeof(u32)) == 0);
 | 
			
		||||
    ASSERT(pc < code.size() * sizeof(u32));
 | 
			
		||||
    return {code[pc / sizeof(u32)]};
 | 
			
		||||
    ASSERT((pc + offset) < macro_memory.size() * sizeof(u32));
 | 
			
		||||
    return {macro_memory[offset + pc / sizeof(u32)]};
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const {
 | 
			
		||||
 | 
			
		||||
@ -22,10 +22,10 @@ public:
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Executes the macro code with the specified input parameters.
 | 
			
		||||
     * @param code The macro byte code to execute
 | 
			
		||||
     * @param parameters The parameters of the macro
 | 
			
		||||
     * @param offset Offset to start execution at.
 | 
			
		||||
     * @param parameters The parameters of the macro.
 | 
			
		||||
     */
 | 
			
		||||
    void Execute(const std::vector<u32>& code, std::vector<u32> parameters);
 | 
			
		||||
    void Execute(u32 offset, std::vector<u32> parameters);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    enum class Operation : u32 {
 | 
			
		||||
@ -110,11 +110,11 @@ private:
 | 
			
		||||
    /**
 | 
			
		||||
     * Executes a single macro instruction located at the current program counter. Returns whether
 | 
			
		||||
     * the interpreter should keep running.
 | 
			
		||||
     * @param code The macro code to execute.
 | 
			
		||||
     * @param offset Offset to start execution at.
 | 
			
		||||
     * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
 | 
			
		||||
     * previous instruction.
 | 
			
		||||
     */
 | 
			
		||||
    bool Step(const std::vector<u32>& code, bool is_delay_slot);
 | 
			
		||||
    bool Step(u32 offset, bool is_delay_slot);
 | 
			
		||||
 | 
			
		||||
    /// Calculates the result of an ALU operation. src_a OP src_b;
 | 
			
		||||
    u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const;
 | 
			
		||||
@ -127,7 +127,7 @@ private:
 | 
			
		||||
    bool EvaluateBranchCondition(BranchCondition cond, u32 value) const;
 | 
			
		||||
 | 
			
		||||
    /// Reads an opcode at the current program counter location.
 | 
			
		||||
    Opcode GetOpcode(const std::vector<u32>& code) const;
 | 
			
		||||
    Opcode GetOpcode(u32 offset) const;
 | 
			
		||||
 | 
			
		||||
    /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
 | 
			
		||||
    u32 GetRegister(u32 register_id) const;
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user