mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu.git
				synced 2025-05-12 00:45:25 +00:00 
			
		
		
		
	
						commit
						f40fabd688
					
				@ -64,6 +64,7 @@ MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
 | 
					OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
 | 
				
			||||||
    auto& config = g_state.regs.vs;
 | 
					    auto& config = g_state.regs.vs;
 | 
				
			||||||
 | 
					    auto& setup = g_state.vs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    MICROPROFILE_SCOPE(GPU_Shader);
 | 
					    MICROPROFILE_SCOPE(GPU_Shader);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -81,11 +82,11 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#ifdef ARCHITECTURE_x86_64
 | 
					#ifdef ARCHITECTURE_x86_64
 | 
				
			||||||
    if (VideoCore::g_shader_jit_enabled)
 | 
					    if (VideoCore::g_shader_jit_enabled)
 | 
				
			||||||
        jit_shader->Run(&state.registers, g_state.regs.vs.main_offset);
 | 
					        jit_shader->Run(setup, state, config.main_offset);
 | 
				
			||||||
    else
 | 
					    else
 | 
				
			||||||
        RunInterpreter(state);
 | 
					        RunInterpreter(setup, state, config.main_offset);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
    RunInterpreter(state);
 | 
					    RunInterpreter(setup, state, config.main_offset);
 | 
				
			||||||
#endif // ARCHITECTURE_x86_64
 | 
					#endif // ARCHITECTURE_x86_64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Setup output data
 | 
					    // Setup output data
 | 
				
			||||||
@ -156,7 +157,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_
 | 
				
			|||||||
    state.conditional_code[0] = false;
 | 
					    state.conditional_code[0] = false;
 | 
				
			||||||
    state.conditional_code[1] = false;
 | 
					    state.conditional_code[1] = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    RunInterpreter(state);
 | 
					    RunInterpreter(setup, state, config.main_offset);
 | 
				
			||||||
    return state.debug;
 | 
					    return state.debug;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -283,10 +283,10 @@ struct UnitState {
 | 
				
			|||||||
    static size_t InputOffset(const SourceRegister& reg) {
 | 
					    static size_t InputOffset(const SourceRegister& reg) {
 | 
				
			||||||
        switch (reg.GetRegisterType()) {
 | 
					        switch (reg.GetRegisterType()) {
 | 
				
			||||||
        case RegisterType::Input:
 | 
					        case RegisterType::Input:
 | 
				
			||||||
            return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
 | 
					            return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        case RegisterType::Temporary:
 | 
					        case RegisterType::Temporary:
 | 
				
			||||||
            return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
 | 
					            return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        default:
 | 
					        default:
 | 
				
			||||||
            UNREACHABLE();
 | 
					            UNREACHABLE();
 | 
				
			||||||
@ -297,10 +297,10 @@ struct UnitState {
 | 
				
			|||||||
    static size_t OutputOffset(const DestRegister& reg) {
 | 
					    static size_t OutputOffset(const DestRegister& reg) {
 | 
				
			||||||
        switch (reg.GetRegisterType()) {
 | 
					        switch (reg.GetRegisterType()) {
 | 
				
			||||||
        case RegisterType::Output:
 | 
					        case RegisterType::Output:
 | 
				
			||||||
            return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
 | 
					            return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        case RegisterType::Temporary:
 | 
					        case RegisterType::Temporary:
 | 
				
			||||||
            return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
 | 
					            return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        default:
 | 
					        default:
 | 
				
			||||||
            UNREACHABLE();
 | 
					            UNREACHABLE();
 | 
				
			||||||
@ -323,6 +323,23 @@ struct ShaderSetup {
 | 
				
			|||||||
        std::array<Math::Vec4<u8>, 4> i;
 | 
					        std::array<Math::Vec4<u8>, 4> i;
 | 
				
			||||||
    } uniforms;
 | 
					    } uniforms;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    static size_t UniformOffset(RegisterType type, unsigned index) {
 | 
				
			||||||
 | 
					        switch (type) {
 | 
				
			||||||
 | 
					        case RegisterType::FloatUniform:
 | 
				
			||||||
 | 
					            return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        case RegisterType::BoolUniform:
 | 
				
			||||||
 | 
					            return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        case RegisterType::IntUniform:
 | 
				
			||||||
 | 
					            return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        default:
 | 
				
			||||||
 | 
					            UNREACHABLE();
 | 
				
			||||||
 | 
					            return 0;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::array<u32, 1024> program_code;
 | 
					    std::array<u32, 1024> program_code;
 | 
				
			||||||
    std::array<u32, 1024> swizzle_data;
 | 
					    std::array<u32, 1024> swizzle_data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -41,11 +41,11 @@ struct CallStackElement {
 | 
				
			|||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<bool Debug>
 | 
					template<bool Debug>
 | 
				
			||||||
void RunInterpreter(UnitState<Debug>& state) {
 | 
					void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) {
 | 
				
			||||||
    // TODO: Is there a maximal size for this?
 | 
					    // TODO: Is there a maximal size for this?
 | 
				
			||||||
    boost::container::static_vector<CallStackElement, 16> call_stack;
 | 
					    boost::container::static_vector<CallStackElement, 16> call_stack;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    u32 program_counter = g_state.regs.vs.main_offset;
 | 
					    u32 program_counter = offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const auto& uniforms = g_state.vs.uniforms;
 | 
					    const auto& uniforms = g_state.vs.uniforms;
 | 
				
			||||||
    const auto& swizzle_data = g_state.vs.swizzle_data;
 | 
					    const auto& swizzle_data = g_state.vs.swizzle_data;
 | 
				
			||||||
@ -647,8 +647,8 @@ void RunInterpreter(UnitState<Debug>& state) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Explicit instantiation
 | 
					// Explicit instantiation
 | 
				
			||||||
template void RunInterpreter(UnitState<false>& state);
 | 
					template void RunInterpreter(const ShaderSetup& setup, UnitState<false>& state, unsigned offset);
 | 
				
			||||||
template void RunInterpreter(UnitState<true>& state);
 | 
					template void RunInterpreter(const ShaderSetup& setup, UnitState<true>& state, unsigned offset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // namespace
 | 
					} // namespace
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -11,7 +11,7 @@ namespace Shader {
 | 
				
			|||||||
template <bool Debug> struct UnitState;
 | 
					template <bool Debug> struct UnitState;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<bool Debug>
 | 
					template<bool Debug>
 | 
				
			||||||
void RunInterpreter(UnitState<Debug>& state);
 | 
					void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // namespace
 | 
					} // namespace
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -102,7 +102,7 @@ const JitFunction instr_table[64] = {
 | 
				
			|||||||
// purposes, as documented below:
 | 
					// purposes, as documented below:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Pointer to the uniform memory
 | 
					/// Pointer to the uniform memory
 | 
				
			||||||
static const X64Reg UNIFORMS = R9;
 | 
					static const X64Reg SETUP = R9;
 | 
				
			||||||
/// The two 32-bit VS address offset registers set by the MOVA instruction
 | 
					/// The two 32-bit VS address offset registers set by the MOVA instruction
 | 
				
			||||||
static const X64Reg ADDROFFS_REG_0 = R10;
 | 
					static const X64Reg ADDROFFS_REG_0 = R10;
 | 
				
			||||||
static const X64Reg ADDROFFS_REG_1 = R11;
 | 
					static const X64Reg ADDROFFS_REG_1 = R11;
 | 
				
			||||||
@ -117,7 +117,7 @@ static const X64Reg COND0 = R13;
 | 
				
			|||||||
/// Result of the previous CMP instruction for the Y-component comparison
 | 
					/// Result of the previous CMP instruction for the Y-component comparison
 | 
				
			||||||
static const X64Reg COND1 = R14;
 | 
					static const X64Reg COND1 = R14;
 | 
				
			||||||
/// Pointer to the UnitState instance for the current VS unit
 | 
					/// Pointer to the UnitState instance for the current VS unit
 | 
				
			||||||
static const X64Reg REGISTERS = R15;
 | 
					static const X64Reg STATE = R15;
 | 
				
			||||||
/// SIMD scratch register
 | 
					/// SIMD scratch register
 | 
				
			||||||
static const X64Reg SCRATCH = XMM0;
 | 
					static const X64Reg SCRATCH = XMM0;
 | 
				
			||||||
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
 | 
					/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
 | 
				
			||||||
@ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15;
 | 
				
			|||||||
// State registers that must not be modified by external functions calls
 | 
					// State registers that must not be modified by external functions calls
 | 
				
			||||||
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
 | 
					// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
 | 
				
			||||||
static const BitSet32 persistent_regs = {
 | 
					static const BitSet32 persistent_regs = {
 | 
				
			||||||
    UNIFORMS, REGISTERS, // Pointers to register blocks
 | 
					    SETUP, STATE, // Pointers to register blocks
 | 
				
			||||||
    ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
 | 
					    ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
 | 
				
			||||||
    ONE+16, NEGBIT+16, // Constants
 | 
					    ONE+16, NEGBIT+16, // Constants
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
@ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
 | 
				
			|||||||
    size_t src_offset;
 | 
					    size_t src_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
 | 
					    if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
 | 
				
			||||||
        src_ptr = UNIFORMS;
 | 
					        src_ptr = SETUP;
 | 
				
			||||||
        src_offset = src_reg.GetIndex() * sizeof(float24) * 4;
 | 
					        src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex());
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        src_ptr = REGISTERS;
 | 
					        src_ptr = STATE;
 | 
				
			||||||
        src_offset = UnitState<false>::InputOffset(src_reg);
 | 
					        src_offset = UnitState<false>::InputOffset(src_reg);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
 | 
				
			|||||||
    // If all components are enabled, write the result to the destination register
 | 
					    // If all components are enabled, write the result to the destination register
 | 
				
			||||||
    if (swiz.dest_mask == NO_DEST_REG_MASK) {
 | 
					    if (swiz.dest_mask == NO_DEST_REG_MASK) {
 | 
				
			||||||
        // Store dest back to memory
 | 
					        // Store dest back to memory
 | 
				
			||||||
        MOVAPS(MDisp(REGISTERS, dest_offset_disp), src);
 | 
					        MOVAPS(MDisp(STATE, dest_offset_disp), src);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        // Not all components are enabled, so mask the result when storing to the destination register...
 | 
					        // Not all components are enabled, so mask the result when storing to the destination register...
 | 
				
			||||||
        MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp));
 | 
					        MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (Common::GetCPUCaps().sse4_1) {
 | 
					        if (Common::GetCPUCaps().sse4_1) {
 | 
				
			||||||
            u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
 | 
					            u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
 | 
				
			||||||
@ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // Store dest back to memory
 | 
					        // Store dest back to memory
 | 
				
			||||||
        MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH);
 | 
					        MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void JitShader::Compile_UniformCondition(Instruction instr) {
 | 
					void JitShader::Compile_UniformCondition(Instruction instr) {
 | 
				
			||||||
    int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool));
 | 
					    int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
 | 
				
			||||||
    CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
 | 
					    CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
BitSet32 JitShader::PersistentCallerSavedRegs() {
 | 
					BitSet32 JitShader::PersistentCallerSavedRegs() {
 | 
				
			||||||
@ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    looping = true;
 | 
					    looping = true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>));
 | 
					    int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
 | 
				
			||||||
    MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset));
 | 
					    MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
 | 
				
			||||||
    MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
 | 
					    MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
 | 
				
			||||||
    SHR(32, R(LOOPCOUNT_REG), Imm8(8));
 | 
					    SHR(32, R(LOOPCOUNT_REG), Imm8(8));
 | 
				
			||||||
    AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
 | 
					    AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
 | 
				
			||||||
@ -826,8 +826,8 @@ void JitShader::Compile() {
 | 
				
			|||||||
    // The stack pointer is 8 modulo 16 at the entry of a procedure
 | 
					    // The stack pointer is 8 modulo 16 at the entry of a procedure
 | 
				
			||||||
    ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
 | 
					    ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
 | 
					    MOV(PTRBITS, R(SETUP), R(ABI_PARAM1));
 | 
				
			||||||
    MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
 | 
					    MOV(PTRBITS, R(STATE), R(ABI_PARAM2));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Zero address/loop  registers
 | 
					    // Zero address/loop  registers
 | 
				
			||||||
    XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0));
 | 
					    XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0));
 | 
				
			||||||
@ -845,7 +845,7 @@ void JitShader::Compile() {
 | 
				
			|||||||
    MOVAPS(NEGBIT, MatR(RAX));
 | 
					    MOVAPS(NEGBIT, MatR(RAX));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Jump to start of the shader program
 | 
					    // Jump to start of the shader program
 | 
				
			||||||
    JMPptr(R(ABI_PARAM2));
 | 
					    JMPptr(R(ABI_PARAM3));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Compile entire program
 | 
					    // Compile entire program
 | 
				
			||||||
    Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
 | 
					    Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
 | 
				
			||||||
 | 
				
			|||||||
@ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock {
 | 
				
			|||||||
public:
 | 
					public:
 | 
				
			||||||
    JitShader();
 | 
					    JitShader();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void Run(void* registers, unsigned offset) const {
 | 
					    void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const {
 | 
				
			||||||
        program(registers, code_ptr[offset]);
 | 
					        program(&setup, &state, code_ptr[offset]);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void Compile();
 | 
					    void Compile();
 | 
				
			||||||
@ -117,7 +117,7 @@ private:
 | 
				
			|||||||
    /// Branches that need to be fixed up once the entire shader program is compiled
 | 
					    /// Branches that need to be fixed up once the entire shader program is compiled
 | 
				
			||||||
    std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
 | 
					    std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    using CompiledShader = void(void* registers, const u8* start_addr);
 | 
					    using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
 | 
				
			||||||
    CompiledShader* program = nullptr;
 | 
					    CompiledShader* program = nullptr;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user