mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu-mainline.git
				synced 2025-03-21 01:53:15 +00:00 
			
		
		
		
	Merge pull request #2609 from FernandoS27/new-scan
Implement a New Shader Scanner, Decompile Flow Stack and implement BRX BRA.CC
This commit is contained in:
		
						commit
						bb67091c77
					
				| @ -82,6 +82,8 @@ set(HASH_FILES | ||||
|     "${VIDEO_CORE}/shader/decode/shift.cpp" | ||||
|     "${VIDEO_CORE}/shader/decode/video.cpp" | ||||
|     "${VIDEO_CORE}/shader/decode/xmad.cpp" | ||||
|     "${VIDEO_CORE}/shader/control_flow.cpp" | ||||
|     "${VIDEO_CORE}/shader/control_flow.h" | ||||
|     "${VIDEO_CORE}/shader/decode.cpp" | ||||
|     "${VIDEO_CORE}/shader/node.h" | ||||
|     "${VIDEO_CORE}/shader/node_helper.cpp" | ||||
|  | ||||
| @ -56,6 +56,8 @@ add_custom_command(OUTPUT scm_rev.cpp | ||||
|       "${VIDEO_CORE}/shader/decode/shift.cpp" | ||||
|       "${VIDEO_CORE}/shader/decode/video.cpp" | ||||
|       "${VIDEO_CORE}/shader/decode/xmad.cpp" | ||||
|       "${VIDEO_CORE}/shader/control_flow.cpp" | ||||
|       "${VIDEO_CORE}/shader/control_flow.h" | ||||
|       "${VIDEO_CORE}/shader/decode.cpp" | ||||
|       "${VIDEO_CORE}/shader/node.h" | ||||
|       "${VIDEO_CORE}/shader/node_helper.cpp" | ||||
|  | ||||
| @ -103,6 +103,8 @@ add_library(video_core STATIC | ||||
|     shader/decode/video.cpp | ||||
|     shader/decode/xmad.cpp | ||||
|     shader/decode/other.cpp | ||||
|     shader/control_flow.cpp | ||||
|     shader/control_flow.h | ||||
|     shader/decode.cpp | ||||
|     shader/node_helper.cpp | ||||
|     shader/node_helper.h | ||||
|  | ||||
| @ -1367,6 +1367,20 @@ union Instruction { | ||||
|         } | ||||
|     } bra; | ||||
| 
 | ||||
|     union { | ||||
|         BitField<20, 24, u64> target; | ||||
|         BitField<5, 1, u64> constant_buffer; | ||||
| 
 | ||||
|         s32 GetBranchExtend() const { | ||||
|             // Sign extend the branch target offset
 | ||||
|             u32 mask = 1U << (24 - 1); | ||||
|             u32 value = static_cast<u32>(target); | ||||
|             // The branch offset is relative to the next instruction and is stored in bytes, so
 | ||||
|             // divide it by the size of an instruction and add 1 to it.
 | ||||
|             return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; | ||||
|         } | ||||
|     } brx; | ||||
| 
 | ||||
|     union { | ||||
|         BitField<39, 1, u64> emit; // EmitVertex
 | ||||
|         BitField<40, 1, u64> cut;  // EndPrimitive
 | ||||
| @ -1464,6 +1478,7 @@ public: | ||||
|         BFE_IMM, | ||||
|         BFI_IMM_R, | ||||
|         BRA, | ||||
|         BRX, | ||||
|         PBK, | ||||
|         LD_A, | ||||
|         LD_L, | ||||
| @ -1738,6 +1753,7 @@ private: | ||||
|             INST("111000101001----", Id::SSY, Type::Flow, "SSY"), | ||||
|             INST("111000101010----", Id::PBK, Type::Flow, "PBK"), | ||||
|             INST("111000100100----", Id::BRA, Type::Flow, "BRA"), | ||||
|             INST("111000100101----", Id::BRX, Type::Flow, "BRX"), | ||||
|             INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), | ||||
|             INST("111000110100---", Id::BRK, Type::Flow, "BRK"), | ||||
|             INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), | ||||
|  | ||||
| @ -129,9 +129,11 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | ||||
| 
 | ||||
| /// Hashes one (or two) program streams
 | ||||
| u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, | ||||
|                         const ProgramCode& code_b) { | ||||
|     u64 unique_identifier = | ||||
|         Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code)); | ||||
|                         const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { | ||||
|     if (size_a == 0) { | ||||
|         size_a = CalculateProgramSize(code); | ||||
|     } | ||||
|     u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); | ||||
|     if (program_type != Maxwell::ShaderProgram::VertexA) { | ||||
|         return unique_identifier; | ||||
|     } | ||||
| @ -140,8 +142,11 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& | ||||
|     std::size_t seed = 0; | ||||
|     boost::hash_combine(seed, unique_identifier); | ||||
| 
 | ||||
|     const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), | ||||
|                                                 CalculateProgramSize(code_b)); | ||||
|     if (size_b == 0) { | ||||
|         size_b = CalculateProgramSize(code_b); | ||||
|     } | ||||
|     const u64 identifier_b = | ||||
|         Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b); | ||||
|     boost::hash_combine(seed, identifier_b); | ||||
|     return static_cast<u64>(seed); | ||||
| } | ||||
| @ -150,14 +155,17 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& | ||||
| GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, | ||||
|                                       ProgramCode program_code, ProgramCode program_code_b) { | ||||
|     GLShader::ShaderSetup setup(program_code); | ||||
|     setup.program.size_a = CalculateProgramSize(program_code); | ||||
|     setup.program.size_b = 0; | ||||
|     if (program_type == Maxwell::ShaderProgram::VertexA) { | ||||
|         // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
 | ||||
|         // Conventional HW does not support this, so we combine VertexA and VertexB into one
 | ||||
|         // stage here.
 | ||||
|         setup.SetProgramB(program_code_b); | ||||
|         setup.program.size_b = CalculateProgramSize(program_code_b); | ||||
|     } | ||||
|     setup.program.unique_identifier = | ||||
|         GetUniqueIdentifier(program_type, program_code, program_code_b); | ||||
|     setup.program.unique_identifier = GetUniqueIdentifier( | ||||
|         program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); | ||||
| 
 | ||||
|     switch (program_type) { | ||||
|     case Maxwell::ShaderProgram::VertexA: | ||||
|  | ||||
| @ -191,10 +191,12 @@ public: | ||||
| 
 | ||||
|         // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
 | ||||
|         // unlikely that shaders will use 20 nested SSYs and PBKs.
 | ||||
|         constexpr u32 FLOW_STACK_SIZE = 20; | ||||
|         for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { | ||||
|             code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); | ||||
|             code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | ||||
|         if (!ir.IsFlowStackDisabled()) { | ||||
|             constexpr u32 FLOW_STACK_SIZE = 20; | ||||
|             for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { | ||||
|                 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); | ||||
|                 code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         code.AddLine("while (true) {{"); | ||||
| @ -1555,6 +1557,14 @@ private: | ||||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     std::string BranchIndirect(Operation operation) { | ||||
|         const std::string op_a = VisitOperand(operation, 0, Type::Uint); | ||||
| 
 | ||||
|         code.AddLine("jmp_to = {};", op_a); | ||||
|         code.AddLine("break;"); | ||||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     std::string PushFlowStack(Operation operation) { | ||||
|         const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||||
|         const auto target = std::get_if<ImmediateNode>(&*operation[0]); | ||||
| @ -1789,6 +1799,7 @@ private: | ||||
|         &GLSLDecompiler::ImageStore, | ||||
| 
 | ||||
|         &GLSLDecompiler::Branch, | ||||
|         &GLSLDecompiler::BranchIndirect, | ||||
|         &GLSLDecompiler::PushFlowStack, | ||||
|         &GLSLDecompiler::PopFlowStack, | ||||
|         &GLSLDecompiler::Exit, | ||||
|  | ||||
| @ -29,14 +29,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | ||||
| }; | ||||
| 
 | ||||
| )"; | ||||
|     const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | ||||
|     const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||||
|     ProgramResult program = | ||||
|         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); | ||||
| 
 | ||||
|     out += program.first; | ||||
| 
 | ||||
|     if (setup.IsDualProgram()) { | ||||
|         const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); | ||||
|         const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); | ||||
|         ProgramResult program_b = | ||||
|             Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); | ||||
| 
 | ||||
| @ -80,7 +80,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | ||||
| }; | ||||
| 
 | ||||
| )"; | ||||
|     const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | ||||
|     const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||||
|     ProgramResult program = | ||||
|         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); | ||||
|     out += program.first; | ||||
| @ -115,7 +115,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | ||||
| }; | ||||
| 
 | ||||
| )"; | ||||
|     const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | ||||
|     const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||||
|     ProgramResult program = | ||||
|         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); | ||||
| 
 | ||||
|  | ||||
| @ -27,6 +27,8 @@ struct ShaderSetup { | ||||
|         ProgramCode code; | ||||
|         ProgramCode code_b; // Used for dual vertex shaders
 | ||||
|         u64 unique_identifier; | ||||
|         std::size_t size_a; | ||||
|         std::size_t size_b; | ||||
|     } program; | ||||
| 
 | ||||
|     /// Used in scenarios where we have a dual vertex shaders
 | ||||
|  | ||||
| @ -949,6 +949,14 @@ private: | ||||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     Id BranchIndirect(Operation operation) { | ||||
|         const Id op_a = VisitOperand<Type::Uint>(operation, 0); | ||||
| 
 | ||||
|         Emit(OpStore(jmp_to, op_a)); | ||||
|         BranchingOp([&]() { Emit(OpBranch(continue_label)); }); | ||||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     Id PushFlowStack(Operation operation) { | ||||
|         const auto target = std::get_if<ImmediateNode>(&*operation[0]); | ||||
|         ASSERT(target); | ||||
| @ -1334,6 +1342,7 @@ private: | ||||
|         &SPIRVDecompiler::ImageStore, | ||||
| 
 | ||||
|         &SPIRVDecompiler::Branch, | ||||
|         &SPIRVDecompiler::BranchIndirect, | ||||
|         &SPIRVDecompiler::PushFlowStack, | ||||
|         &SPIRVDecompiler::PopFlowStack, | ||||
|         &SPIRVDecompiler::Exit, | ||||
|  | ||||
							
								
								
									
										476
									
								
								src/video_core/shader/control_flow.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										476
									
								
								src/video_core/shader/control_flow.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,476 @@ | ||||
| // Copyright 2019 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <list> | ||||
| #include <map> | ||||
| #include <stack> | ||||
| #include <unordered_map> | ||||
| #include <unordered_set> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/shader/control_flow.h" | ||||
| #include "video_core/shader/shader_ir.h" | ||||
| 
 | ||||
| namespace VideoCommon::Shader { | ||||
| 
 | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
| 
 | ||||
| constexpr s32 unassigned_branch = -2; | ||||
| 
 | ||||
| struct Query { | ||||
|     u32 address{}; | ||||
|     std::stack<u32> ssy_stack{}; | ||||
|     std::stack<u32> pbk_stack{}; | ||||
| }; | ||||
| 
 | ||||
| struct BlockStack { | ||||
|     BlockStack() = default; | ||||
|     BlockStack(const BlockStack& b) = default; | ||||
|     BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} | ||||
|     std::stack<u32> ssy_stack{}; | ||||
|     std::stack<u32> pbk_stack{}; | ||||
| }; | ||||
| 
 | ||||
| struct BlockBranchInfo { | ||||
|     Condition condition{}; | ||||
|     s32 address{exit_branch}; | ||||
|     bool kill{}; | ||||
|     bool is_sync{}; | ||||
|     bool is_brk{}; | ||||
|     bool ignore{}; | ||||
| }; | ||||
| 
 | ||||
| struct BlockInfo { | ||||
|     u32 start{}; | ||||
|     u32 end{}; | ||||
|     bool visited{}; | ||||
|     BlockBranchInfo branch{}; | ||||
| 
 | ||||
|     bool IsInside(const u32 address) const { | ||||
|         return start <= address && address <= end; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| struct CFGRebuildState { | ||||
|     explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, | ||||
|                              const u32 start) | ||||
|         : program_code{program_code}, program_size{program_size}, start{start} {} | ||||
| 
 | ||||
|     u32 start{}; | ||||
|     std::vector<BlockInfo> block_info{}; | ||||
|     std::list<u32> inspect_queries{}; | ||||
|     std::list<Query> queries{}; | ||||
|     std::unordered_map<u32, u32> registered{}; | ||||
|     std::unordered_set<u32> labels{}; | ||||
|     std::map<u32, u32> ssy_labels{}; | ||||
|     std::map<u32, u32> pbk_labels{}; | ||||
|     std::unordered_map<u32, BlockStack> stacks{}; | ||||
|     const ProgramCode& program_code; | ||||
|     const std::size_t program_size; | ||||
| }; | ||||
| 
 | ||||
| enum class BlockCollision : u32 { None, Found, Inside }; | ||||
| 
 | ||||
| std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) { | ||||
|     const auto& blocks = state.block_info; | ||||
|     for (u32 index = 0; index < blocks.size(); index++) { | ||||
|         if (blocks[index].start == address) { | ||||
|             return {BlockCollision::Found, index}; | ||||
|         } | ||||
|         if (blocks[index].IsInside(address)) { | ||||
|             return {BlockCollision::Inside, index}; | ||||
|         } | ||||
|     } | ||||
|     return {BlockCollision::None, -1}; | ||||
| } | ||||
| 
 | ||||
| struct ParseInfo { | ||||
|     BlockBranchInfo branch_info{}; | ||||
|     u32 end_address{}; | ||||
| }; | ||||
| 
 | ||||
| BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { | ||||
|     auto& it = state.block_info.emplace_back(); | ||||
|     it.start = start; | ||||
|     it.end = end; | ||||
|     const u32 index = static_cast<u32>(state.block_info.size() - 1); | ||||
|     state.registered.insert({start, index}); | ||||
|     return it; | ||||
| } | ||||
| 
 | ||||
| Pred GetPredicate(u32 index, bool negated) { | ||||
|     return static_cast<Pred>(index + (negated ? 8 : 0)); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Returns whether the instruction at the specified offset is a 'sched' instruction. | ||||
|  * Sched instructions always appear before a sequence of 3 instructions. | ||||
|  */ | ||||
| constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | ||||
|     constexpr u32 SchedPeriod = 4; | ||||
|     u32 absolute_offset = offset - main_offset; | ||||
| 
 | ||||
|     return (absolute_offset % SchedPeriod) == 0; | ||||
| } | ||||
| 
 | ||||
| enum class ParseResult : u32 { | ||||
|     ControlCaught, | ||||
|     BlockEnd, | ||||
|     AbnormalFlow, | ||||
| }; | ||||
| 
 | ||||
| std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { | ||||
|     u32 offset = static_cast<u32>(address); | ||||
|     const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); | ||||
|     ParseInfo parse_info{}; | ||||
| 
 | ||||
|     const auto insert_label = [](CFGRebuildState& state, u32 address) { | ||||
|         const auto pair = state.labels.emplace(address); | ||||
|         if (pair.second) { | ||||
|             state.inspect_queries.push_back(address); | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     while (true) { | ||||
|         if (offset >= end_address) { | ||||
|             // ASSERT_OR_EXECUTE can't be used, as it ignores the break
 | ||||
|             ASSERT_MSG(false, "Shader passed the current limit!"); | ||||
|             parse_info.branch_info.address = exit_branch; | ||||
|             parse_info.branch_info.ignore = false; | ||||
|             break; | ||||
|         } | ||||
|         if (state.registered.count(offset) != 0) { | ||||
|             parse_info.branch_info.address = offset; | ||||
|             parse_info.branch_info.ignore = true; | ||||
|             break; | ||||
|         } | ||||
|         if (IsSchedInstruction(offset, state.start)) { | ||||
|             offset++; | ||||
|             continue; | ||||
|         } | ||||
|         const Instruction instr = {state.program_code[offset]}; | ||||
|         const auto opcode = OpCode::Decode(instr); | ||||
|         if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { | ||||
|             offset++; | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         switch (opcode->get().GetId()) { | ||||
|         case OpCode::Id::EXIT: { | ||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||||
|             parse_info.branch_info.condition.predicate = | ||||
|                 GetPredicate(pred_index, instr.negate_pred != 0); | ||||
|             if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||||
|                 offset++; | ||||
|                 continue; | ||||
|             } | ||||
|             const ConditionCode cc = instr.flow_condition_code; | ||||
|             parse_info.branch_info.condition.cc = cc; | ||||
|             if (cc == ConditionCode::F) { | ||||
|                 offset++; | ||||
|                 continue; | ||||
|             } | ||||
|             parse_info.branch_info.address = exit_branch; | ||||
|             parse_info.branch_info.kill = false; | ||||
|             parse_info.branch_info.is_sync = false; | ||||
|             parse_info.branch_info.is_brk = false; | ||||
|             parse_info.branch_info.ignore = false; | ||||
|             parse_info.end_address = offset; | ||||
| 
 | ||||
|             return {ParseResult::ControlCaught, parse_info}; | ||||
|         } | ||||
|         case OpCode::Id::BRA: { | ||||
|             if (instr.bra.constant_buffer != 0) { | ||||
|                 return {ParseResult::AbnormalFlow, parse_info}; | ||||
|             } | ||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||||
|             parse_info.branch_info.condition.predicate = | ||||
|                 GetPredicate(pred_index, instr.negate_pred != 0); | ||||
|             if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||||
|                 offset++; | ||||
|                 continue; | ||||
|             } | ||||
|             const ConditionCode cc = instr.flow_condition_code; | ||||
|             parse_info.branch_info.condition.cc = cc; | ||||
|             if (cc == ConditionCode::F) { | ||||
|                 offset++; | ||||
|                 continue; | ||||
|             } | ||||
|             const u32 branch_offset = offset + instr.bra.GetBranchTarget(); | ||||
|             if (branch_offset == 0) { | ||||
|                 parse_info.branch_info.address = exit_branch; | ||||
|             } else { | ||||
|                 parse_info.branch_info.address = branch_offset; | ||||
|             } | ||||
|             insert_label(state, branch_offset); | ||||
|             parse_info.branch_info.kill = false; | ||||
|             parse_info.branch_info.is_sync = false; | ||||
|             parse_info.branch_info.is_brk = false; | ||||
|             parse_info.branch_info.ignore = false; | ||||
|             parse_info.end_address = offset; | ||||
| 
 | ||||
|             return {ParseResult::ControlCaught, parse_info}; | ||||
|         } | ||||
|         case OpCode::Id::SYNC: { | ||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||||
|             parse_info.branch_info.condition.predicate = | ||||
|                 GetPredicate(pred_index, instr.negate_pred != 0); | ||||
|             if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||||
|                 offset++; | ||||
|                 continue; | ||||
|             } | ||||
|             const ConditionCode cc = instr.flow_condition_code; | ||||
|             parse_info.branch_info.condition.cc = cc; | ||||
|             if (cc == ConditionCode::F) { | ||||
|                 offset++; | ||||
|                 continue; | ||||
|             } | ||||
|             parse_info.branch_info.address = unassigned_branch; | ||||
|             parse_info.branch_info.kill = false; | ||||
|             parse_info.branch_info.is_sync = true; | ||||
|             parse_info.branch_info.is_brk = false; | ||||
|             parse_info.branch_info.ignore = false; | ||||
|             parse_info.end_address = offset; | ||||
| 
 | ||||
|             return {ParseResult::ControlCaught, parse_info}; | ||||
|         } | ||||
|         case OpCode::Id::BRK: { | ||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||||
|             parse_info.branch_info.condition.predicate = | ||||
|                 GetPredicate(pred_index, instr.negate_pred != 0); | ||||
|             if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||||
|                 offset++; | ||||
|                 continue; | ||||
|             } | ||||
|             const ConditionCode cc = instr.flow_condition_code; | ||||
|             parse_info.branch_info.condition.cc = cc; | ||||
|             if (cc == ConditionCode::F) { | ||||
|                 offset++; | ||||
|                 continue; | ||||
|             } | ||||
|             parse_info.branch_info.address = unassigned_branch; | ||||
|             parse_info.branch_info.kill = false; | ||||
|             parse_info.branch_info.is_sync = false; | ||||
|             parse_info.branch_info.is_brk = true; | ||||
|             parse_info.branch_info.ignore = false; | ||||
|             parse_info.end_address = offset; | ||||
| 
 | ||||
|             return {ParseResult::ControlCaught, parse_info}; | ||||
|         } | ||||
|         case OpCode::Id::KIL: { | ||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||||
|             parse_info.branch_info.condition.predicate = | ||||
|                 GetPredicate(pred_index, instr.negate_pred != 0); | ||||
|             if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||||
|                 offset++; | ||||
|                 continue; | ||||
|             } | ||||
|             const ConditionCode cc = instr.flow_condition_code; | ||||
|             parse_info.branch_info.condition.cc = cc; | ||||
|             if (cc == ConditionCode::F) { | ||||
|                 offset++; | ||||
|                 continue; | ||||
|             } | ||||
|             parse_info.branch_info.address = exit_branch; | ||||
|             parse_info.branch_info.kill = true; | ||||
|             parse_info.branch_info.is_sync = false; | ||||
|             parse_info.branch_info.is_brk = false; | ||||
|             parse_info.branch_info.ignore = false; | ||||
|             parse_info.end_address = offset; | ||||
| 
 | ||||
|             return {ParseResult::ControlCaught, parse_info}; | ||||
|         } | ||||
|         case OpCode::Id::SSY: { | ||||
|             const u32 target = offset + instr.bra.GetBranchTarget(); | ||||
|             insert_label(state, target); | ||||
|             state.ssy_labels.emplace(offset, target); | ||||
|             break; | ||||
|         } | ||||
|         case OpCode::Id::PBK: { | ||||
|             const u32 target = offset + instr.bra.GetBranchTarget(); | ||||
|             insert_label(state, target); | ||||
|             state.pbk_labels.emplace(offset, target); | ||||
|             break; | ||||
|         } | ||||
|         case OpCode::Id::BRX: { | ||||
|             return {ParseResult::AbnormalFlow, parse_info}; | ||||
|         } | ||||
|         default: | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         offset++; | ||||
|     } | ||||
|     parse_info.branch_info.kill = false; | ||||
|     parse_info.branch_info.is_sync = false; | ||||
|     parse_info.branch_info.is_brk = false; | ||||
|     parse_info.end_address = offset - 1; | ||||
|     return {ParseResult::BlockEnd, parse_info}; | ||||
| } | ||||
| 
 | ||||
| bool TryInspectAddress(CFGRebuildState& state) { | ||||
|     if (state.inspect_queries.empty()) { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     const u32 address = state.inspect_queries.front(); | ||||
|     state.inspect_queries.pop_front(); | ||||
|     const auto [result, block_index] = TryGetBlock(state, address); | ||||
|     switch (result) { | ||||
|     case BlockCollision::Found: { | ||||
|         return true; | ||||
|     } | ||||
|     case BlockCollision::Inside: { | ||||
|         // This case is the tricky one:
 | ||||
|         // We need to Split the block in 2 sepparate blocks
 | ||||
|         const u32 end = state.block_info[block_index].end; | ||||
|         BlockInfo& new_block = CreateBlockInfo(state, address, end); | ||||
|         BlockInfo& current_block = state.block_info[block_index]; | ||||
|         current_block.end = address - 1; | ||||
|         new_block.branch = current_block.branch; | ||||
|         BlockBranchInfo forward_branch{}; | ||||
|         forward_branch.address = address; | ||||
|         forward_branch.ignore = true; | ||||
|         current_block.branch = forward_branch; | ||||
|         return true; | ||||
|     } | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
|     const auto [parse_result, parse_info] = ParseCode(state, address); | ||||
|     if (parse_result == ParseResult::AbnormalFlow) { | ||||
|         // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction
 | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); | ||||
|     block_info.branch = parse_info.branch_info; | ||||
|     if (parse_info.branch_info.condition.IsUnconditional()) { | ||||
|         return true; | ||||
|     } | ||||
| 
 | ||||
|     const u32 fallthrough_address = parse_info.end_address + 1; | ||||
|     state.inspect_queries.push_front(fallthrough_address); | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| bool TryQuery(CFGRebuildState& state) { | ||||
|     const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels, | ||||
|                                   BlockInfo& block) { | ||||
|         auto gather_start = labels.lower_bound(block.start); | ||||
|         const auto gather_end = labels.upper_bound(block.end); | ||||
|         while (gather_start != gather_end) { | ||||
|             cc.push(gather_start->second); | ||||
|             gather_start++; | ||||
|         } | ||||
|     }; | ||||
|     if (state.queries.empty()) { | ||||
|         return false; | ||||
|     } | ||||
|     Query& q = state.queries.front(); | ||||
|     const u32 block_index = state.registered[q.address]; | ||||
|     BlockInfo& block = state.block_info[block_index]; | ||||
|     // If the block is visted, check if the stacks match, else gather the ssy/pbk
 | ||||
|     // labels into the current stack and look if the branch at the end of the block
 | ||||
|     // consumes a label. Schedule new queries accordingly
 | ||||
|     if (block.visited) { | ||||
|         BlockStack& stack = state.stacks[q.address]; | ||||
|         const bool all_okay = (stack.ssy_stack.size() == 0 || q.ssy_stack == stack.ssy_stack) && | ||||
|                               (stack.pbk_stack.size() == 0 || q.pbk_stack == stack.pbk_stack); | ||||
|         state.queries.pop_front(); | ||||
|         return all_okay; | ||||
|     } | ||||
|     block.visited = true; | ||||
|     state.stacks[q.address] = BlockStack{q}; | ||||
|     Query q2(q); | ||||
|     state.queries.pop_front(); | ||||
|     gather_labels(q2.ssy_stack, state.ssy_labels, block); | ||||
|     gather_labels(q2.pbk_stack, state.pbk_labels, block); | ||||
|     if (!block.branch.condition.IsUnconditional()) { | ||||
|         q2.address = block.end + 1; | ||||
|         state.queries.push_back(q2); | ||||
|     } | ||||
|     Query conditional_query{q2}; | ||||
|     if (block.branch.is_sync) { | ||||
|         if (block.branch.address == unassigned_branch) { | ||||
|             block.branch.address = conditional_query.ssy_stack.top(); | ||||
|         } | ||||
|         conditional_query.ssy_stack.pop(); | ||||
|     } | ||||
|     if (block.branch.is_brk) { | ||||
|         if (block.branch.address == unassigned_branch) { | ||||
|             block.branch.address = conditional_query.pbk_stack.top(); | ||||
|         } | ||||
|         conditional_query.pbk_stack.pop(); | ||||
|     } | ||||
|     conditional_query.address = block.branch.address; | ||||
|     state.queries.push_back(conditional_query); | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, | ||||
|                                               u32 start_address) { | ||||
|     CFGRebuildState state{program_code, program_size, start_address}; | ||||
|     // Inspect Code and generate blocks
 | ||||
|     state.labels.clear(); | ||||
|     state.labels.emplace(start_address); | ||||
|     state.inspect_queries.push_back(state.start); | ||||
|     while (!state.inspect_queries.empty()) { | ||||
|         if (!TryInspectAddress(state)) { | ||||
|             return {}; | ||||
|         } | ||||
|     } | ||||
|     // Decompile Stacks
 | ||||
|     Query start_query{}; | ||||
|     start_query.address = state.start; | ||||
|     state.queries.push_back(start_query); | ||||
|     bool decompiled = true; | ||||
|     while (!state.queries.empty()) { | ||||
|         if (!TryQuery(state)) { | ||||
|             decompiled = false; | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
|     // Sort and organize results
 | ||||
|     std::sort(state.block_info.begin(), state.block_info.end(), | ||||
|               [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); | ||||
|     ShaderCharacteristics result_out{}; | ||||
|     result_out.decompilable = decompiled; | ||||
|     result_out.start = start_address; | ||||
|     result_out.end = start_address; | ||||
|     for (auto& block : state.block_info) { | ||||
|         ShaderBlock new_block{}; | ||||
|         new_block.start = block.start; | ||||
|         new_block.end = block.end; | ||||
|         new_block.ignore_branch = block.branch.ignore; | ||||
|         if (!new_block.ignore_branch) { | ||||
|             new_block.branch.cond = block.branch.condition; | ||||
|             new_block.branch.kills = block.branch.kill; | ||||
|             new_block.branch.address = block.branch.address; | ||||
|         } | ||||
|         result_out.end = std::max(result_out.end, block.end); | ||||
|         result_out.blocks.push_back(new_block); | ||||
|     } | ||||
|     if (result_out.decompilable) { | ||||
|         result_out.labels = std::move(state.labels); | ||||
|         return {result_out}; | ||||
|     } | ||||
|     // If it's not decompilable, merge the unlabelled blocks together
 | ||||
|     auto back = result_out.blocks.begin(); | ||||
|     auto next = std::next(back); | ||||
|     while (next != result_out.blocks.end()) { | ||||
|         if (state.labels.count(next->start) == 0 && next->start == back->end + 1) { | ||||
|             back->end = next->end; | ||||
|             next = result_out.blocks.erase(next); | ||||
|             continue; | ||||
|         } | ||||
|         back = next; | ||||
|         next++; | ||||
|     } | ||||
|     return {result_out}; | ||||
| } | ||||
| } // namespace VideoCommon::Shader
 | ||||
							
								
								
									
										63
									
								
								src/video_core/shader/control_flow.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								src/video_core/shader/control_flow.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,63 @@ | ||||
| // Copyright 2019 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <cstring> | ||||
| #include <list> | ||||
| #include <optional> | ||||
| #include <unordered_set> | ||||
| 
 | ||||
| #include "video_core/engines/shader_bytecode.h" | ||||
| #include "video_core/shader/shader_ir.h" | ||||
| 
 | ||||
| namespace VideoCommon::Shader { | ||||
| 
 | ||||
| using Tegra::Shader::ConditionCode; | ||||
| using Tegra::Shader::Pred; | ||||
| 
 | ||||
| constexpr s32 exit_branch = -1; | ||||
| 
 | ||||
| struct Condition { | ||||
|     Pred predicate{Pred::UnusedIndex}; | ||||
|     ConditionCode cc{ConditionCode::T}; | ||||
| 
 | ||||
|     bool IsUnconditional() const { | ||||
|         return predicate == Pred::UnusedIndex && cc == ConditionCode::T; | ||||
|     } | ||||
|     bool operator==(const Condition& other) const { | ||||
|         return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| struct ShaderBlock { | ||||
|     u32 start{}; | ||||
|     u32 end{}; | ||||
|     bool ignore_branch{}; | ||||
|     struct Branch { | ||||
|         Condition cond{}; | ||||
|         bool kills{}; | ||||
|         s32 address{}; | ||||
|         bool operator==(const Branch& b) const { | ||||
|             return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); | ||||
|         } | ||||
|     } branch{}; | ||||
|     bool operator==(const ShaderBlock& sb) const { | ||||
|         return std::tie(start, end, ignore_branch, branch) == | ||||
|                std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| struct ShaderCharacteristics { | ||||
|     std::list<ShaderBlock> blocks{}; | ||||
|     bool decompilable{}; | ||||
|     u32 start{}; | ||||
|     u32 end{}; | ||||
|     std::unordered_set<u32> labels{}; | ||||
| }; | ||||
| 
 | ||||
| std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, | ||||
|                                               u32 start_address); | ||||
| 
 | ||||
| } // namespace VideoCommon::Shader
 | ||||
| @ -11,6 +11,7 @@ | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/engines/shader_bytecode.h" | ||||
| #include "video_core/engines/shader_header.h" | ||||
| #include "video_core/shader/control_flow.h" | ||||
| #include "video_core/shader/node_helper.h" | ||||
| #include "video_core/shader/shader_ir.h" | ||||
| 
 | ||||
| @ -21,20 +22,6 @@ using Tegra::Shader::OpCode; | ||||
| 
 | ||||
| namespace { | ||||
| 
 | ||||
| /// Merges exit method of two parallel branches.
 | ||||
| constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { | ||||
|     if (a == ExitMethod::Undetermined) { | ||||
|         return b; | ||||
|     } | ||||
|     if (b == ExitMethod::Undetermined) { | ||||
|         return a; | ||||
|     } | ||||
|     if (a == b) { | ||||
|         return a; | ||||
|     } | ||||
|     return ExitMethod::Conditional; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Returns whether the instruction at the specified offset is a 'sched' instruction. | ||||
|  * Sched instructions always appear before a sequence of 3 instructions. | ||||
| @ -51,87 +38,106 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | ||||
| void ShaderIR::Decode() { | ||||
|     std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | ||||
| 
 | ||||
|     std::set<u32> labels; | ||||
|     const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); | ||||
|     if (exit_method != ExitMethod::AlwaysEnd) { | ||||
|         UNREACHABLE_MSG("Program does not always end"); | ||||
|     } | ||||
| 
 | ||||
|     if (labels.empty()) { | ||||
|         basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); | ||||
|     disable_flow_stack = false; | ||||
|     const auto info = ScanFlow(program_code, program_size, main_offset); | ||||
|     if (info) { | ||||
|         const auto& shader_info = *info; | ||||
|         coverage_begin = shader_info.start; | ||||
|         coverage_end = shader_info.end; | ||||
|         if (shader_info.decompilable) { | ||||
|             disable_flow_stack = true; | ||||
|             const auto insert_block = ([this](NodeBlock& nodes, u32 label) { | ||||
|                 if (label == exit_branch) { | ||||
|                     return; | ||||
|                 } | ||||
|                 basic_blocks.insert({label, nodes}); | ||||
|             }); | ||||
|             const auto& blocks = shader_info.blocks; | ||||
|             NodeBlock current_block; | ||||
|             u32 current_label = exit_branch; | ||||
|             for (auto& block : blocks) { | ||||
|                 if (shader_info.labels.count(block.start) != 0) { | ||||
|                     insert_block(current_block, current_label); | ||||
|                     current_block.clear(); | ||||
|                     current_label = block.start; | ||||
|                 } | ||||
|                 if (!block.ignore_branch) { | ||||
|                     DecodeRangeInner(current_block, block.start, block.end); | ||||
|                     InsertControlFlow(current_block, block); | ||||
|                 } else { | ||||
|                     DecodeRangeInner(current_block, block.start, block.end + 1); | ||||
|                 } | ||||
|             } | ||||
|             insert_block(current_block, current_label); | ||||
|             return; | ||||
|         } | ||||
|         LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method"); | ||||
|         // we can't decompile it, fallback to standard method
 | ||||
|         for (const auto& block : shader_info.blocks) { | ||||
|             basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); | ||||
|         } | ||||
|         return; | ||||
|     } | ||||
|     LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling"); | ||||
| 
 | ||||
|     labels.insert(main_offset); | ||||
| 
 | ||||
|     for (const u32 label : labels) { | ||||
|         const auto next_it = labels.lower_bound(label + 1); | ||||
|         const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; | ||||
| 
 | ||||
|         basic_blocks.insert({label, DecodeRange(label, next_label)}); | ||||
|     // Now we need to deal with an undecompilable shader. We need to brute force
 | ||||
|     // a shader that captures every position.
 | ||||
|     coverage_begin = main_offset; | ||||
|     const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); | ||||
|     coverage_end = shader_end; | ||||
|     for (u32 label = main_offset; label < shader_end; label++) { | ||||
|         basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { | ||||
|     const auto [iter, inserted] = | ||||
|         exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | ||||
|     ExitMethod& exit_method = iter->second; | ||||
|     if (!inserted) | ||||
|         return exit_method; | ||||
| 
 | ||||
|     for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) { | ||||
|         coverage_begin = std::min(coverage_begin, offset); | ||||
|         coverage_end = std::max(coverage_end, offset + 1); | ||||
| 
 | ||||
|         const Instruction instr = {program_code[offset]}; | ||||
|         const auto opcode = OpCode::Decode(instr); | ||||
|         if (!opcode) | ||||
|             continue; | ||||
|         switch (opcode->get().GetId()) { | ||||
|         case OpCode::Id::EXIT: { | ||||
|             // The EXIT instruction can be predicated, which means that the shader can conditionally
 | ||||
|             // end on this instruction. We have to consider the case where the condition is not met
 | ||||
|             // and check the exit method of that other basic block.
 | ||||
|             using Tegra::Shader::Pred; | ||||
|             if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||||
|                 return exit_method = ExitMethod::AlwaysEnd; | ||||
|             } else { | ||||
|                 const ExitMethod not_met = Scan(offset + 1, end, labels); | ||||
|                 return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); | ||||
|             } | ||||
|         } | ||||
|         case OpCode::Id::BRA: { | ||||
|             const u32 target = offset + instr.bra.GetBranchTarget(); | ||||
|             labels.insert(target); | ||||
|             const ExitMethod no_jmp = Scan(offset + 1, end, labels); | ||||
|             const ExitMethod jmp = Scan(target, end, labels); | ||||
|             return exit_method = ParallelExit(no_jmp, jmp); | ||||
|         } | ||||
|         case OpCode::Id::SSY: | ||||
|         case OpCode::Id::PBK: { | ||||
|             // The SSY and PBK use a similar encoding as the BRA instruction.
 | ||||
|             UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||||
|                                  "Constant buffer branching is not supported"); | ||||
|             const u32 target = offset + instr.bra.GetBranchTarget(); | ||||
|             labels.insert(target); | ||||
|             // Continue scanning for an exit method.
 | ||||
|             break; | ||||
|         } | ||||
|         default: | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
|     return exit_method = ExitMethod::AlwaysReturn; | ||||
| } | ||||
| 
 | ||||
| NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | ||||
|     NodeBlock basic_block; | ||||
|     for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||||
|         pc = DecodeInstr(basic_block, pc); | ||||
|     } | ||||
|     DecodeRangeInner(basic_block, begin, end); | ||||
|     return basic_block; | ||||
| } | ||||
| 
 | ||||
| void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { | ||||
|     for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||||
|         pc = DecodeInstr(bb, pc); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { | ||||
|     const auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { | ||||
|         Node result = n; | ||||
|         if (cond.cc != ConditionCode::T) { | ||||
|             result = Conditional(GetConditionCode(cond.cc), {result}); | ||||
|         } | ||||
|         if (cond.predicate != Pred::UnusedIndex) { | ||||
|             u32 pred = static_cast<u32>(cond.predicate); | ||||
|             const bool is_neg = pred > 7; | ||||
|             if (is_neg) { | ||||
|                 pred -= 8; | ||||
|             } | ||||
|             result = Conditional(GetPredicate(pred, is_neg), {result}); | ||||
|         } | ||||
|         return result; | ||||
|     }); | ||||
|     if (block.branch.address < 0) { | ||||
|         if (block.branch.kills) { | ||||
|             Node n = Operation(OperationCode::Discard); | ||||
|             n = apply_conditions(block.branch.cond, n); | ||||
|             bb.push_back(n); | ||||
|             global_code.push_back(n); | ||||
|             return; | ||||
|         } | ||||
|         Node n = Operation(OperationCode::Exit); | ||||
|         n = apply_conditions(block.branch.cond, n); | ||||
|         bb.push_back(n); | ||||
|         global_code.push_back(n); | ||||
|         return; | ||||
|     } | ||||
|     Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); | ||||
|     n = apply_conditions(block.branch.cond, n); | ||||
|     bb.push_back(n); | ||||
|     global_code.push_back(n); | ||||
| } | ||||
| 
 | ||||
| u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | ||||
|     // Ignore sched instructions when generating code.
 | ||||
|     if (IsSchedInstruction(pc, main_offset)) { | ||||
| @ -140,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | ||||
| 
 | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|     const u32 nv_address = ConvertAddressToNvidiaSpace(pc); | ||||
| 
 | ||||
|     // Decoding failure
 | ||||
|     if (!opcode) { | ||||
|         UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | ||||
|         bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", | ||||
|                                          nv_address, instr.value))); | ||||
|         return pc + 1; | ||||
|     } | ||||
| 
 | ||||
|     bb.push_back( | ||||
|         Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value))); | ||||
|     bb.push_back(Comment( | ||||
|         fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); | ||||
| 
 | ||||
|     using Tegra::Shader::Pred; | ||||
|     UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | ||||
|  | ||||
| @ -91,11 +91,46 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::BRA: { | ||||
|         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||||
|                              "BRA with constant buffers are not implemented"); | ||||
|         Node branch; | ||||
|         if (instr.bra.constant_buffer == 0) { | ||||
|             const u32 target = pc + instr.bra.GetBranchTarget(); | ||||
|             branch = Operation(OperationCode::Branch, Immediate(target)); | ||||
|         } else { | ||||
|             const u32 target = pc + 1; | ||||
|             const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); | ||||
|             const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||||
|                                                  PRECISE, op_a, Immediate(3)); | ||||
|             const Node operand = | ||||
|                 Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||||
|             branch = Operation(OperationCode::BranchIndirect, convert); | ||||
|         } | ||||
| 
 | ||||
|         const u32 target = pc + instr.bra.GetBranchTarget(); | ||||
|         const Node branch = Operation(OperationCode::Branch, Immediate(target)); | ||||
|         const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||||
|         if (cc != Tegra::Shader::ConditionCode::T) { | ||||
|             bb.push_back(Conditional(GetConditionCode(cc), {branch})); | ||||
|         } else { | ||||
|             bb.push_back(branch); | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::BRX: { | ||||
|         Node operand; | ||||
|         if (instr.brx.constant_buffer != 0) { | ||||
|             const s32 target = pc + 1; | ||||
|             const Node index = GetRegister(instr.gpr8); | ||||
|             const Node op_a = | ||||
|                 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||||
|             const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||||
|                                                  PRECISE, op_a, Immediate(3)); | ||||
|             operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||||
|         } else { | ||||
|             const s32 target = pc + instr.brx.GetBranchExtend(); | ||||
|             const Node op_a = GetRegister(instr.gpr8); | ||||
|             const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||||
|                                                  PRECISE, op_a, Immediate(3)); | ||||
|             operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||||
|         } | ||||
|         const Node branch = Operation(OperationCode::BranchIndirect, operand); | ||||
| 
 | ||||
|         const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||||
|         if (cc != Tegra::Shader::ConditionCode::T) { | ||||
| @ -109,6 +144,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||||
|         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||||
|                              "Constant buffer flow is not supported"); | ||||
| 
 | ||||
|         if (disable_flow_stack) { | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
 | ||||
|         const u32 target = pc + instr.bra.GetBranchTarget(); | ||||
|         bb.push_back( | ||||
| @ -119,6 +158,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||||
|         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||||
|                              "Constant buffer PBK is not supported"); | ||||
| 
 | ||||
|         if (disable_flow_stack) { | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         // PBK pushes to a stack the address where BRK will jump to.
 | ||||
|         const u32 target = pc + instr.bra.GetBranchTarget(); | ||||
|         bb.push_back( | ||||
| @ -130,6 +173,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||||
|         UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", | ||||
|                              static_cast<u32>(cc)); | ||||
| 
 | ||||
|         if (disable_flow_stack) { | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         // The SYNC opcode jumps to the address previously set by the SSY opcode
 | ||||
|         bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); | ||||
|         break; | ||||
| @ -138,6 +185,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||||
|         const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||||
|         UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", | ||||
|                              static_cast<u32>(cc)); | ||||
|         if (disable_flow_stack) { | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         // The BRK opcode jumps to the address previously set by the PBK opcode
 | ||||
|         bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); | ||||
|  | ||||
| @ -148,11 +148,12 @@ enum class OperationCode { | ||||
| 
 | ||||
|     ImageStore, /// (MetaImage, float[N] coords) -> void
 | ||||
| 
 | ||||
|     Branch,        /// (uint branch_target) -> void
 | ||||
|     PushFlowStack, /// (uint branch_target) -> void
 | ||||
|     PopFlowStack,  /// () -> void
 | ||||
|     Exit,          /// () -> void
 | ||||
|     Discard,       /// () -> void
 | ||||
|     Branch,         /// (uint branch_target) -> void
 | ||||
|     BranchIndirect, /// (uint branch_target) -> void
 | ||||
|     PushFlowStack,  /// (uint branch_target) -> void
 | ||||
|     PopFlowStack,   /// () -> void
 | ||||
|     Exit,           /// () -> void
 | ||||
|     Discard,        /// () -> void
 | ||||
| 
 | ||||
|     EmitVertex,   /// () -> void
 | ||||
|     EndPrimitive, /// () -> void
 | ||||
|  | ||||
| @ -22,8 +22,8 @@ using Tegra::Shader::PredCondition; | ||||
| using Tegra::Shader::PredOperation; | ||||
| using Tegra::Shader::Register; | ||||
| 
 | ||||
| ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) | ||||
|     : program_code{program_code}, main_offset{main_offset} { | ||||
| ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size) | ||||
|     : program_code{program_code}, main_offset{main_offset}, program_size{size} { | ||||
|     Decode(); | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -22,18 +22,12 @@ | ||||
| 
 | ||||
| namespace VideoCommon::Shader { | ||||
| 
 | ||||
| struct ShaderBlock; | ||||
| 
 | ||||
| using ProgramCode = std::vector<u64>; | ||||
| 
 | ||||
| constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; | ||||
| 
 | ||||
/// Describes the behaviour of the code path between a given entry point and a return point.
enum class ExitMethod {
    /// Internal value. Only occurs when analyzing a JMP loop.
    Undetermined,
    /// All code paths reach the return point.
    AlwaysReturn,
    /// The code path reaches the return point or an END instruction conditionally.
    Conditional,
    /// All code paths reach an END instruction.
    AlwaysEnd,
};
| 
 | ||||
| class ConstBuffer { | ||||
| public: | ||||
|     explicit ConstBuffer(u32 max_offset, bool is_indirect) | ||||
| @ -73,7 +67,7 @@ struct GlobalMemoryUsage { | ||||
| 
 | ||||
| class ShaderIR final { | ||||
| public: | ||||
|     explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); | ||||
|     explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size); | ||||
|     ~ShaderIR(); | ||||
| 
 | ||||
|     const std::map<u32, NodeBlock>& GetBasicBlocks() const { | ||||
| @ -129,12 +123,20 @@ public: | ||||
|         return header; | ||||
|     } | ||||
| 
 | ||||
|     bool IsFlowStackDisabled() const { | ||||
|         return disable_flow_stack; | ||||
|     } | ||||
| 
 | ||||
|     u32 ConvertAddressToNvidiaSpace(const u32 address) const { | ||||
|         return (address - main_offset) * sizeof(Tegra::Shader::Instruction); | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     void Decode(); | ||||
| 
 | ||||
|     ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels); | ||||
| 
 | ||||
|     NodeBlock DecodeRange(u32 begin, u32 end); | ||||
|     void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); | ||||
|     void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); | ||||
| 
 | ||||
|     /**
 | ||||
|      * Decodes a single instruction from Tegra to IR. | ||||
| @ -326,10 +328,11 @@ private: | ||||
| 
 | ||||
|     const ProgramCode& program_code; | ||||
|     const u32 main_offset; | ||||
|     const std::size_t program_size; | ||||
|     bool disable_flow_stack{}; | ||||
| 
 | ||||
|     u32 coverage_begin{}; | ||||
|     u32 coverage_end{}; | ||||
|     std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; | ||||
| 
 | ||||
|     std::map<u32, NodeBlock> basic_blocks; | ||||
|     NodeBlock global_code; | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 bunnei
						bunnei