mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu-mainline.git
				synced 2025-03-21 01:53:15 +00:00 
			
		
		
		
	shader_ir: Unify blocks in decompiled shaders.
This commit is contained in:
		
							parent
							
								
									926b80102f
								
							
						
					
					
						commit
						d5533b440c
					
				| @ -191,10 +191,12 @@ public: | ||||
| 
 | ||||
|         // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
 | ||||
|         // unlikely that shaders will use 20 nested SSYs and PBKs.
 | ||||
|         constexpr u32 FLOW_STACK_SIZE = 20; | ||||
|         for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { | ||||
|             code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); | ||||
|             code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | ||||
|         if (!ir.IsFlowStackDisabled()) { | ||||
|             constexpr u32 FLOW_STACK_SIZE = 20; | ||||
|             for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { | ||||
|                 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); | ||||
|                 code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         code.AddLine("while (true) {{"); | ||||
|  | ||||
| @ -1,5 +1,6 @@ | ||||
| 
 | ||||
| #include <list> | ||||
| #include <map> | ||||
| #include <unordered_map> | ||||
| #include <unordered_set> | ||||
| #include <vector> | ||||
| @ -104,28 +105,6 @@ struct BlockInfo { | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| struct Stamp { | ||||
|     Stamp() = default; | ||||
|     Stamp(u32 address, u32 target) : address{address}, target{target} {} | ||||
|     u32 address{}; | ||||
|     u32 target{}; | ||||
|     bool operator==(const Stamp& sb) const { | ||||
|         return std::tie(address, target) == std::tie(sb.address, sb.target); | ||||
|     } | ||||
|     bool operator<(const Stamp& sb) const { | ||||
|         return address < sb.address; | ||||
|     } | ||||
|     bool operator>(const Stamp& sb) const { | ||||
|         return address > sb.address; | ||||
|     } | ||||
|     bool operator<=(const Stamp& sb) const { | ||||
|         return address <= sb.address; | ||||
|     } | ||||
|     bool operator>=(const Stamp& sb) const { | ||||
|         return address >= sb.address; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| struct CFGRebuildState { | ||||
|     explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size) | ||||
|         : program_code{program_code}, program_size{program_size} { | ||||
| @ -144,8 +123,8 @@ struct CFGRebuildState { | ||||
|     std::list<Query> queries{}; | ||||
|     std::unordered_map<u32, u32> registered{}; | ||||
|     std::unordered_set<u32> labels{}; | ||||
|     std::set<Stamp> ssy_labels; | ||||
|     std::set<Stamp> pbk_labels; | ||||
|     std::map<u32, u32> ssy_labels; | ||||
|     std::map<u32, u32> pbk_labels; | ||||
|     std::unordered_map<u32, BlockStack> stacks{}; | ||||
|     const ProgramCode& program_code; | ||||
|     const std::size_t program_size; | ||||
| @ -393,7 +372,7 @@ bool TryInspectAddress(CFGRebuildState& state) { | ||||
|     } | ||||
|     case BlockCollision::Inside: { | ||||
|         // This case is the tricky one:
 | ||||
|         // We need to Split the block in 2 sepprate blocks
 | ||||
|         // We need to split the block into 2 separate blocks
 | ||||
|         auto it = search_result.second; | ||||
|         block_info = CreateBlockInfo(state, address, it->end); | ||||
|         it->end = address - 1; | ||||
| @ -428,13 +407,11 @@ bool TryInspectAddress(CFGRebuildState& state) { | ||||
| } | ||||
| 
 | ||||
| bool TryQuery(CFGRebuildState& state) { | ||||
|     auto gather_labels = ([](ControlStack& cc, std::set<Stamp> labels, BlockInfo& block) { | ||||
|         Stamp start{block.start, 0}; | ||||
|         Stamp end{block.end, 0}; | ||||
|         auto gather_start = labels.lower_bound(start); | ||||
|         auto gather_end = labels.upper_bound(end); | ||||
|     auto gather_labels = ([](ControlStack& cc, std::map<u32, u32>& labels, BlockInfo& block) { | ||||
|         auto gather_start = labels.lower_bound(block.start); | ||||
|         auto gather_end = labels.upper_bound(block.end); | ||||
|         while (gather_start != gather_end) { | ||||
|             cc.Push(gather_start->target); | ||||
|             cc.Push(gather_start->second); | ||||
|             gather_start++; | ||||
|         } | ||||
|     }); | ||||
| @ -444,9 +421,13 @@ bool TryQuery(CFGRebuildState& state) { | ||||
|     Query& q = state.queries.front(); | ||||
|     u32 block_index = state.registered[q.address]; | ||||
|     BlockInfo& block = state.block_info[block_index]; | ||||
|     // If the block is visited, check if the stacks match, else gather the ssy/pbk
 | ||||
|     // labels into the current stack and look if the branch at the end of the block
 | ||||
|     // consumes a label. Schedule new queries accordingly
 | ||||
|     if (block.visited) { | ||||
|         BlockStack& stack = state.stacks[q.address]; | ||||
|         bool all_okay = q.ssy_stack.Compare(stack.ssy_stack) && q.pbk_stack.Compare(stack.pbk_stack); | ||||
|         bool all_okay = (stack.ssy_stack.Size() == 0 || q.ssy_stack.Compare(stack.ssy_stack)) && | ||||
|                         (stack.pbk_stack.Size() == 0 || q.pbk_stack.Compare(stack.pbk_stack)); | ||||
|         state.queries.pop_front(); | ||||
|         return all_okay; | ||||
|     } | ||||
| @ -523,8 +504,10 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_addre | ||||
|         result_out.blocks.push_back(new_block); | ||||
|     } | ||||
|     if (result_out.decompilable) { | ||||
|         result_out.labels = std::move(state.labels); | ||||
|         return true; | ||||
|     } | ||||
|     // If it's not decompilable, merge the unlabelled blocks together
 | ||||
|     auto back = result_out.blocks.begin(); | ||||
|     auto next = std::next(back); | ||||
|     while (next != result_out.blocks.end()) { | ||||
|  | ||||
| @ -3,7 +3,7 @@ | ||||
| #include <cstring> | ||||
| #include <list> | ||||
| #include <optional> | ||||
| #include <vector> | ||||
| #include <unordered_set> | ||||
| 
 | ||||
| #include "video_core/engines/shader_bytecode.h" | ||||
| #include "video_core/shader/shader_ir.h" | ||||
| @ -48,6 +48,7 @@ struct ShaderCharacteristics { | ||||
|     bool decompilable{}; | ||||
|     u32 start; | ||||
|     u32 end; | ||||
|     std::unordered_set<u32> labels{}; | ||||
| }; | ||||
| 
 | ||||
| bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address, | ||||
|  | ||||
| @ -38,32 +38,47 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | ||||
| void ShaderIR::Decode() { | ||||
|     std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | ||||
| 
 | ||||
|     disable_flow_stack = false; | ||||
|     ShaderCharacteristics shader_info{}; | ||||
|     bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info); | ||||
|     if (can_proceed) { | ||||
|         coverage_begin = shader_info.start; | ||||
|         coverage_end = shader_info.end; | ||||
|         if (shader_info.decompilable) { | ||||
|             std::list<ShaderBlock>& blocks = shader_info.blocks; | ||||
|             for (auto& block : blocks) { | ||||
|                 NodeBlock nodes; | ||||
|                 if (!block.ignore_branch) { | ||||
|                     nodes = DecodeRange(block.start, block.end); | ||||
|                     InsertControlFlow(nodes, block); | ||||
|                 } else { | ||||
|                     nodes = DecodeRange(block.start, block.end + 1); | ||||
|             disable_flow_stack = true; | ||||
|             auto insert_block = ([this](NodeBlock& nodes, u32 label) { | ||||
|                 if (label == exit_branch) { | ||||
|                     return; | ||||
|                 } | ||||
|                 basic_blocks.insert({label, nodes}); | ||||
|             }); | ||||
|             std::list<ShaderBlock>& blocks = shader_info.blocks; | ||||
|             NodeBlock current_block; | ||||
|             u32 current_label = exit_branch; | ||||
|             for (auto& block : blocks) { | ||||
|                 if (shader_info.labels.count(block.start) != 0) { | ||||
|                     insert_block(current_block, current_label); | ||||
|                     current_block.clear(); | ||||
|                     current_label = block.start; | ||||
|                 } | ||||
|                 if (!block.ignore_branch) { | ||||
|                     DecodeRangeInner(current_block, block.start, block.end); | ||||
|                     InsertControlFlow(current_block, block); | ||||
|                 } else { | ||||
|                     DecodeRangeInner(current_block, block.start, block.end + 1); | ||||
|                 } | ||||
|                 basic_blocks.insert({block.start, nodes}); | ||||
|             } | ||||
|             insert_block(current_block, current_label); | ||||
|             return; | ||||
|         } | ||||
|         LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method"); | ||||
|         // We can't decompile it, fall back to the standard method
 | ||||
|         for (const auto& block : shader_info.blocks) { | ||||
|             basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); | ||||
|         } | ||||
|         return; | ||||
|     } | ||||
|     LOG_WARNING(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); | ||||
|     LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling"); | ||||
| 
 | ||||
|     // Now we need to deal with an undecompilable shader. We need to brute force
 | ||||
|     // a shader that captures every position.
 | ||||
| @ -78,12 +93,16 @@ void ShaderIR::Decode() { | ||||
| 
 | ||||
| NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | ||||
|     NodeBlock basic_block; | ||||
|     for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||||
|         pc = DecodeInstr(basic_block, pc); | ||||
|     } | ||||
|     DecodeRangeInner(basic_block, begin, end); | ||||
|     return basic_block; | ||||
| } | ||||
| 
 | ||||
| void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { | ||||
|     for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||||
|         pc = DecodeInstr(bb, pc); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { | ||||
|     auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { | ||||
|         Node result = n; | ||||
|  | ||||
| @ -98,9 +98,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||||
|         } else { | ||||
|             const u32 target = pc + 1; | ||||
|             const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); | ||||
|             const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, | ||||
|                                                true, PRECISE, op_a, Immediate(3)); | ||||
|             const Node operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||||
|             const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||||
|                                                  PRECISE, op_a, Immediate(3)); | ||||
|             const Node operand = | ||||
|                 Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||||
|             branch = Operation(OperationCode::BranchIndirect, convert); | ||||
|         } | ||||
| 
 | ||||
| @ -119,14 +120,14 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||||
|             const Node index = GetRegister(instr.gpr8); | ||||
|             const Node op_a = | ||||
|                 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||||
|             const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, | ||||
|                                                true, PRECISE, op_a, Immediate(3)); | ||||
|             const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||||
|                                                  PRECISE, op_a, Immediate(3)); | ||||
|             operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||||
|         } else { | ||||
|             const s32 target = pc + instr.brx.GetBranchExtend(); | ||||
|             const Node op_a = GetRegister(instr.gpr8); | ||||
|             const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, | ||||
|                                                true, PRECISE, op_a, Immediate(3)); | ||||
|             const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||||
|                                                  PRECISE, op_a, Immediate(3)); | ||||
|             operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||||
|         } | ||||
|         const Node branch = Operation(OperationCode::BranchIndirect, operand); | ||||
| @ -143,6 +144,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||||
|         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||||
|                              "Constant buffer flow is not supported"); | ||||
| 
 | ||||
|         if (disable_flow_stack) { | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
 | ||||
|         const u32 target = pc + instr.bra.GetBranchTarget(); | ||||
|         bb.push_back( | ||||
| @ -153,6 +158,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||||
|         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||||
|                              "Constant buffer PBK is not supported"); | ||||
| 
 | ||||
|         if (disable_flow_stack) { | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         // PBK pushes to a stack the address where BRK will jump to.
 | ||||
|         const u32 target = pc + instr.bra.GetBranchTarget(); | ||||
|         bb.push_back( | ||||
| @ -164,6 +173,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||||
|         UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", | ||||
|                              static_cast<u32>(cc)); | ||||
| 
 | ||||
|         if (disable_flow_stack) { | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         // The SYNC opcode jumps to the address previously set by the SSY opcode
 | ||||
|         bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); | ||||
|         break; | ||||
| @ -172,6 +185,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||||
|         const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||||
|         UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", | ||||
|                              static_cast<u32>(cc)); | ||||
|         if (disable_flow_stack) { | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         // The BRK opcode jumps to the address previously set by the PBK opcode
 | ||||
|         bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); | ||||
|  | ||||
| @ -148,12 +148,12 @@ enum class OperationCode { | ||||
| 
 | ||||
|     ImageStore, /// (MetaImage, float[N] coords) -> void
 | ||||
| 
 | ||||
|     Branch,        /// (uint branch_target) -> void
 | ||||
|     BranchIndirect,/// (uint branch_target) -> void
 | ||||
|     PushFlowStack, /// (uint branch_target) -> void
 | ||||
|     PopFlowStack,  /// () -> void
 | ||||
|     Exit,          /// () -> void
 | ||||
|     Discard,       /// () -> void
 | ||||
|     Branch,         /// (uint branch_target) -> void
 | ||||
|     BranchIndirect, /// (uint branch_target) -> void
 | ||||
|     PushFlowStack,  /// (uint branch_target) -> void
 | ||||
|     PopFlowStack,   /// () -> void
 | ||||
|     Exit,           /// () -> void
 | ||||
|     Discard,        /// () -> void
 | ||||
| 
 | ||||
|     EmitVertex,   /// () -> void
 | ||||
|     EndPrimitive, /// () -> void
 | ||||
|  | ||||
| @ -123,10 +123,15 @@ public: | ||||
|         return header; | ||||
|     } | ||||
| 
 | ||||
|     bool IsFlowStackDisabled() const { | ||||
|         return disable_flow_stack; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     void Decode(); | ||||
| 
 | ||||
|     NodeBlock DecodeRange(u32 begin, u32 end); | ||||
|     void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); | ||||
|     void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); | ||||
| 
 | ||||
|     /**
 | ||||
| @ -320,6 +325,7 @@ private: | ||||
|     const ProgramCode& program_code; | ||||
|     const u32 main_offset; | ||||
|     const std::size_t program_size; | ||||
|     bool disable_flow_stack{}; | ||||
| 
 | ||||
|     u32 coverage_begin{}; | ||||
|     u32 coverage_end{}; | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Fernando Sahmkow
						Fernando Sahmkow