mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu.git
				synced 2025-05-12 00:45:25 +00:00 
			
		
		
		
	Merge pull request #2255 from JayFoxRox/lsl4
shader_jit: Load LOOPCOUNT_REG and LOOPINC 4 bit left-shifted
This commit is contained in:
		
						commit
						018191c1f0
					
				@ -102,11 +102,11 @@ static const X64Reg SETUP = R9;
 | 
				
			|||||||
/// The two 32-bit VS address offset registers set by the MOVA instruction
 | 
					/// The two 32-bit VS address offset registers set by the MOVA instruction
 | 
				
			||||||
static const X64Reg ADDROFFS_REG_0 = R10;
 | 
					static const X64Reg ADDROFFS_REG_0 = R10;
 | 
				
			||||||
static const X64Reg ADDROFFS_REG_1 = R11;
 | 
					static const X64Reg ADDROFFS_REG_1 = R11;
 | 
				
			||||||
/// VS loop count register
 | 
					/// VS loop count register (Multiplied by 16)
 | 
				
			||||||
static const X64Reg LOOPCOUNT_REG = R12;
 | 
					static const X64Reg LOOPCOUNT_REG = R12;
 | 
				
			||||||
/// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker)
 | 
					/// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker)
 | 
				
			||||||
static const X64Reg LOOPCOUNT = RSI;
 | 
					static const X64Reg LOOPCOUNT = RSI;
 | 
				
			||||||
/// Number to increment LOOPCOUNT_REG by on each loop iteration
 | 
					/// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
 | 
				
			||||||
static const X64Reg LOOPINC = RDI;
 | 
					static const X64Reg LOOPINC = RDI;
 | 
				
			||||||
/// Result of the previous CMP instruction for the X-component comparison
 | 
					/// Result of the previous CMP instruction for the X-component comparison
 | 
				
			||||||
static const X64Reg COND0 = R13;
 | 
					static const X64Reg COND0 = R13;
 | 
				
			||||||
@ -718,15 +718,18 @@ void JitShader::Compile_LOOP(Instruction instr) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    looping = true;
 | 
					    looping = true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
 | 
				
			||||||
 | 
					    // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
 | 
				
			||||||
 | 
					    // 4 bits) to be used as an offset into the 16-byte vector registers later
 | 
				
			||||||
    int offset =
 | 
					    int offset =
 | 
				
			||||||
        ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
 | 
					        ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
 | 
				
			||||||
    MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
 | 
					    MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
 | 
				
			||||||
    MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
 | 
					    MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
 | 
				
			||||||
    SHR(32, R(LOOPCOUNT_REG), Imm8(8));
 | 
					    SHR(32, R(LOOPCOUNT_REG), Imm8(4));
 | 
				
			||||||
    AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
 | 
					    AND(32, R(LOOPCOUNT_REG), Imm32(0xFF0)); // Y-component is the start
 | 
				
			||||||
    MOV(32, R(LOOPINC), R(LOOPCOUNT));
 | 
					    MOV(32, R(LOOPINC), R(LOOPCOUNT));
 | 
				
			||||||
    SHR(32, R(LOOPINC), Imm8(16));
 | 
					    SHR(32, R(LOOPINC), Imm8(12));
 | 
				
			||||||
    MOVZX(32, 8, LOOPINC, R(LOOPINC));     // Z-component is the incrementer
 | 
					    AND(32, R(LOOPINC), Imm32(0xFF0));     // Z-component is the incrementer
 | 
				
			||||||
    MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count
 | 
					    MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count
 | 
				
			||||||
    ADD(32, R(LOOPCOUNT), Imm8(1));        // Iteration count is X-component + 1
 | 
					    ADD(32, R(LOOPCOUNT), Imm8(1));        // Iteration count is X-component + 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user