mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu.git
				synced 2025-05-12 00:45:25 +00:00 
			
		
		
		
	Merge pull request #996 from yuriks/texture-copy
GPU: Implement TextureCopy-mode display transfers
This commit is contained in:
		
						commit
						a9fc659809
					
				| @ -418,7 +418,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { | ||||
| 
 | ||||
|     case CommandId::SET_DISPLAY_TRANSFER: | ||||
|     { | ||||
|         auto& params = command.image_copy; | ||||
|         auto& params = command.display_transfer; | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), | ||||
|                 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), | ||||
| @ -433,17 +433,22 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { | ||||
|     // TODO: Check if texture copies are implemented correctly.
 | ||||
|     case CommandId::SET_TEXTURE_COPY: | ||||
|     { | ||||
|         auto& params = command.image_copy; | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), | ||||
|         auto& params = command.texture_copy; | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address), | ||||
|                 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address), | ||||
|                 Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size); | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size); | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)), params.flags); | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size), | ||||
|                 params.size); | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size), | ||||
|                 params.in_width_gap); | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size), | ||||
|                 params.out_width_gap); | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags), | ||||
|                 params.flags); | ||||
| 
 | ||||
|         // TODO: Should this register be set to 1 or should instead its value be OR-ed with 1?
 | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1); | ||||
|         // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to matter.
 | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1); | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|  | ||||
| @ -127,7 +127,16 @@ struct Command { | ||||
|             u32 in_buffer_size; | ||||
|             u32 out_buffer_size; | ||||
|             u32 flags; | ||||
|         } image_copy; | ||||
|         } display_transfer; | ||||
| 
 | ||||
|         struct { | ||||
|             u32 in_buffer_address; | ||||
|             u32 out_buffer_address; | ||||
|             u32 size; | ||||
|             u32 in_width_gap; | ||||
|             u32 out_width_gap; | ||||
|             u32 flags; | ||||
|         } texture_copy; | ||||
| 
 | ||||
|         u8 raw_data[0x1C]; | ||||
|     }; | ||||
|  | ||||
| @ -3,6 +3,7 @@ | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <cstring> | ||||
| #include <numeric> | ||||
| #include <type_traits> | ||||
| 
 | ||||
| #include "common/color.h" | ||||
| @ -158,14 +159,59 @@ inline void Write(u32 addr, const T data) { | ||||
|             u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); | ||||
|             u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); | ||||
| 
 | ||||
|             if (config.is_texture_copy) { | ||||
|                 u32 input_width = config.texture_copy.input_width * 16; | ||||
|                 u32 input_gap = config.texture_copy.input_gap * 16; | ||||
|                 u32 output_width = config.texture_copy.output_width * 16; | ||||
|                 u32 output_gap = config.texture_copy.output_gap * 16; | ||||
| 
 | ||||
|                 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap); | ||||
|                 VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), contiguous_input_size); | ||||
| 
 | ||||
|                 u32 remaining_size = config.texture_copy.size; | ||||
|                 u32 remaining_input = input_width; | ||||
|                 u32 remaining_output = output_width; | ||||
|                 while (remaining_size > 0) { | ||||
|                     u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size }); | ||||
| 
 | ||||
|                     std::memcpy(dst_pointer, src_pointer, copy_size); | ||||
|                     src_pointer += copy_size; | ||||
|                     dst_pointer += copy_size; | ||||
| 
 | ||||
|                     remaining_input -= copy_size; | ||||
|                     remaining_output -= copy_size; | ||||
|                     remaining_size -= copy_size; | ||||
| 
 | ||||
|                     if (remaining_input == 0) { | ||||
|                         remaining_input = input_width; | ||||
|                         src_pointer += input_gap; | ||||
|                     } | ||||
|                     if (remaining_output == 0) { | ||||
|                         remaining_output = output_width; | ||||
|                         dst_pointer += output_gap; | ||||
|                     } | ||||
|                 } | ||||
| 
 | ||||
|                 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", | ||||
|                     config.texture_copy.size, | ||||
|                     config.GetPhysicalInputAddress(), input_width, input_gap, | ||||
|                     config.GetPhysicalOutputAddress(), output_width, output_gap, | ||||
|                     config.flags); | ||||
| 
 | ||||
|                 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); | ||||
|                 VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), contiguous_output_size); | ||||
| 
 | ||||
|                 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); | ||||
|                 break; | ||||
|             } | ||||
| 
 | ||||
|             if (config.scaling > config.ScaleXY) { | ||||
|                 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); | ||||
|                 UNIMPLEMENTED(); | ||||
|                 break; | ||||
|             } | ||||
| 
 | ||||
|             if (config.output_tiled && | ||||
|                     (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) { | ||||
|             if (config.input_linear && config.scaling != config.NoScale) { | ||||
|                 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); | ||||
|                 UNIMPLEMENTED(); | ||||
|                 break; | ||||
| @ -182,23 +228,6 @@ inline void Write(u32 addr, const T data) { | ||||
| 
 | ||||
|             VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size); | ||||
| 
 | ||||
|             if (config.raw_copy) { | ||||
|                 // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions
 | ||||
|                 // TODO(Subv): Verify if raw copies perform scaling
 | ||||
|                 memcpy(dst_pointer, src_pointer, output_size); | ||||
| 
 | ||||
|                 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), output format: %x, flags 0x%08X, Raw copy", | ||||
|                     output_size, | ||||
|                     config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), | ||||
|                     config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(), | ||||
|                     config.output_format.Value(), config.flags); | ||||
| 
 | ||||
|                 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); | ||||
| 
 | ||||
|                 VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size); | ||||
|                 break; | ||||
|             } | ||||
| 
 | ||||
|             for (u32 y = 0; y < output_height; ++y) { | ||||
|                 for (u32 x = 0; x < output_width; ++x) { | ||||
|                     Math::Vec4<u8> src_color; | ||||
| @ -220,7 +249,7 @@ inline void Write(u32 addr, const T data) { | ||||
|                     u32 src_offset; | ||||
|                     u32 dst_offset; | ||||
| 
 | ||||
|                     if (config.output_tiled) { | ||||
|                     if (config.input_linear) { | ||||
|                         if (!config.dont_swizzle) { | ||||
|                             // Interpret the input as linear and the output as tiled
 | ||||
|                             u32 coarse_y = y & ~7; | ||||
|  | ||||
| @ -201,12 +201,14 @@ struct Regs { | ||||
|             u32 flags; | ||||
| 
 | ||||
|             BitField< 0, 1, u32> flip_vertically;  // flips input data vertically
 | ||||
|             BitField< 1, 1, u32> output_tiled;     // Converts from linear to tiled format
 | ||||
|             BitField< 3, 1, u32> raw_copy;         // Copies the data without performing any processing
 | ||||
|             BitField< 1, 1, u32> input_linear;     // Input is linear: converts from linear to tiled format
 | ||||
|             BitField< 2, 1, u32> crop_input_lines; | ||||
|             BitField< 3, 1, u32> is_texture_copy;  // Copies the data without performing any processing, while respecting the texture copy fields
 | ||||
|             BitField< 5, 1, u32> dont_swizzle; | ||||
|             BitField< 8, 3, PixelFormat> input_format; | ||||
|             BitField<12, 3, PixelFormat> output_format; | ||||
| 
 | ||||
|             /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
 | ||||
|             BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented
 | ||||
|             BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
 | ||||
|         }; | ||||
| 
 | ||||
| @ -214,10 +216,30 @@ struct Regs { | ||||
| 
 | ||||
|         // it seems that writing to this field triggers the display transfer
 | ||||
|         u32 trigger; | ||||
|     } display_transfer_config; | ||||
|     ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c); | ||||
| 
 | ||||
|     INSERT_PADDING_WORDS(0x331); | ||||
|         INSERT_PADDING_WORDS(0x1); | ||||
| 
 | ||||
|         struct { | ||||
|             u32 size; | ||||
| 
 | ||||
|             union { | ||||
|                 u32 input_size; | ||||
| 
 | ||||
|                 BitField< 0, 16, u32> input_width; | ||||
|                 BitField<16, 16, u32> input_gap; | ||||
|             }; | ||||
| 
 | ||||
|             union { | ||||
|                 u32 output_size; | ||||
| 
 | ||||
|                 BitField< 0, 16, u32> output_width; | ||||
|                 BitField<16, 16, u32> output_gap; | ||||
|             }; | ||||
|         } texture_copy; | ||||
|     } display_transfer_config; | ||||
|     ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c); | ||||
| 
 | ||||
|     INSERT_PADDING_WORDS(0x32D); | ||||
| 
 | ||||
|     struct { | ||||
|         // command list size (in bytes)
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Yuri Kunde Schlesner
						Yuri Kunde Schlesner