mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu.git
				synced 2025-05-12 00:45:25 +00:00 
			
		
		
		
	Merge pull request #996 from yuriks/texture-copy
GPU: Implement TextureCopy-mode display transfers
This commit is contained in:
		
						commit
						a9fc659809
					
				| @ -418,7 +418,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { | |||||||
| 
 | 
 | ||||||
|     case CommandId::SET_DISPLAY_TRANSFER: |     case CommandId::SET_DISPLAY_TRANSFER: | ||||||
|     { |     { | ||||||
|         auto& params = command.image_copy; |         auto& params = command.display_transfer; | ||||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), |         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), | ||||||
|                 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); |                 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | ||||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), |         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), | ||||||
| @ -433,17 +433,22 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { | |||||||
|     // TODO: Check if texture copies are implemented correctly..
 |     // TODO: Check if texture copies are implemented correctly..
 | ||||||
|     case CommandId::SET_TEXTURE_COPY: |     case CommandId::SET_TEXTURE_COPY: | ||||||
|     { |     { | ||||||
|         auto& params = command.image_copy; |         auto& params = command.texture_copy; | ||||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), |         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address), | ||||||
|                 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); |                 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | ||||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), |         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address), | ||||||
|                 Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); |                 Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); | ||||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size); |         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size), | ||||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size); |                 params.size); | ||||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)), params.flags); |         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size), | ||||||
|  |                 params.in_width_gap); | ||||||
|  |         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size), | ||||||
|  |                 params.out_width_gap); | ||||||
|  |         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags), | ||||||
|  |                 params.flags); | ||||||
| 
 | 
 | ||||||
|         // TODO: Should this register be set to 1 or should instead its value be OR-ed with 1?
 |         // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to matter.
 | ||||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1); |         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -127,7 +127,16 @@ struct Command { | |||||||
|             u32 in_buffer_size; |             u32 in_buffer_size; | ||||||
|             u32 out_buffer_size; |             u32 out_buffer_size; | ||||||
|             u32 flags; |             u32 flags; | ||||||
|         } image_copy; |         } display_transfer; | ||||||
|  | 
 | ||||||
|  |         struct { | ||||||
|  |             u32 in_buffer_address; | ||||||
|  |             u32 out_buffer_address; | ||||||
|  |             u32 size; | ||||||
|  |             u32 in_width_gap; | ||||||
|  |             u32 out_width_gap; | ||||||
|  |             u32 flags; | ||||||
|  |         } texture_copy; | ||||||
| 
 | 
 | ||||||
|         u8 raw_data[0x1C]; |         u8 raw_data[0x1C]; | ||||||
|     }; |     }; | ||||||
|  | |||||||
| @ -3,6 +3,7 @@ | |||||||
| // Refer to the license.txt file included.
 | // Refer to the license.txt file included.
 | ||||||
| 
 | 
 | ||||||
| #include <cstring> | #include <cstring> | ||||||
|  | #include <numeric> | ||||||
| #include <type_traits> | #include <type_traits> | ||||||
| 
 | 
 | ||||||
| #include "common/color.h" | #include "common/color.h" | ||||||
| @ -158,14 +159,59 @@ inline void Write(u32 addr, const T data) { | |||||||
|             u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); |             u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); | ||||||
|             u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); |             u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); | ||||||
| 
 | 
 | ||||||
|  |             if (config.is_texture_copy) { | ||||||
|  |                 u32 input_width = config.texture_copy.input_width * 16; | ||||||
|  |                 u32 input_gap = config.texture_copy.input_gap * 16; | ||||||
|  |                 u32 output_width = config.texture_copy.output_width * 16; | ||||||
|  |                 u32 output_gap = config.texture_copy.output_gap * 16; | ||||||
|  | 
 | ||||||
|  |                 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap); | ||||||
|  |                 VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), contiguous_input_size); | ||||||
|  | 
 | ||||||
|  |                 u32 remaining_size = config.texture_copy.size; | ||||||
|  |                 u32 remaining_input = input_width; | ||||||
|  |                 u32 remaining_output = output_width; | ||||||
|  |                 while (remaining_size > 0) { | ||||||
|  |                     u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size }); | ||||||
|  | 
 | ||||||
|  |                     std::memcpy(dst_pointer, src_pointer, copy_size); | ||||||
|  |                     src_pointer += copy_size; | ||||||
|  |                     dst_pointer += copy_size; | ||||||
|  | 
 | ||||||
|  |                     remaining_input -= copy_size; | ||||||
|  |                     remaining_output -= copy_size; | ||||||
|  |                     remaining_size -= copy_size; | ||||||
|  | 
 | ||||||
|  |                     if (remaining_input == 0) { | ||||||
|  |                         remaining_input = input_width; | ||||||
|  |                         src_pointer += input_gap; | ||||||
|  |                     } | ||||||
|  |                     if (remaining_output == 0) { | ||||||
|  |                         remaining_output = output_width; | ||||||
|  |                         dst_pointer += output_gap; | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", | ||||||
|  |                     config.texture_copy.size, | ||||||
|  |                     config.GetPhysicalInputAddress(), input_width, input_gap, | ||||||
|  |                     config.GetPhysicalOutputAddress(), output_width, output_gap, | ||||||
|  |                     config.flags); | ||||||
|  | 
 | ||||||
|  |                 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); | ||||||
|  |                 VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), contiguous_output_size); | ||||||
|  | 
 | ||||||
|  |                 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|             if (config.scaling > config.ScaleXY) { |             if (config.scaling > config.ScaleXY) { | ||||||
|                 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); |                 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); | ||||||
|                 UNIMPLEMENTED(); |                 UNIMPLEMENTED(); | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             if (config.output_tiled && |             if (config.input_linear && config.scaling != config.NoScale) { | ||||||
|                     (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) { |  | ||||||
|                 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); |                 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); | ||||||
|                 UNIMPLEMENTED(); |                 UNIMPLEMENTED(); | ||||||
|                 break; |                 break; | ||||||
| @ -182,23 +228,6 @@ inline void Write(u32 addr, const T data) { | |||||||
| 
 | 
 | ||||||
|             VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size); |             VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size); | ||||||
| 
 | 
 | ||||||
|             if (config.raw_copy) { |  | ||||||
|                 // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions
 |  | ||||||
|                 // TODO(Subv): Verify if raw copies perform scaling
 |  | ||||||
|                 memcpy(dst_pointer, src_pointer, output_size); |  | ||||||
| 
 |  | ||||||
|                 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), output format: %x, flags 0x%08X, Raw copy", |  | ||||||
|                     output_size, |  | ||||||
|                     config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), |  | ||||||
|                     config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(), |  | ||||||
|                     config.output_format.Value(), config.flags); |  | ||||||
| 
 |  | ||||||
|                 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); |  | ||||||
| 
 |  | ||||||
|                 VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size); |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             for (u32 y = 0; y < output_height; ++y) { |             for (u32 y = 0; y < output_height; ++y) { | ||||||
|                 for (u32 x = 0; x < output_width; ++x) { |                 for (u32 x = 0; x < output_width; ++x) { | ||||||
|                     Math::Vec4<u8> src_color; |                     Math::Vec4<u8> src_color; | ||||||
| @ -220,7 +249,7 @@ inline void Write(u32 addr, const T data) { | |||||||
|                     u32 src_offset; |                     u32 src_offset; | ||||||
|                     u32 dst_offset; |                     u32 dst_offset; | ||||||
| 
 | 
 | ||||||
|                     if (config.output_tiled) { |                     if (config.input_linear) { | ||||||
|                         if (!config.dont_swizzle) { |                         if (!config.dont_swizzle) { | ||||||
|                             // Interpret the input as linear and the output as tiled
 |                             // Interpret the input as linear and the output as tiled
 | ||||||
|                             u32 coarse_y = y & ~7; |                             u32 coarse_y = y & ~7; | ||||||
|  | |||||||
| @ -201,12 +201,14 @@ struct Regs { | |||||||
|             u32 flags; |             u32 flags; | ||||||
| 
 | 
 | ||||||
|             BitField< 0, 1, u32> flip_vertically;  // flips input data vertically
 |             BitField< 0, 1, u32> flip_vertically;  // flips input data vertically
 | ||||||
|             BitField< 1, 1, u32> output_tiled;     // Converts from linear to tiled format
 |             BitField< 1, 1, u32> input_linear;     // Converts from linear to tiled format
 | ||||||
|             BitField< 3, 1, u32> raw_copy;         // Copies the data without performing any processing
 |             BitField< 2, 1, u32> crop_input_lines; | ||||||
|  |             BitField< 3, 1, u32> is_texture_copy;  // Copies the data without performing any processing, while honoring the texture copy fields
 | ||||||
|             BitField< 5, 1, u32> dont_swizzle; |             BitField< 5, 1, u32> dont_swizzle; | ||||||
|             BitField< 8, 3, PixelFormat> input_format; |             BitField< 8, 3, PixelFormat> input_format; | ||||||
|             BitField<12, 3, PixelFormat> output_format; |             BitField<12, 3, PixelFormat> output_format; | ||||||
| 
 |             /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
 | ||||||
|  |             BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented
 | ||||||
|             BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
 |             BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
 | ||||||
|         }; |         }; | ||||||
| 
 | 
 | ||||||
| @ -214,10 +216,30 @@ struct Regs { | |||||||
| 
 | 
 | ||||||
|         // it seems that writing to this field triggers the display transfer
 |         // it seems that writing to this field triggers the display transfer
 | ||||||
|         u32 trigger; |         u32 trigger; | ||||||
|     } display_transfer_config; |  | ||||||
|     ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c); |  | ||||||
| 
 | 
 | ||||||
|     INSERT_PADDING_WORDS(0x331); |         INSERT_PADDING_WORDS(0x1); | ||||||
|  | 
 | ||||||
|  |         struct { | ||||||
|  |             u32 size; | ||||||
|  | 
 | ||||||
|  |             union { | ||||||
|  |                 u32 input_size; | ||||||
|  | 
 | ||||||
|  |                 BitField< 0, 16, u32> input_width; | ||||||
|  |                 BitField<16, 16, u32> input_gap; | ||||||
|  |             }; | ||||||
|  | 
 | ||||||
|  |             union { | ||||||
|  |                 u32 output_size; | ||||||
|  | 
 | ||||||
|  |                 BitField< 0, 16, u32> output_width; | ||||||
|  |                 BitField<16, 16, u32> output_gap; | ||||||
|  |             }; | ||||||
|  |         } texture_copy; | ||||||
|  |     } display_transfer_config; | ||||||
|  |     ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c); | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x32D); | ||||||
| 
 | 
 | ||||||
|     struct { |     struct { | ||||||
|         // command list size (in bytes)
 |         // command list size (in bytes)
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Yuri Kunde Schlesner
						Yuri Kunde Schlesner