Compare commits

..

3 Commits

Author SHA1 Message Date
gdkchan
b34de74f81 Avoid adding shader buffer descriptors for constant buffers that are not used (#3478)
* Avoid adding shader buffer descriptors for constant buffers that are not used

* Shader cache version
2022-07-23 11:15:58 -03:00
riperiperi
5811d121df Avoid scaling 2d textures that could be used as 3d (#3464) 2022-07-15 09:24:13 -03:00
Logan Stromberg
6eb85e846f Reduce some unnecessary allocations in DMA handler (#2886)
* experimental changes to try and reduce allocations in kernel threading and DMA handler

* Simplify the changes in this branch to just two things: (1) don't make unnecessary copies of data just for texture-to-texture transfers, and (2) add a fast path for 1bpp linear byte copies

* Check both src and dst linearity in the 1bpp DMA fast path (this check was previously forgotten). Fixes the UE4 regression.

* Remove a dev log statement I left in

* Generalizing the DMA linear fast path to cases other than 1bpp copies

* revert kernel changes

* revert whitespace

* remove unneeded references

* PR feedback

Co-authored-by: Logan Stromberg <lostromb@microsoft.com>
Co-authored-by: gdk <gab.dark.100@gmail.com>
2022-07-14 15:45:56 -03:00
8 changed files with 55 additions and 29 deletions

View File

@@ -208,7 +208,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
} }
ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true); ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true);
Span<byte> dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray();
bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount); bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount); bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);
@@ -262,20 +261,33 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
target.SynchronizeMemory(); target.SynchronizeMemory();
target.SetData(data); target.SetData(data);
target.SignalModified(); target.SignalModified();
return; return;
} }
else if (srcCalculator.LayoutMatches(dstCalculator)) else if (srcCalculator.LayoutMatches(dstCalculator))
{ {
srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely. // No layout conversion has to be performed, just copy the data entirely.
memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, srcSpan);
memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan);
return; return;
} }
} }
unsafe bool Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged unsafe bool Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged
{
if (srcLinear && dstLinear && srcBpp == dstBpp)
{
// Optimized path for purely linear copies - we don't need to calculate every single byte offset,
// and we can make use of Span.CopyTo which is very very fast (even compared to pointers)
for (int y = 0; y < yCount; y++)
{
srcCalculator.SetY(srcRegionY + y);
dstCalculator.SetY(dstRegionY + y);
int srcOffset = srcCalculator.GetOffset(srcRegionX);
int dstOffset = dstCalculator.GetOffset(dstRegionX);
srcSpan.Slice(srcOffset - srcBaseOffset, xCount * srcBpp)
.CopyTo(dstSpan.Slice(dstOffset - dstBaseOffset, xCount * dstBpp));
}
}
else
{ {
fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan) fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
{ {
@@ -296,9 +308,16 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
} }
} }
} }
}
return true; return true;
} }
// OPT: This allocates a (potentially) huge temporary array and then copies an existing
// region of memory into it, even though that data might get overwritten entirely anyway. Ideally this
// should all be rewritten to use pooled arrays, but that gets complicated with packed data and strides.
Span<byte> dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray();
bool _ = srcBpp switch bool _ = srcBpp switch
{ {
1 => Convert<byte>(dstSpan, srcSpan), 1 => Convert<byte>(dstSpan, srcSpan),

View File

@@ -174,6 +174,14 @@ namespace Ryujinx.Graphics.Gpu.Image
} }
} }
if (info.Width == info.Height * info.Height)
{
// Possibly used for a "3D texture" drawn onto a 2D surface.
// Some games do this to generate a tone mapping LUT without rendering into 3D texture slices.
return false;
}
return true; return true;
} }

View File

@@ -21,7 +21,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 1; private const ushort FileFormatVersionMinor = 1;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
private const uint CodeGenVersion = 3457; private const uint CodeGenVersion = 3478;
private const string SharedTocFileName = "shared.toc"; private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data"; private const string SharedDataFileName = "shared.data";

View File

@@ -45,12 +45,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
if (isFP64) if (isFP64)
{ {
return context.PackDouble2x32( return context.PackDouble2x32(
context.Config.CreateCbuf(cbufSlot, cbufOffset), Cbuf(cbufSlot, cbufOffset),
context.Config.CreateCbuf(cbufSlot, cbufOffset + 1)); Cbuf(cbufSlot, cbufOffset + 1));
} }
else else
{ {
return context.Config.CreateCbuf(cbufSlot, cbufOffset); return Cbuf(cbufSlot, cbufOffset);
} }
} }

View File

@@ -300,6 +300,11 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
if (operand.Type != OperandType.LocalVariable) if (operand.Type != OperandType.LocalVariable)
{ {
if (operand.Type == OperandType.ConstantBuffer)
{
Config.SetUsedConstantBuffer(operand.GetCbufSlot());
}
return new AstOperand(operand); return new AstOperand(operand);
} }

View File

@@ -68,7 +68,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{ {
Operand addrLow = operation.GetSource(0); Operand addrLow = operation.GetSource(0);
Operand baseAddrLow = config.CreateCbuf(0, GetStorageCbOffset(config.Stage, storageIndex)); Operand baseAddrLow = Cbuf(0, GetStorageCbOffset(config.Stage, storageIndex));
Operand baseAddrTrunc = Local(); Operand baseAddrTrunc = Local();
@@ -152,7 +152,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{ {
Operand addrLow = operation.GetSource(0); Operand addrLow = operation.GetSource(0);
Operand baseAddrLow = config.CreateCbuf(0, UbeBaseOffset + storageIndex * StorageDescSize); Operand baseAddrLow = Cbuf(0, UbeBaseOffset + storageIndex * StorageDescSize);
Operand baseAddrTrunc = Local(); Operand baseAddrTrunc = Local();

View File

@@ -75,9 +75,9 @@ namespace Ryujinx.Graphics.Shader.Translation
int cbOffset = GetStorageCbOffset(config.Stage, slot); int cbOffset = GetStorageCbOffset(config.Stage, slot);
Operand baseAddrLow = config.CreateCbuf(0, cbOffset); Operand baseAddrLow = Cbuf(0, cbOffset);
Operand baseAddrHigh = config.CreateCbuf(0, cbOffset + 1); Operand baseAddrHigh = Cbuf(0, cbOffset + 1);
Operand size = config.CreateCbuf(0, cbOffset + 2); Operand size = Cbuf(0, cbOffset + 2);
Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow); Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow);
Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow); Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);

View File

@@ -360,12 +360,6 @@ namespace Ryujinx.Graphics.Shader.Translation
UsedFeatures |= flags; UsedFeatures |= flags;
} }
public Operand CreateCbuf(int slot, int offset)
{
SetUsedConstantBuffer(slot);
return OperandHelper.Cbuf(slot, offset);
}
public void SetUsedConstantBuffer(int slot) public void SetUsedConstantBuffer(int slot)
{ {
_usedConstantBuffers |= 1 << slot; _usedConstantBuffers |= 1 << slot;