From 56d79c76a7305f0d59ed543230e92d42eda14319 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 23 Dec 2025 19:55:33 +1000 Subject: [PATCH] GPU: Extract heavier helper functions to own header --- src/core/CMakeLists.txt | 1 + src/core/core.vcxproj | 1 + src/core/core.vcxproj.filters | 1 + src/core/cpu_pgxp.cpp | 2 +- src/core/gpu.cpp | 1 + src/core/gpu.h | 4 - src/core/gpu_commands.cpp | 1 + src/core/gpu_dump.h | 1 + src/core/gpu_helpers.h | 262 ++++++++++++++++++++++++++++ src/core/gpu_hw.cpp | 1 + src/core/gpu_hw_shadergen.cpp | 1 + src/core/gpu_hw_texture_cache.cpp | 1 + src/core/gpu_hw_texture_cache.h | 2 + src/core/gpu_sw.cpp | 1 + src/core/gpu_sw_rasterizer.cpp | 1 + src/core/gpu_thread.cpp | 2 +- src/core/gpu_types.h | 272 +----------------------------- src/core/imgui_overlays.cpp | 1 + src/core/memory_card_image.cpp | 2 +- 19 files changed, 283 insertions(+), 275 deletions(-) create mode 100644 src/core/gpu_helpers.h diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 537d4ea33..fd7f328a9 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -61,6 +61,7 @@ add_library(core gpu_commands.cpp gpu_dump.cpp gpu_dump.h + gpu_helpers.h gpu_hw.cpp gpu_hw.h gpu_hw_shadergen.cpp diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index c613e7a1c..c53b19ec3 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -127,6 +127,7 @@ + diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index 84cde47b8..8d802024a 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -157,6 +157,7 @@ + diff --git a/src/core/cpu_pgxp.cpp b/src/core/cpu_pgxp.cpp index 013eb952f..d983664b6 100644 --- a/src/core/cpu_pgxp.cpp +++ b/src/core/cpu_pgxp.cpp @@ -10,7 +10,7 @@ #include "cpu_core.h" #include "cpu_core_private.h" #include "cpu_disasm.h" -#include "gpu_types.h" +#include "gpu_helpers.h" #include "settings.h" #include "util/gpu_device.h" diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 1ff52300d..d4d0103a4 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -6,6 +6,7 @@ #include "dma.h" #include "gpu_backend.h" #include "gpu_dump.h" +#include "gpu_helpers.h" #include "gpu_hw_texture_cache.h" #include "gpu_shadergen.h" #include "gpu_sw_rasterizer.h" diff --git a/src/core/gpu.h b/src/core/gpu.h index 9f33aea75..e8269ad0c 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -4,12 +4,8 @@ #pragma once #include "gpu_types.h" -#include "timers.h" #include "types.h" -#include "util/gpu_device.h" -#include "util/gpu_texture.h" - #include "common/bitfield.h" #include "common/fifo_queue.h" #include "common/gsvector.h" diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index ce354335f..7dbcc02a7 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -5,6 +5,7 @@ #include "gpu.h" #include "gpu_backend.h" #include "gpu_dump.h" +#include "gpu_helpers.h" #include "gpu_thread_commands.h" #include "interrupt_controller.h" #include "system.h" diff --git a/src/core/gpu_dump.h b/src/core/gpu_dump.h index c2a0a8359..bbc2cc9b8 100644 --- a/src/core/gpu_dump.h +++ b/src/core/gpu_dump.h @@ -4,6 +4,7 @@ #pragma once #include "gpu_types.h" +#include "types.h" #include "common/bitfield.h" #include "common/file_system.h" diff --git a/src/core/gpu_helpers.h b/src/core/gpu_helpers.h new file mode 100644 index 000000000..418b00aad --- /dev/null +++ b/src/core/gpu_helpers.h @@ -0,0 +1,262 @@ +// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +#pragma once + +#include "gpu_types.h" + +#include "util/gpu_types.h" + +#include "common/bitutils.h" +#include "common/gsvector.h" + +ALWAYS_INLINE static constexpr bool TextureModeHasPalette(GPUTextureMode mode) +{ + return (mode < GPUTextureMode::Direct16Bit); +} + +ALWAYS_INLINE constexpr u32 VRAMRGBA5551ToRGBA8888(u32 color) +{ + // Helper/format conversion functions - constants from https://stackoverflow.com/a/9069480 +#define E5TO8(color) ((((color) * 527u) + 23u) >> 6) + + const u32 r = E5TO8(color & 31u); + const u32 g = E5TO8((color >> 5) & 31u); + const u32 b = E5TO8((color >> 10) & 31u); + const u32 a = ((color >> 15) != 0) ? 255 : 0; + return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16) | (ZeroExtend32(a) << 24); + +#undef E5TO8 +} + +ALWAYS_INLINE constexpr u16 VRAMRGBA8888ToRGBA5551(u32 color) +{ + const u32 r = (color & 0xFFu) >> 3; + const u32 g = ((color >> 8) & 0xFFu) >> 3; + const u32 b = ((color >> 16) & 0xFFu) >> 3; + const u32 a = ((color >> 24) & 0x01u); + return Truncate16(r | (g << 5) | (b << 10) | (a << 15)); +} + +#ifdef CPU_ARCH_SIMD + +ALWAYS_INLINE GSVector4i VRAM5BitTo8Bit(GSVector4i val) +{ + return val.mul32l(GSVector4i::cxpr(527)).add32(GSVector4i::cxpr(23)).srl32<6>(); +} + +ALWAYS_INLINE GSVector4i VRAMRGB5A1ToRGBA8888(GSVector4i val) +{ + static constexpr GSVector4i cmask = GSVector4i::cxpr(0x1F); + + const GSVector4i r = VRAM5BitTo8Bit(val & cmask); + const GSVector4i g = VRAM5BitTo8Bit((val.srl32<5>() & cmask)); + const GSVector4i b = VRAM5BitTo8Bit((val.srl32<10>() & cmask)); + const GSVector4i a = val.srl32<15>().sll32<31>().sra32<7>(); + + return r | g.sll32<8>() | b.sll32<16>() | a; +} + +template +ALWAYS_INLINE void ConvertVRAMPixels(u8*& dest, GSVector4i c16) +{ + if constexpr (format == GPUTextureFormat::RGBA8) + { + const GSVector4i low = VRAMRGB5A1ToRGBA8888(c16.upl16()); + const GSVector4i high = VRAMRGB5A1ToRGBA8888(c16.uph16()); + + GSVector4i::store(dest, low); + dest += sizeof(GSVector4i); + + GSVector4i::store(dest, high); + dest += sizeof(GSVector4i); + } + else if constexpr (format == GPUTextureFormat::RGB5A1) + { + static constexpr GSVector4i cmask = GSVector4i::cxpr16(0x1F); + + const GSVector4i repacked = + (c16 & GSVector4i::cxpr16(static_cast(0x83E0))) | (c16.srl16<10>() & cmask) | (c16 & cmask).sll16<10>(); + + GSVector4i::store(dest, repacked); + dest += sizeof(GSVector4i); + } + else if constexpr (format == GPUTextureFormat::A1BGR5) + { + const GSVector4i repacked = (c16 & GSVector4i::cxpr16(static_cast(0x3E0))).sll16<1>() | + (c16.srl16<9>() & GSVector4i::cxpr16(0x3E)) | + (c16 & GSVector4i::cxpr16(0x1F)).sll16<11>() | c16.srl16<15>(); + + GSVector4i::store(dest, repacked); + dest += sizeof(GSVector4i); + } + else if constexpr (format == GPUTextureFormat::RGB565) + { + constexpr GSVector4i single_mask = GSVector4i::cxpr16(0x1F); + const GSVector4i a = (c16 & GSVector4i::cxpr16(0x3E0)).sll16<1>(); // (value & 0x3E0) << 1 + const GSVector4i b = (c16 & GSVector4i::cxpr16(0x20)).sll16<1>(); // (value & 0x20) << 1 + const GSVector4i c = (c16.srl16<10>() & single_mask); // ((value >> 10) & 0x1F) + const GSVector4i d = (c16 & single_mask).sll16<11>(); // ((value & 0x1F) << 11) + GSVector4i::store(dest, (((a | b) | c) | d)); + dest += sizeof(GSVector4i); + } +} + +#endif + +template +ALWAYS_INLINE void ConvertVRAMPixel(u8*& dest, u16 c16) +{ + if constexpr (format == GPUTextureFormat::RGBA8) + { + const u32 c32 = VRAMRGBA5551ToRGBA8888(c16); + std::memcpy(std::assume_aligned(dest), &c32, sizeof(c32)); + dest += sizeof(c32); + } + else if constexpr (format == GPUTextureFormat::RGB5A1) + { + const u16 repacked = (c16 & 0x83E0) | ((c16 >> 10) & 0x1F) | ((c16 & 0x1F) << 10); + std::memcpy(std::assume_aligned(dest), &repacked, sizeof(repacked)); + dest += sizeof(repacked); + } + else if constexpr (format == GPUTextureFormat::A1BGR5) + { + const u16 repacked = ((c16 & 0x3E0) << 1) | ((c16 >> 9) & 0x3E) | ((c16 & 0x1F) << 11) | (c16 >> 15); + std::memcpy(std::assume_aligned(dest), &repacked, sizeof(repacked)); + dest += sizeof(repacked); + } + else if constexpr (format == GPUTextureFormat::RGB565) + { + const u16 repacked = ((c16 & 0x3E0) << 1) | ((c16 & 0x20) << 1) | ((c16 >> 10) & 0x1F) | ((c16 & 0x1F) << 11); + std::memcpy(std::assume_aligned(dest), &repacked, sizeof(repacked)); + dest += sizeof(repacked); + } +} + +// Sprites/rectangles should be clipped to 11 bits before drawing. +inline constexpr s32 TruncateGPUVertexPosition(s32 x) +{ + return SignExtendN<11, s32>(x); +} + +ALWAYS_INLINE constexpr u32 VRAMPageIndex(u32 px, u32 py) +{ + return ((py * VRAM_PAGES_WIDE) + px); +} +ALWAYS_INLINE constexpr GSVector4i VRAMPageRect(u32 px, u32 py) +{ + return GSVector4i::cxpr(px * VRAM_PAGE_WIDTH, py * VRAM_PAGE_HEIGHT, (px + 1) * VRAM_PAGE_WIDTH, + (py + 1) * VRAM_PAGE_HEIGHT); +} +ALWAYS_INLINE constexpr GSVector4i VRAMPageRect(u32 pn) +{ + // TODO: Put page rects in a LUT instead? + return VRAMPageRect(pn % VRAM_PAGES_WIDE, pn / VRAM_PAGES_WIDE); +} + +ALWAYS_INLINE constexpr u32 VRAMCoordinateToPage(u32 x, u32 y) +{ + return VRAMPageIndex(x / VRAM_PAGE_WIDTH, y / VRAM_PAGE_HEIGHT); +} + +ALWAYS_INLINE constexpr u32 VRAMPageStartX(u32 pn) +{ + return (pn % VRAM_PAGES_WIDE) * VRAM_PAGE_WIDTH; +} + +ALWAYS_INLINE constexpr u32 VRAMPageStartY(u32 pn) +{ + return (pn / VRAM_PAGES_WIDE) * VRAM_PAGE_HEIGHT; +} + +ALWAYS_INLINE constexpr u8 GetTextureModeShift(GPUTextureMode mode) +{ + return ((mode < GPUTextureMode::Direct16Bit) ? (2 - static_cast(mode)) : 0); +} + +ALWAYS_INLINE constexpr u32 ApplyTextureModeShift(GPUTextureMode mode, u32 vram_width) +{ + return vram_width << GetTextureModeShift(mode); +} + +ALWAYS_INLINE GSVector4i ApplyTextureModeShift(GPUTextureMode mode, const GSVector4i rect) +{ + return rect.sll32(GetTextureModeShift(mode)); +} + +ALWAYS_INLINE constexpr u32 TexturePageCountForMode(GPUTextureMode mode) +{ + return ((mode < GPUTextureMode::Direct16Bit) ? (1 + static_cast(mode)) : 4); +} + +ALWAYS_INLINE constexpr u32 TexturePageWidthForMode(GPUTextureMode mode) +{ + return TEXTURE_PAGE_WIDTH >> GetTextureModeShift(mode); +} + +ALWAYS_INLINE constexpr bool TexturePageIsWrapping(GPUTextureMode mode, u32 pn) +{ + return ((VRAMPageStartX(pn) + TexturePageWidthForMode(mode)) > VRAM_WIDTH); +} + +ALWAYS_INLINE constexpr u32 PalettePageCountForMode(GPUTextureMode mode) +{ + return (mode == GPUTextureMode::Palette4Bit) ? 1 : 4; +} + +ALWAYS_INLINE constexpr u32 PalettePageNumber(GPUTexturePaletteReg reg) +{ + return VRAMCoordinateToPage(reg.GetXBase(), reg.GetYBase()); +} + +ALWAYS_INLINE constexpr GSVector4i GetTextureRect(u32 pn, GPUTextureMode mode) +{ + u32 left = VRAMPageStartX(pn); + u32 top = VRAMPageStartY(pn); + u32 right = left + TexturePageWidthForMode(mode); + u32 bottom = top + VRAM_PAGE_HEIGHT; + if (right > VRAM_WIDTH) [[unlikely]] + { + left = 0; + right = VRAM_WIDTH; + } + if (bottom > VRAM_HEIGHT) [[unlikely]] + { + top = 0; + bottom = VRAM_HEIGHT; + } + + return GSVector4i::cxpr(left, top, right, bottom); +} + +ALWAYS_INLINE constexpr GSVector4i GetTextureRectWithoutWrap(u32 pn, GPUTextureMode mode) +{ + const u32 left = VRAMPageStartX(pn); + const u32 top = VRAMPageStartY(pn); + const u32 right = std::min(left + TexturePageWidthForMode(mode), VRAM_WIDTH); + const u32 bottom = top + VRAM_PAGE_HEIGHT; + return GSVector4i::cxpr(left, top, right, bottom); +} + +/// Returns the maximum index for a paletted texture. +ALWAYS_INLINE constexpr u32 GetPaletteWidth(GPUTextureMode mode) +{ + return (mode == GPUTextureMode::Palette4Bit ? 16 : ((mode == GPUTextureMode::Palette8Bit) ? 256 : 0)); +} + +/// Returns a rectangle comprising the texture palette area. +ALWAYS_INLINE constexpr GSVector4i GetPaletteRect(GPUTexturePaletteReg palette, GPUTextureMode mode, + bool clamp_instead_of_wrapping = false) +{ + const u32 width = GetPaletteWidth(mode); + u32 left = palette.GetXBase(); + u32 top = palette.GetYBase(); + u32 right = left + width; + u32 bottom = top + 1; + if (right > VRAM_WIDTH) [[unlikely]] + { + right = VRAM_WIDTH; + left = clamp_instead_of_wrapping ? left : 0; + } + return GSVector4i::cxpr(left, top, right, bottom); +} diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 76ee424a6..6525475e4 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -7,6 +7,7 @@ #include "cpu_pgxp.h" #include "fullscreenui_widgets.h" #include "gpu.h" +#include "gpu_helpers.h" #include "gpu_hw_shadergen.h" #include "gpu_presenter.h" #include "gpu_sw_rasterizer.h" diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 9c4e179ff..fdbd817c4 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -5,6 +5,7 @@ // #include "gpu_hw_shadergen.h" +#include "gpu_types.h" #include "common/assert.h" diff --git a/src/core/gpu_hw_texture_cache.cpp b/src/core/gpu_hw_texture_cache.cpp index 9be5ffc03..5ceec0093 100644 --- a/src/core/gpu_hw_texture_cache.cpp +++ b/src/core/gpu_hw_texture_cache.cpp @@ -4,6 +4,7 @@ #include "gpu_hw_texture_cache.h" #include "fullscreenui_widgets.h" #include "game_database.h" +#include "gpu_helpers.h" #include "gpu_hw.h" #include "gpu_hw_shadergen.h" #include "gpu_sw_rasterizer.h" diff --git a/src/core/gpu_hw_texture_cache.h b/src/core/gpu_hw_texture_cache.h index bc84782a5..f7d103d2d 100644 --- a/src/core/gpu_hw_texture_cache.h +++ b/src/core/gpu_hw_texture_cache.h @@ -5,6 +5,8 @@ #include "gpu_types.h" +#include "common/gsvector.h" + class Error; class Image; class GPUTexture; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index d8ad41341..7bea61475 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -3,6 +3,7 @@ #include "gpu_sw.h" #include "gpu.h" +#include "gpu_helpers.h" #include "gpu_presenter.h" #include "gpu_sw_rasterizer.h" #include "settings.h" diff --git a/src/core/gpu_sw_rasterizer.cpp b/src/core/gpu_sw_rasterizer.cpp index e934aeebf..3ae9378ca 100644 --- a/src/core/gpu_sw_rasterizer.cpp +++ b/src/core/gpu_sw_rasterizer.cpp @@ -3,6 +3,7 @@ #include "gpu_sw_rasterizer.h" #include "gpu.h" +#include "gpu_helpers.h" #include "cpuinfo.h" diff --git a/src/core/gpu_thread.cpp b/src/core/gpu_thread.cpp index dc3f71e14..ac2b4ba40 100644 --- a/src/core/gpu_thread.cpp +++ b/src/core/gpu_thread.cpp @@ -97,7 +97,7 @@ namespace { struct ALIGN_TO_CACHE_LINE State { // Owned by CPU thread. - ALIGN_TO_CACHE_LINE Timer::Value thread_spin_time = 0; + Timer::Value thread_spin_time = 0; Threading::ThreadHandle gpu_thread; Common::unique_aligned_ptr command_fifo_data; WindowInfo render_window_info; diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h index 8013c7657..7d6937017 100644 --- a/src/core/gpu_types.h +++ b/src/core/gpu_types.h @@ -1,17 +1,11 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #pragma once -#include "types.h" - -#include "util/gpu_texture.h" - #include "common/bitfield.h" #include "common/bitutils.h" -#include "common/gsvector.h" - -#include +#include "common/types.h" enum : u32 { @@ -79,11 +73,6 @@ enum class GPUTextureMode : u8 IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUTextureMode); -ALWAYS_INLINE static constexpr bool TextureModeHasPalette(GPUTextureMode mode) -{ - return (mode < GPUTextureMode::Direct16Bit); -} - enum class GPUTransparencyMode : u8 { HalfBackgroundPlusHalfForeground = 0, @@ -227,124 +216,6 @@ union GPUSTAT } }; -ALWAYS_INLINE constexpr u32 VRAMRGBA5551ToRGBA8888(u32 color) -{ - // Helper/format conversion functions - constants from https://stackoverflow.com/a/9069480 -#define E5TO8(color) ((((color) * 527u) + 23u) >> 6) - - const u32 r = E5TO8(color & 31u); - const u32 g = E5TO8((color >> 5) & 31u); - const u32 b = E5TO8((color >> 10) & 31u); - const u32 a = ((color >> 15) != 0) ? 255 : 0; - return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16) | (ZeroExtend32(a) << 24); - -#undef E5TO8 -} - -ALWAYS_INLINE constexpr u16 VRAMRGBA8888ToRGBA5551(u32 color) -{ - const u32 r = (color & 0xFFu) >> 3; - const u32 g = ((color >> 8) & 0xFFu) >> 3; - const u32 b = ((color >> 16) & 0xFFu) >> 3; - const u32 a = ((color >> 24) & 0x01u); - return Truncate16(r | (g << 5) | (b << 10) | (a << 15)); -} - -#ifdef CPU_ARCH_SIMD - -ALWAYS_INLINE GSVector4i VRAM5BitTo8Bit(GSVector4i val) -{ - return val.mul32l(GSVector4i::cxpr(527)).add32(GSVector4i::cxpr(23)).srl32<6>(); -} - -ALWAYS_INLINE GSVector4i VRAMRGB5A1ToRGBA8888(GSVector4i val) -{ - static constexpr GSVector4i cmask = GSVector4i::cxpr(0x1F); - - const GSVector4i r = VRAM5BitTo8Bit(val & cmask); - const GSVector4i g = VRAM5BitTo8Bit((val.srl32<5>() & cmask)); - const GSVector4i b = VRAM5BitTo8Bit((val.srl32<10>() & cmask)); - const GSVector4i a = val.srl32<15>().sll32<31>().sra32<7>(); - - return r | g.sll32<8>() | b.sll32<16>() | a; -} - -template -ALWAYS_INLINE void ConvertVRAMPixels(u8*& dest, GSVector4i c16) -{ - if constexpr (format == GPUTextureFormat::RGBA8) - { - const GSVector4i low = VRAMRGB5A1ToRGBA8888(c16.upl16()); - const GSVector4i high = VRAMRGB5A1ToRGBA8888(c16.uph16()); - - GSVector4i::store(dest, low); - dest += sizeof(GSVector4i); - - GSVector4i::store(dest, high); - dest += sizeof(GSVector4i); - } - else if constexpr (format == GPUTextureFormat::RGB5A1) - { - static constexpr GSVector4i cmask = GSVector4i::cxpr16(0x1F); - - const GSVector4i repacked = - (c16 & GSVector4i::cxpr16(static_cast(0x83E0))) | (c16.srl16<10>() & cmask) | (c16 & cmask).sll16<10>(); - - GSVector4i::store(dest, repacked); - dest += sizeof(GSVector4i); - } - else if constexpr (format == GPUTextureFormat::A1BGR5) - { - const GSVector4i repacked = (c16 & GSVector4i::cxpr16(static_cast(0x3E0))).sll16<1>() | - (c16.srl16<9>() & GSVector4i::cxpr16(0x3E)) | - (c16 & GSVector4i::cxpr16(0x1F)).sll16<11>() | c16.srl16<15>(); - - GSVector4i::store(dest, repacked); - dest += sizeof(GSVector4i); - } - else if constexpr (format == GPUTextureFormat::RGB565) - { - constexpr GSVector4i single_mask = GSVector4i::cxpr16(0x1F); - const GSVector4i a = (c16 & GSVector4i::cxpr16(0x3E0)).sll16<1>(); // (value & 0x3E0) << 1 - const GSVector4i b = (c16 & GSVector4i::cxpr16(0x20)).sll16<1>(); // (value & 0x20) << 1 - const GSVector4i c = (c16.srl16<10>() & single_mask); // ((value >> 10) & 0x1F) - const GSVector4i d = (c16 & single_mask).sll16<11>(); // ((value & 0x1F) << 11) - GSVector4i::store(dest, (((a | b) | c) | d)); - dest += sizeof(GSVector4i); - } -} - -#endif - -template -ALWAYS_INLINE void ConvertVRAMPixel(u8*& dest, u16 c16) -{ - if constexpr (format == GPUTextureFormat::RGBA8) - { - const u32 c32 = VRAMRGBA5551ToRGBA8888(c16); - std::memcpy(std::assume_aligned(dest), &c32, sizeof(c32)); - dest += sizeof(c32); - } - else if constexpr (format == GPUTextureFormat::RGB5A1) - { - const u16 repacked = (c16 & 0x83E0) | ((c16 >> 10) & 0x1F) | ((c16 & 0x1F) << 10); - std::memcpy(std::assume_aligned(dest), &repacked, sizeof(repacked)); - dest += sizeof(repacked); - } - else if constexpr (format == GPUTextureFormat::A1BGR5) - { - const u16 repacked = ((c16 & 0x3E0) << 1) | ((c16 >> 9) & 0x3E) | ((c16 & 0x1F) << 11) | (c16 >> 15); - std::memcpy(std::assume_aligned(dest), &repacked, sizeof(repacked)); - dest += sizeof(repacked); - } - else if constexpr (format == GPUTextureFormat::RGB565) - { - const u16 repacked = ((c16 & 0x3E0) << 1) | ((c16 & 0x20) << 1) | ((c16 >> 10) & 0x1F) | ((c16 & 0x1F) << 11); - std::memcpy(std::assume_aligned(dest), &repacked, sizeof(repacked)); - dest += sizeof(repacked); - } -} - union GPUVertexPosition { u32 bits; @@ -353,12 +224,6 @@ union GPUVertexPosition BitField y; }; -// Sprites/rectangles should be clipped to 11 bits before drawing. -inline constexpr s32 TruncateGPUVertexPosition(s32 x) -{ - return SignExtendN<11, s32>(x); -} - // bits in GP0(E1h) or texpage part of polygon union GPUDrawModeReg { @@ -416,139 +281,10 @@ union GPUTextureWindow u32 bits; - ALWAYS_INLINE bool operator==(const GPUTextureWindow& rhs) const - { - return (std::memcmp(this, &rhs, sizeof(*this)) == 0); - } - - ALWAYS_INLINE bool operator!=(const GPUTextureWindow& rhs) const - { - return (std::memcmp(this, &rhs, sizeof(*this)) != 0); - } + ALWAYS_INLINE bool operator==(const GPUTextureWindow& rhs) const { return (bits == rhs.bits); } + ALWAYS_INLINE bool operator!=(const GPUTextureWindow& rhs) const { return (bits != rhs.bits); } }; -ALWAYS_INLINE constexpr u32 VRAMPageIndex(u32 px, u32 py) -{ - return ((py * VRAM_PAGES_WIDE) + px); -} -ALWAYS_INLINE constexpr GSVector4i VRAMPageRect(u32 px, u32 py) -{ - return GSVector4i::cxpr(px * VRAM_PAGE_WIDTH, py * VRAM_PAGE_HEIGHT, (px + 1) * VRAM_PAGE_WIDTH, - (py + 1) * VRAM_PAGE_HEIGHT); -} -ALWAYS_INLINE constexpr GSVector4i VRAMPageRect(u32 pn) -{ - // TODO: Put page rects in a LUT instead? - return VRAMPageRect(pn % VRAM_PAGES_WIDE, pn / VRAM_PAGES_WIDE); -} - -ALWAYS_INLINE constexpr u32 VRAMCoordinateToPage(u32 x, u32 y) -{ - return VRAMPageIndex(x / VRAM_PAGE_WIDTH, y / VRAM_PAGE_HEIGHT); -} - -ALWAYS_INLINE constexpr u32 VRAMPageStartX(u32 pn) -{ - return (pn % VRAM_PAGES_WIDE) * VRAM_PAGE_WIDTH; -} - -ALWAYS_INLINE constexpr u32 VRAMPageStartY(u32 pn) -{ - return (pn / VRAM_PAGES_WIDE) * VRAM_PAGE_HEIGHT; -} - -ALWAYS_INLINE constexpr u8 GetTextureModeShift(GPUTextureMode mode) -{ - return ((mode < GPUTextureMode::Direct16Bit) ? (2 - static_cast(mode)) : 0); -} - -ALWAYS_INLINE constexpr u32 ApplyTextureModeShift(GPUTextureMode mode, u32 vram_width) -{ - return vram_width << GetTextureModeShift(mode); -} - -ALWAYS_INLINE GSVector4i ApplyTextureModeShift(GPUTextureMode mode, const GSVector4i rect) -{ - return rect.sll32(GetTextureModeShift(mode)); -} - -ALWAYS_INLINE constexpr u32 TexturePageCountForMode(GPUTextureMode mode) -{ - return ((mode < GPUTextureMode::Direct16Bit) ? (1 + static_cast(mode)) : 4); -} - -ALWAYS_INLINE constexpr u32 TexturePageWidthForMode(GPUTextureMode mode) -{ - return TEXTURE_PAGE_WIDTH >> GetTextureModeShift(mode); -} - -ALWAYS_INLINE constexpr bool TexturePageIsWrapping(GPUTextureMode mode, u32 pn) -{ - return ((VRAMPageStartX(pn) + TexturePageWidthForMode(mode)) > VRAM_WIDTH); -} - -ALWAYS_INLINE constexpr u32 PalettePageCountForMode(GPUTextureMode mode) -{ - return (mode == GPUTextureMode::Palette4Bit) ? 1 : 4; -} - -ALWAYS_INLINE constexpr u32 PalettePageNumber(GPUTexturePaletteReg reg) -{ - return VRAMCoordinateToPage(reg.GetXBase(), reg.GetYBase()); -} - -ALWAYS_INLINE constexpr GSVector4i GetTextureRect(u32 pn, GPUTextureMode mode) -{ - u32 left = VRAMPageStartX(pn); - u32 top = VRAMPageStartY(pn); - u32 right = left + TexturePageWidthForMode(mode); - u32 bottom = top + VRAM_PAGE_HEIGHT; - if (right > VRAM_WIDTH) [[unlikely]] - { - left = 0; - right = VRAM_WIDTH; - } - if (bottom > VRAM_HEIGHT) [[unlikely]] - { - top = 0; - bottom = VRAM_HEIGHT; - } - - return GSVector4i::cxpr(left, top, right, bottom); -} - -ALWAYS_INLINE constexpr GSVector4i GetTextureRectWithoutWrap(u32 pn, GPUTextureMode mode) -{ - const u32 left = VRAMPageStartX(pn); - const u32 top = VRAMPageStartY(pn); - const u32 right = std::min(left + TexturePageWidthForMode(mode), VRAM_WIDTH); - const u32 bottom = top + VRAM_PAGE_HEIGHT; - return GSVector4i::cxpr(left, top, right, bottom); -} - -/// Returns the maximum index for a paletted texture. -ALWAYS_INLINE constexpr u32 GetPaletteWidth(GPUTextureMode mode) -{ - return (mode == GPUTextureMode::Palette4Bit ? 16 : ((mode == GPUTextureMode::Palette8Bit) ? 256 : 0)); -} - -/// Returns a rectangle comprising the texture palette area. -ALWAYS_INLINE constexpr GSVector4i GetPaletteRect(GPUTexturePaletteReg palette, GPUTextureMode mode, - bool clamp_instead_of_wrapping = false) -{ - const u32 width = GetPaletteWidth(mode); - u32 left = palette.GetXBase(); - u32 top = palette.GetYBase(); - u32 right = left + width; - u32 bottom = top + 1; - if (right > VRAM_WIDTH) [[unlikely]] - { - right = VRAM_WIDTH; - left = clamp_instead_of_wrapping ? left : 0; - } - return GSVector4i::cxpr(left, top, right, bottom); -} - // 4x4 dither matrix. inline constexpr s32 DITHER_MATRIX[DITHER_MATRIX_SIZE][DITHER_MATRIX_SIZE] = {{-4, +0, -3, +1}, // row 0 {+2, -2, +3, -1}, // row 1 diff --git a/src/core/imgui_overlays.cpp b/src/core/imgui_overlays.cpp index 10f6c6918..486c9392b 100644 --- a/src/core/imgui_overlays.cpp +++ b/src/core/imgui_overlays.cpp @@ -20,6 +20,7 @@ #include "settings.h" #include "spu.h" #include "system.h" +#include "timers.h" #include "util/gpu_device.h" #include "util/imgui_animated.h" diff --git a/src/core/memory_card_image.cpp b/src/core/memory_card_image.cpp index 80fdd1607..b7af45ac2 100644 --- a/src/core/memory_card_image.cpp +++ b/src/core/memory_card_image.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "memory_card_image.h" -#include "gpu_types.h" +#include "gpu_helpers.h" #include "system.h" #include "util/shiftjis.h"