From 4bccf990f3495fcc9d8ad10f974acb6fbc1699cf Mon Sep 17 00:00:00 2001
From: DH
Date: Fri, 1 Nov 2024 09:51:50 +0300
Subject: [PATCH] gpu: reduce cpu usage on cache commands

---
 rpcsx/gpu/Device.cpp        | 18 +++++++++++++++---
 rpcsx/gpu/DeviceContext.hpp | 11 +++++++----
 rpcsx/iodev/dce.cpp         | 24 ++++++++++++++++++++----
 3 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/rpcsx/gpu/Device.cpp b/rpcsx/gpu/Device.cpp
index f5dfd44..4f8ee3d 100644
--- a/rpcsx/gpu/Device.cpp
+++ b/rpcsx/gpu/Device.cpp
@@ -210,7 +210,14 @@ Device::Device() : vkContext(createVkContext(this)) {
 
   cacheUpdateThread = std::jthread([this](const std::stop_token &stopToken) {
     auto &sched = graphicsPipes[0].scheduler;
+    std::uint32_t prevIdleValue = 0;
     while (!stopToken.stop_requested()) {
+      if (gpuCacheCommandIdle.wait(prevIdleValue) != std::errc{}) {
+        continue;
+      }
+
+      prevIdleValue = gpuCacheCommandIdle.load(std::memory_order::acquire);
+
       for (int vmId = 0; vmId < kMaxProcessCount; ++vmId) {
         auto page = gpuCacheCommand[vmId].load(std::memory_order::relaxed);
         if (page == 0) {
@@ -996,11 +1003,16 @@ static void notifyPageChanges(Device *device, int vmId, std::uint32_t firstPage,
       (static_cast<std::uint64_t>(pageCount - 1) << 32) | firstPage;
 
   while (true) {
-    for (std::size_t i = 0; i < std::size(device->cacheCommands); ++i) {
+    for (std::size_t i = 0; i < std::size(device->cpuCacheCommands); ++i) {
       std::uint64_t expCommand = 0;
-      if (device->cacheCommands[vmId][i].compare_exchange_strong(
-              expCommand, command, std::memory_order::acquire,
+      if (device->cpuCacheCommands[vmId][i].compare_exchange_strong(
+              expCommand, command, std::memory_order::release,
               std::memory_order::relaxed)) {
+        device->cpuCacheCommandsIdle[vmId].fetch_add(
+            1, std::memory_order::release);
+        device->cpuCacheCommandsIdle[vmId].notify_one();
+
+        while (device->cpuCacheCommands[vmId][i].load(std::memory_order::acquire) != 0) {}
         return;
       }
     }
diff --git a/rpcsx/gpu/DeviceContext.hpp b/rpcsx/gpu/DeviceContext.hpp
index 70c36ac..dc2f37d 100644
--- a/rpcsx/gpu/DeviceContext.hpp
+++ b/rpcsx/gpu/DeviceContext.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include "orbis/utils/SharedAtomic.hpp"
 #include <atomic>
 #include <cstdint>
 
@@ -66,10 +67,12 @@ enum {
 struct DeviceContext {
   static constexpr auto kMaxProcessCount = 6;
 
-  PadState kbPadState;
-  std::atomic<std::uint64_t> cacheCommands[kMaxProcessCount][4];
-  std::atomic<std::uint32_t> gpuCacheCommand[kMaxProcessCount];
-  std::atomic<std::uint8_t> *cachePages[kMaxProcessCount];
+  PadState kbPadState{};
+  std::atomic<std::uint64_t> cpuCacheCommands[kMaxProcessCount][4]{};
+  orbis::shared_atomic32 cpuCacheCommandsIdle[kMaxProcessCount]{};
+  orbis::shared_atomic32 gpuCacheCommand[kMaxProcessCount]{};
+  orbis::shared_atomic32 gpuCacheCommandIdle{};
+  std::atomic<std::uint8_t> *cachePages[kMaxProcessCount]{};
   volatile std::uint32_t flipBuffer[kMaxProcessCount];
   volatile std::uint64_t flipArg[kMaxProcessCount];
diff --git a/rpcsx/iodev/dce.cpp b/rpcsx/iodev/dce.cpp
index 0ca39c9..80ccf04 100644
--- a/rpcsx/iodev/dce.cpp
+++ b/rpcsx/iodev/dce.cpp
@@ -137,15 +137,26 @@ static void runBridge(int vmId) {
     auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice};
     auto &gpuCtx = gpu.getContext();
     std::vector<std::uint64_t> fetchedCommands;
-    fetchedCommands.reserve(std::size(gpuCtx.cacheCommands));
+    fetchedCommands.reserve(std::size(gpuCtx.cpuCacheCommands));
+
+    std::vector<std::atomic<std::uint64_t> *> fetchedAtomics;
+    std::uint32_t prevIdleValue = 0;
 
     while (true) {
-      for (auto &command : gpuCtx.cacheCommands) {
-        std::uint64_t value = command[vmId].load(std::memory_order::relaxed);
+      if (gpuCtx.cpuCacheCommandsIdle[vmId].wait(prevIdleValue) !=
+          std::errc{}) {
+        continue;
+      }
+
+      prevIdleValue =
+          gpuCtx.cpuCacheCommandsIdle[vmId].load(std::memory_order::acquire);
+
+      for (auto &command : gpuCtx.cpuCacheCommands[vmId]) {
+        std::uint64_t value = command.load(std::memory_order::relaxed);
         if (value != 0) {
           fetchedCommands.push_back(value);
-          command[vmId].store(0, std::memory_order::relaxed);
+          fetchedAtomics.push_back(&command);
         }
       }
 
@@ -187,7 +198,12 @@
         }
       }
 
+      for (auto fetchedAtomic : fetchedAtomics) {
+        fetchedAtomic->store(0, std::memory_order::release);
+      }
+
       fetchedCommands.clear();
+      fetchedAtomics.clear();
     }
   }}.detach();
 }
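
A minimal sketch of the handshake this patch switches to: instead of both sides busy-polling the command slots, the producer bumps a shared counter and notifies it, and the consumer sleeps on that counter until it changes. The sketch uses C++20 std::atomic wait/notify_one as a stand-in for orbis::shared_atomic32, which additionally works across processes and reports the wakeup result as std::errc (hence the `!= std::errc{}` checks in the patch). The names commandSlots, idleCounter, publish, and consumeLoop are illustrative only, not rpcsx identifiers.

// Minimal sketch (not rpcsx code) of the wait/notify handshake adopted above,
// using C++20 std::atomic wait/notify in place of orbis::shared_atomic32.
#include <atomic>
#include <cstdint>

static std::atomic<std::uint64_t> commandSlots[4]{}; // producer -> consumer mailbox
static std::atomic<std::uint32_t> idleCounter{0};    // bumped once per published command

// Producer side (cf. notifyPageChanges): claim a free slot, then wake the
// consumer instead of leaving it to poll.
void publish(std::uint64_t command) {
  while (true) {
    for (auto &slot : commandSlots) {
      std::uint64_t expected = 0;
      if (slot.compare_exchange_strong(expected, command,
                                       std::memory_order::release,
                                       std::memory_order::relaxed)) {
        idleCounter.fetch_add(1, std::memory_order::release);
        idleCounter.notify_one();
        // Wait until the consumer drains the slot, mirroring the completion
        // spin the patch adds after publishing a command.
        while (slot.load(std::memory_order::acquire) != 0) {
        }
        return;
      }
    }
  }
}

// Consumer side (cf. runBridge / cacheUpdateThread): sleep until the counter
// moves, then drain every non-empty slot. shared_atomic32::wait can also fail
// with an std::errc, which the patch handles by continuing the loop.
void consumeLoop() {
  std::uint32_t prevIdleValue = 0;
  while (true) {
    idleCounter.wait(prevIdleValue, std::memory_order::acquire); // blocks while unchanged
    prevIdleValue = idleCounter.load(std::memory_order::acquire);

    for (auto &slot : commandSlots) {
      if (std::uint64_t value = slot.load(std::memory_order::relaxed); value != 0) {
        // ... process value ...
        slot.store(0, std::memory_order::release); // release the waiting producer
      }
    }
  }
}

In the patch this pattern appears in both directions: notifyPageChanges publishes into cpuCacheCommands and wakes runBridge's bridge thread through cpuCacheCommandsIdle, while cacheUpdateThread sleeps on gpuCacheCommandIdle instead of spinning over gpuCacheCommand.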