Skip to content

Commit

Permalink
Merge branch 'instance-spatial-map' into 'main'
Browse files Browse the repository at this point in the history
[REMIX-2432] optimizing CPU instance overhead

See merge request lightspeedrtx/dxvk-remix-nv!644
  • Loading branch information
MarkEHenderson committed Jan 19, 2024
2 parents 18514d1 + db9de2e commit 1f5b5dd
Show file tree
Hide file tree
Showing 15 changed files with 449 additions and 120 deletions.
5 changes: 5 additions & 0 deletions documentation/UnitTest.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@ These tests currently focus on the important SSE operations and math algorithms
- New unit tests should have a minimal number of dependencies. If there are too many dependencies, refactor the code under test and make sure it is compatible with the unit test.
- It is essential that the unit tests offer complete coverage. This requires carefully designed test data that covers every possible branch and scenario. Make sure the expected results are correct. Moreover, for ease of debugging, add comments with details about each test.
- In cases where a test fails to deliver the expected outcome, it is important to handle the error and log a descriptive message.

## Running Locally

1. Open a PowerShell window in `dxvk-remix-nv/` and run `.\build_dxvk.ps1 -BuildFlavour release -BuildSubDir _Comp64UnitTest -Backend ninja -EnableTracy false unit_tests`
2. Then run `.\_Comp64UnitTest\tests\rtx\unit\<test_name>.exe`
4 changes: 2 additions & 2 deletions src/dxvk/imgui/dxvk_imgui.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2002,13 +2002,13 @@ namespace dxvk {

if (ImGui::CollapsingHeader("Step 2: Parameter Tuning", collapsingHeaderClosedFlags)) {
ImGui::Indent();
ImGui::DragFloat("Scene Unit Scale", &RtxOptions::Get()->sceneScaleObject(), 0.01f, 0.01f, FLT_MAX, "%.3f", sliderFlags);
ImGui::DragFloat("Scene Unit Scale", &RtxOptions::Get()->sceneScaleObject(), 0.01f, 0.00001f, FLT_MAX, "%.3f", sliderFlags);
ImGui::Checkbox("Scene Z-Up", &RtxOptions::Get()->zUpObject());
ImGui::Checkbox("Scene Left-Handed", &RtxOptions::Get()->isLHSObject());
fusedWorldViewModeCombo.getKey(&RtxOptions::Get()->fusedWorldViewModeRef());
ImGui::Separator();

ImGui::DragFloat("Unique Object Search Distance", &RtxOptions::Get()->uniqueObjectDistanceObject(), 0.01f, 0.01f, FLT_MAX, "%.3f", sliderFlags);
ImGui::DragFloat("Unique Object Search Distance", &RtxOptions::Get()->uniqueObjectDistanceObject(), 0.01f, FLT_MIN, FLT_MAX, "%.3f", sliderFlags);
ImGui::Separator();

ImGui::DragFloat("Vertex Color Strength", &RtxOptions::Get()->vertexColorStrengthObject(), 0.001f, 0.0f, 1.0f);
Expand Down
6 changes: 6 additions & 0 deletions src/dxvk/rtx_render/rtx_draw_call_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ class DrawCallCache : public CommonDeviceObject {
void clear() {
m_entries.clear();
}

// Asks every cached entry to rebuild its spatial map.
void rebuildSpatialMaps() {
  // Range-for instead of `auto& iter = m_entries.begin()`: binding a non-const lvalue
  // reference to the temporary iterator is not valid standard C++ (MSVC-only extension).
  for (auto& entry : m_entries) {
    entry.second.rebuildSpatialMap();
  }
}

private:
MultimapType m_entries;
Expand Down
208 changes: 113 additions & 95 deletions src/dxvk/rtx_render/rtx_instance_manager.cpp

Large diffs are not rendered by default.

29 changes: 24 additions & 5 deletions src/dxvk/rtx_render/rtx_instance_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,30 @@ class RtInstance {
Vector3 getWorldPosition() const { return { m_vkInstance.transform.matrix[0][3], m_vkInstance.transform.matrix[1][3], m_vkInstance.transform.matrix[2][3] }; }
const Vector3& getPrevWorldPosition() const { return surface.prevObjectToWorld.data[3].xyz(); }

const Vector3& getSpatialCachePosition() const { return m_spatialCachePos; }
// Erases this instance from the spatial map of its linked BLAS entry, using the cached
// spatial position as the lookup key. Instances flagged as created by the renderer are skipped.
void removeFromSpatialCache() const {
  if (!m_isCreatedByRenderer) {
    auto& spatialMap = m_linkedBlas->getSpatialMap();
    spatialMap.erase(m_spatialCachePos, this);
  }
}

bool isCreatedThisFrame(uint32_t frameIndex) const { return frameIndex == m_frameCreated; }

// Bind a BLAS object to this instance
void setBlas(BlasEntry& blas);
// Set the transform for this instance, returns true if object has moved
bool setTransform(const Matrix4& objectToWorld);
// Set the transform for this instance for current frame only, returns true if object has moved
bool setCurrentTransform(const Matrix4& objectToWorld);
void setPrevTransform(const Matrix4& objectToWorld);

// Sets current and previous transforms explicitly
bool teleport(const Matrix4& objectToWorld);
bool teleport(const Matrix4& objectToWorld, const Matrix4& prevObjectToWorld);
// Changes all transform data from an old context to a new context (i.e. when an instance moves through a portal).
void teleportWithHistory(const Matrix4& oldToNew);

// Move to the new transform and retain previous transforms as history (call the first time a transform changes per frame)
bool move(const Matrix4& objectToWorld);
// Move to the new transform without changing history (call if the transform is changed multiple times per frame)
bool moveAgain(const Matrix4& objectToWorld);

void setFrameCreated(const uint32_t frameIndex);
// Returns whether this is the first occurrence in a given frame
bool setFrameLastUpdated(const uint32_t frameIndex);
Expand Down Expand Up @@ -135,6 +150,8 @@ class RtInstance {

bool isUnlinkedForGC() const { return m_isUnlinkedForGC; }
private:

void onTransformChanged();
friend class InstanceManager;

const uint64_t m_id;
Expand Down Expand Up @@ -183,6 +200,8 @@ class RtInstance {

CategoryFlags m_categoryFlags;

Vector3 m_spatialCachePos = Vector3(0.f);

public:

// Not really needed in this struct, just to store it somewhere for a batched build
Expand Down
2 changes: 1 addition & 1 deletion src/dxvk/rtx_render/rtx_light_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ namespace dxvk {
if (newLight.getBufferIdx() != kNewLightIdx || newLight.isChildOfMesh())
continue;

float similarity = isSimilar(light, newLight, RtxOptions::Get()->getUniqueObjectDistance());
float similarity = isSimilar(light, newLight, RtxOptions::uniqueObjectDistance());
// Update the cached light if it's similar.
if (similarity > currentSimilarity) {
similarLight = newPair.first;
Expand Down
8 changes: 6 additions & 2 deletions src/dxvk/rtx_render/rtx_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,9 @@ namespace dxvk {
RTX_OPTION("rtx", bool, enableSecondaryBounces, true, "Enables indirect lighting (lighting from diffuse/specular bounces to one or more other surfaces) on surfaces when set to true, otherwise disables it.");
RTX_OPTION("rtx", bool, zUp, false, "Indicates that the Z axis is the \"upward\" axis in the world when true, otherwise the Y axis when false.");
RTX_OPTION("rtx", bool, isLHS, false, "");
RTX_OPTION("rtx", float, uniqueObjectDistance, 300.f, "[cm]");
RTX_OPTION("rtx", float, uniqueObjectDistance, 300.f, "The distance (in game units) that an object can move in a single frame before it is no longer considered the same object.\n"
"If this is too low, fast moving objects may flicker and have bad lighting. If it's too high, repeated objects may flicker.\n"
"This does not account for sceneScale.");
RTX_OPTION_FLAG_ENV("rtx", UIType, showUI, UIType::None, RtxOptionFlags::NoSave | RtxOptionFlags::NoReset, "RTX_GUI_DISPLAY_UI", "0 = Don't Show, 1 = Show Simple, 2 = Show Advanced.");
RTX_OPTION_FLAG("rtx", bool, defaultToAdvancedUI, false, RtxOptionFlags::NoReset, "");
RTX_OPTION("rtx", bool, showUICursor, true, "");
Expand Down Expand Up @@ -948,6 +950,9 @@ namespace dxvk {
if (sourceRootPath() == "./")
sourceRootPathRef() = getCurrentDirectory() + "/";

// Needs to be > 0
RTX_OPTION_CLAMP_MIN(uniqueObjectDistance, FLT_MIN);

RTX_OPTION_CLAMP_MIN(emissiveIntensity, 0.0f);
// Note: Clamp to positive values as negative luminance thresholds are not valid.
RTX_OPTION_CLAMP_MIN(fireflyFilteringLuminanceThreshold, 0.0f);
Expand Down Expand Up @@ -1211,7 +1216,6 @@ namespace dxvk {
bool shouldCaptureDebugImage() const { return captureDebugImage(); }
bool isLiveShaderEditModeEnabled() const { return useLiveShaderEditMode(); }
bool isZUp() const { return zUp(); }
float getUniqueObjectDistance() const { return uniqueObjectDistance(); }
float getUniqueObjectDistanceSqr() const { return uniqueObjectDistance() * uniqueObjectDistance(); }
float getResolutionScale() const { return resolutionScale(); }
DLSSProfile getDLSSQuality() const { return qualityDLSS(); }
Expand Down
8 changes: 7 additions & 1 deletion src/dxvk/rtx_render/rtx_scene_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ namespace dxvk {
, m_pReplacer(new AssetReplacer())
, m_terrainBaker(new TerrainBaker())
, m_cameraManager(device)
, m_startTime(std::chrono::steady_clock::now()) {
, m_startTime(std::chrono::steady_clock::now())
, m_uniqueObjectSearchDistance(RtxOptions::uniqueObjectDistance()) {
InstanceEventHandler instanceEvents(this);
instanceEvents.onInstanceAddedCallback = [this](const RtInstance& instance) { onInstanceAdded(instance); };
instanceEvents.onInstanceUpdatedCallback = [this](RtInstance& instance, const RtSurfaceMaterial& material, bool hasTransformChanged, bool hasVerticesChanged) { onInstanceUpdated(instance, material, hasTransformChanged, hasVerticesChanged); };
Expand Down Expand Up @@ -474,6 +475,11 @@ namespace dxvk {
m_terrainBaker->onFrameEnd(ctx);

m_activePOMCount = 0;

if (m_uniqueObjectSearchDistance != RtxOptions::uniqueObjectDistance()) {
m_uniqueObjectSearchDistance = RtxOptions::uniqueObjectDistance();
m_drawCallCache.rebuildSpatialMaps();
}
}

void SceneManager::onFrameEndNoRTX() {
Expand Down
2 changes: 2 additions & 0 deletions src/dxvk/rtx_render/rtx_scene_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,8 @@ class SceneManager : public CommonDeviceObject, public ResourceCache {
bool m_useFixedFrameTime = false;
std::chrono::time_point<std::chrono::steady_clock> m_startTime;
uint32_t m_activePOMCount = 0;

float m_uniqueObjectSearchDistance = 1.f;

struct DrawCallMetaInfo {
XXH64_hash_t legacyTextureHash { kEmptyHash };
Expand Down
32 changes: 32 additions & 0 deletions src/dxvk/rtx_render/rtx_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "rtx_types.h"
#include "rtx_options.h"
#include "rtx_terrain_baker.h"
#include "rtx_instance_manager.h"

namespace dxvk {
uint32_t RasterGeometry::calculatePrimitiveCount() const {
Expand Down Expand Up @@ -195,4 +196,35 @@ namespace dxvk {
setCategory(InstanceCategories::Sky, shouldBakeSky(*this));
setCategory(InstanceCategories::Terrain, shouldBakeTerrain(*this));
}

// Builds an entry from the given draw call state. The spatial map is created with a
// cell size of twice rtx.uniqueObjectDistance; a non-positive distance is logged as an error.
BlasEntry::BlasEntry(const DrawCallState& input_)
  : input(input_)
  , m_spatialMap(RtxOptions::uniqueObjectDistance() * 2.f) {
  const bool isDistanceInvalid = RtxOptions::uniqueObjectDistance() <= 0.f;
  if (isDistanceInvalid) {
    ONCE(Logger::err("rtx.uniqueObjectDistance must be greater than 0."));
  }
}

// Detaches `instance` from this entry: removes it from the spatial cache first, then drops
// it from the list of linked instances. Logs an error if the instance was not in the list.
void BlasEntry::unlinkInstance(const RtInstance* instance) {
  instance->removeFromSpatialCache();

  // Hold the iterator by value: `auto& it = std::find(...)` binds a non-const lvalue
  // reference to a temporary, which only compiles as an MSVC extension.
  const auto it = std::find(m_linkedInstances.begin(), m_linkedInstances.end(), instance);
  if (it == m_linkedInstances.end()) {
    ONCE(Logger::err("Tried to unlink an instance, which was never linked!"));
    return;
  }

  // Swap & pop - faster than "erase", but doesn't preserve order, which is fine here.
  std::swap(*it, m_linkedInstances.back());
  m_linkedInstances.pop_back();
}

void BlasEntry::rebuildSpatialMap() {
InstanceMap newMap(RtxOptions::uniqueObjectDistance() * 2.f);

for (const auto& iter : m_spatialMap.getAll()){
for (const RtInstance* instance : iter.second) {
newMap.insert(instance->getSpatialCachePosition(), instance);
}
}
m_spatialMap = std::move(newMap);
}

} // namespace dxvk
22 changes: 10 additions & 12 deletions src/dxvk/rtx_render/rtx_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "rtx_camera.h"
#include "vulkan/vulkan_core.h"
#include "../../util/util_threadpool.h"
#include "../../util/util_spatial_map.h"

#include <inttypes.h>
#include <vector>
Expand Down Expand Up @@ -562,12 +563,13 @@ struct BlasEntry {
// Frame when the vertex data of this geometry was last updated, used to detect static geometries
uint32_t frameLastUpdated = kInvalidFrameIndex;

using InstanceMap = SpatialMap<const RtInstance*>;

Rc<PooledBlas> staticBlas;

BlasEntry() = default;

BlasEntry(const DrawCallState& input_)
: input(input_) { }
BlasEntry(const DrawCallState& input_);

void cacheMaterial(const LegacyMaterialData& newMaterial) {
if (input.getMaterialData().getHash() != newMaterial.getHash()) {
Expand Down Expand Up @@ -595,21 +597,17 @@ struct BlasEntry {
m_linkedInstances.push_back(instance);
}

void unlinkInstance(const RtInstance* instance) {
auto& it = std::find(m_linkedInstances.begin(), m_linkedInstances.end(), instance);
if (it != m_linkedInstances.end()) {
// Swap & pop - faster than "erase", but doesn't preserve order, which is fine here.
std::swap(*it, m_linkedInstances.back());
m_linkedInstances.pop_back();
} else {
Logger::err("Tried to unlink an instance, which was never linked!");
}
}
void unlinkInstance(const RtInstance* instance);

const std::vector<const RtInstance*>& getLinkedInstances() const { return m_linkedInstances; }
InstanceMap& getSpatialMap() { return m_spatialMap; }
const InstanceMap& getSpatialMap() const { return m_spatialMap; }

void rebuildSpatialMap();

private:
std::vector<const RtInstance*> m_linkedInstances;
InstanceMap m_spatialMap;
std::unordered_map<XXH64_hash_t, LegacyMaterialData> m_materials;
};

Expand Down
133 changes: 133 additions & 0 deletions src/util/util_spatial_map.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#pragma once
#include <algorithm>
#include <cmath>
#include <unordered_map>
#include <utility>
#include <vector>

#include "util_vector.h"
#include "./log/log.h"

namespace dxvk {
// A structure to allow for quickly returning data close to a specific position.
// Positions are bucketed into a uniform grid of cubic cells with an edge length of
// `cellSize`; each populated cell stores the values that were inserted inside it.
template<class T>
class SpatialMap {
public:
  // Creates an empty map. A `cellSize` that is <= 0 is logged as an error and replaced by 1.
  SpatialMap(float cellSize) : m_cellSize(cellSize) {
    if (m_cellSize <= 0) {
      ONCE(Logger::err("Invalid cell size in SpatialMap. cellSize must be greater than 0."));
      m_cellSize = 1.f;
    }
  }

  // Both move operations are member-wise (cell size copied, cell storage moved).
  // The move constructor is declared alongside the move assignment so the type is
  // movable in both forms instead of only assignable.
  SpatialMap(SpatialMap&& other) = default;
  SpatialMap& operator=(SpatialMap&& other) = default;

  // Returns the populated cells among the 8 cells closest to `position`.
  // The position is shifted by half a cell before flooring, so the 2x2x2 block that is
  // scanned contains every point within half a cell size of `position` on each axis.
  // The returned pointers refer to vectors owned by this map; empty cells are omitted,
  // so the result holds between 0 and 8 entries.
  const std::vector<const std::vector<T>*> getDataNearPos(const Vector3& position) const {
    static const Vector3i kOffsets[] = {
      {0, 0, 0},
      {0, 0, 1},
      {0, 1, 0},
      {0, 1, 1},
      {1, 0, 0},
      {1, 0, 1},
      {1, 1, 0},
      {1, 1, 1}
    };
    std::vector<const std::vector<T>*> result;
    result.reserve(8);

    const Vector3 cellPosition = position / m_cellSize - Vector3(0.5f, 0.5f, 0.5f);
    const Vector3i floorPos(int(std::floor(cellPosition.x)), int(std::floor(cellPosition.y)), int(std::floor(cellPosition.z)));

    for (const Vector3i& offset : kOffsets) {
      const auto iter = m_cache.find(floorPos + offset);
      if (iter != m_cache.end()) {
        result.push_back(&iter->second);
      }
    }

    return result;
  }

  // Adds `data` to the cell containing `position`.
  void insert(const Vector3& position, T data) {
    insert(getCellPos(position), data);
  }

  // Removes `data` from the cell containing `position`. Logs an error if the cell is
  // not populated or does not contain `data`.
  void erase(const Vector3& position, T data) {
    erase(getCellPos(position), data);
  }

  // Relocates `data` from the cell containing `oldPosition` to the cell containing
  // `newPosition`. Does nothing when both positions fall into the same cell.
  void move(const Vector3& oldPosition, const Vector3& newPosition, T data) {
    const Vector3i oldPos = getCellPos(oldPosition);
    const Vector3i newPos = getCellPos(newPosition);
    if (oldPos != newPos) {
      // Reuse the cell coordinates computed above rather than deriving them again.
      erase(oldPos, data);
      insert(newPos, data);
    }
  }

  // Read-only access to every populated cell, keyed by integer cell coordinate.
  const std::unordered_map<Vector3i, std::vector<T>>& getAll() const {
    return m_cache;
  }

private:

  // Converts a position into the integer coordinate of the cell that contains it.
  Vector3i getCellPos(const Vector3& position) const {
    const Vector3 scaledPos = position / m_cellSize;
    return Vector3i(int(std::floor(scaledPos.x)), int(std::floor(scaledPos.y)), int(std::floor(scaledPos.z)));
  }

  // Appends `data` to the cell at `pos`, creating the cell if it does not exist yet.
  void insert(const Vector3i& pos, T data) {
    m_cache[pos].push_back(data);
  }

  // Removes one occurrence of `data` from the cell at `pos`. A cell whose only element
  // is removed is dropped from the map entirely, so populated cells are never empty.
  void erase(const Vector3i& pos, T data) {
    auto cellIter = m_cache.find(pos);
    if (cellIter == m_cache.end()) {
      Logger::err("Specified cell was already empty in SpatialMap::erase().");
      return;
    }

    std::vector<T>& cell = cellIter->second;
    // Held by value: `auto& iter = std::find(...)` would bind a non-const lvalue
    // reference to a temporary, which is not valid standard C++.
    auto iter = std::find(cell.begin(), cell.end(), data);
    if (iter != cell.end()) {
      if (cell.size() > 1) {
        // Swap & pop - faster than "erase", but doesn't preserve order, which is fine here.
        std::swap(*iter, cell.back());
        cell.pop_back();
      } else {
        m_cache.erase(cellIter);
      }
    } else {
      Logger::err("Couldn't find matching data in SpatialMap::erase().");
    }
  }

  // Edge length of one grid cell; always > 0 after construction with a finite value.
  float m_cellSize;
  // Populated cells only, keyed by integer cell coordinate.
  std::unordered_map<Vector3i, std::vector<T>> m_cache;
};
}
Loading

0 comments on commit 1f5b5dd

Please sign in to comment.