From d74e4217fdbc3ad584f58f55d080d10ef3dc4f19 Mon Sep 17 00:00:00 2001 From: JellySquid Date: Tue, 7 Jan 2025 15:29:16 -0600 Subject: [PATCH] Improve vertex sorting code and use radix sorts For larger arrays of floats (>80 elements), the radix sort is significantly faster. We also merge the process of calculating each quad's centroid into the metric calculation, so that we can avoid expensive object de-references, and so that it isn't necessary to allocate additional memory prior to sorting. --- .../bsp_tree/InnerPartitionBSPNode.java | 44 ++---- .../data/DynamicTopoData.java | 29 ++-- .../data/StaticNormalRelativeData.java | 60 ++------ .../client/util/sorting/AbstractSort.java | 14 -- .../client/util/sorting/InsertionSort.java | 27 ---- .../sodium/client/util/sorting/MergeSort.java | 89 ----------- .../sodium/client/util/sorting/RadixSort.java | 91 ++++++++---- .../client/util/sorting/VertexSorters.java | 138 +++++++++++++++--- .../util/sorting/VertexSortingExtended.java | 12 ++ .../sorting/MeshDataAccessor.java | 12 ++ .../buffer_builder/sorting/MeshDataMixin.java | 47 ------ .../sorting/MultiBufferSourceMixin.java | 80 ++++++++++ .../sorting/VertexSortingMixin.java | 29 +++- .../main/resources/sodium-common.mixins.json | 3 +- 14 files changed, 352 insertions(+), 323 deletions(-) delete mode 100644 common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/AbstractSort.java delete mode 100644 common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/InsertionSort.java delete mode 100644 common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/MergeSort.java create mode 100644 common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/VertexSortingExtended.java create mode 100644 common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/MeshDataAccessor.java delete mode 100644 
common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/MeshDataMixin.java create mode 100644 common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/MultiBufferSourceMixin.java diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/translucent_sorting/bsp_tree/InnerPartitionBSPNode.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/translucent_sorting/bsp_tree/InnerPartitionBSPNode.java index 3ee6572093..613dff0616 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/translucent_sorting/bsp_tree/InnerPartitionBSPNode.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/translucent_sorting/bsp_tree/InnerPartitionBSPNode.java @@ -530,43 +530,25 @@ static private BSPNode buildTopoMultiLeafNode(BSPWorkspace workspace, IntArrayLi } static private BSPNode buildSNRLeafNodeFromQuads(BSPWorkspace workspace, IntArrayList indexes, LongArrayList points) { - // in this case the points array is wrong, but its allocation can be reused + final var indexBuffer = indexes.elements(); + final var indexCount = indexes.size(); - int[] quadIndexes; + final var keys = new int[indexCount]; + final var perm = new int[indexCount]; - // adapted from SNR sorting code - if (RadixSort.useRadixSort(indexes.size())) { - final var keys = new int[indexes.size()]; - - for (int i = 0; i < indexes.size(); i++) { - var quadIndex = indexes.getInt(i); - keys[i] = MathUtil.floatToComparableInt(workspace.quads[quadIndex].getAccurateDotProduct()); - } - - quadIndexes = RadixSort.sort(keys); - - for (int i = 0; i < indexes.size(); i++) { - quadIndexes[i] = indexes.getInt(quadIndexes[i]); - } - } else { - final var sortData = points.elements(); - - for (int i = 0; i < indexes.size(); i++) { - var quadIndex = indexes.getInt(i); - int dotProductComponent = 
MathUtil.floatToComparableInt(workspace.quads[quadIndex].getAccurateDotProduct()); - sortData[i] = (long) dotProductComponent << 32 | quadIndex; - } - - Arrays.sort(sortData, 0, indexes.size()); + for (int i = 0; i < indexCount; i++) { + TQuad quad = workspace.quads[indexBuffer[i]]; + keys[i] = MathUtil.floatToComparableInt(quad.getAccurateDotProduct()); + perm[i] = i; + } - quadIndexes = new int[indexes.size()]; + RadixSort.sortIndirect(perm, keys); - for (int i = 0; i < indexes.size(); i++) { - quadIndexes[i] = (int) sortData[i]; - } + for (int i = 0; i < indexCount; i++) { + perm[i] = indexBuffer[perm[i]]; } - return new LeafMultiBSPNode(BSPSortState.compressIndexes(IntArrayList.wrap(quadIndexes), false)); + return new LeafMultiBSPNode(BSPSortState.compressIndexes(IntArrayList.wrap(perm), false)); } static private BSPNode buildSNRLeafNodeFromPoints(BSPWorkspace workspace, LongArrayList points) { diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/translucent_sorting/data/DynamicTopoData.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/translucent_sorting/data/DynamicTopoData.java index 200ec74a93..2156bb3283 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/translucent_sorting/data/DynamicTopoData.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/translucent_sorting/data/DynamicTopoData.java @@ -9,7 +9,6 @@ import org.joml.Vector3fc; import java.nio.IntBuffer; -import java.util.Arrays; import java.util.function.IntConsumer; /** @@ -221,30 +220,22 @@ void writeSort(CombinedCameraPos cameraPos, boolean initial) { */ static void distanceSortDirect(IntBuffer indexBuffer, TQuad[] quads, Vector3fc cameraPos) { if (quads.length <= 1) { + // Avoid allocations when there is nothing to sort. 
TranslucentData.writeQuadVertexIndexes(indexBuffer, 0); - } else if (RadixSort.useRadixSort(quads.length)) { + } else { final var keys = new int[quads.length]; + final var perm = new int[quads.length]; - for (int q = 0; q < quads.length; q++) { - keys[q] = ~Float.floatToRawIntBits(quads[q].getCenter().distanceSquared(cameraPos)); - } - - var indices = RadixSort.sort(keys); - - for (int i = 0; i < quads.length; i++) { - TranslucentData.writeQuadVertexIndexes(indexBuffer, indices[i]); - } - } else { - final var data = new long[quads.length]; - for (int q = 0; q < quads.length; q++) { - float distance = quads[q].getCenter().distanceSquared(cameraPos); - data[q] = (long) ~Float.floatToRawIntBits(distance) << 32 | q; + for (int idx = 0; idx < quads.length; idx++) { + var centroid = quads[idx].getCenter(); + keys[idx] = ~Float.floatToRawIntBits(centroid.distanceSquared(cameraPos)); + perm[idx] = idx; } - Arrays.sort(data); + RadixSort.sortIndirect(perm, keys); - for (int i = 0; i < quads.length; i++) { - TranslucentData.writeQuadVertexIndexes(indexBuffer, (int) data[i]); + for (int idx = 0; idx < quads.length; idx++) { + TranslucentData.writeQuadVertexIndexes(indexBuffer, perm[idx]); } } } diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/translucent_sorting/data/StaticNormalRelativeData.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/translucent_sorting/data/StaticNormalRelativeData.java index 888ccd2034..9998e67ce9 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/translucent_sorting/data/StaticNormalRelativeData.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/translucent_sorting/data/StaticNormalRelativeData.java @@ -44,31 +44,21 @@ private static StaticNormalRelativeData fromDoubleUnaligned(int[] vertexCounts, var indexBuffer = sorter.getIntBuffer(); if (quads.length <= 1) { + // Avoid allocations when there is nothing to sort. 
TranslucentData.writeQuadVertexIndexes(indexBuffer, 0); - } else if (RadixSort.useRadixSort(quads.length)) { + } else { final var keys = new int[quads.length]; + final var perm = new int[quads.length]; for (int q = 0; q < quads.length; q++) { keys[q] = MathUtil.floatToComparableInt(quads[q].getAccurateDotProduct()); + perm[q] = q; } - var indices = RadixSort.sort(keys); - - for (int i = 0; i < quads.length; i++) { - TranslucentData.writeQuadVertexIndexes(indexBuffer, indices[i]); - } - } else { - final var sortData = new long[quads.length]; - - for (int q = 0; q < quads.length; q++) { - int dotProductComponent = MathUtil.floatToComparableInt(quads[q].getAccurateDotProduct()); - sortData[q] = (long) dotProductComponent << 32 | q; - } - - Arrays.sort(sortData); + RadixSort.sortIndirect(perm, keys); for (int i = 0; i < quads.length; i++) { - TranslucentData.writeQuadVertexIndexes(indexBuffer, (int) sortData[i]); + TranslucentData.writeQuadVertexIndexes(indexBuffer, perm[i]); } } @@ -86,21 +76,14 @@ private static StaticNormalRelativeData fromMixed(int[] vertexCounts, var indexBuffer = sorter.getIntBuffer(); var maxQuadCount = 0; - boolean anyNeedsSortData = false; + for (var vertexCount : vertexCounts) { if (vertexCount != -1) { var quadCount = TranslucentData.vertexCountToQuadCount(vertexCount); maxQuadCount = Math.max(maxQuadCount, quadCount); - anyNeedsSortData |= !RadixSort.useRadixSort(quadCount) && quadCount > 1; } } - long[] sortData = null; - if (anyNeedsSortData) { - sortData = new long[maxQuadCount]; - } - - int quadIndex = 0; for (var vertexCount : vertexCounts) { if (vertexCount == -1 || vertexCount == 0) { continue; @@ -110,32 +93,19 @@ private static StaticNormalRelativeData fromMixed(int[] vertexCounts, if (count == 1) { TranslucentData.writeQuadVertexIndexes(indexBuffer, 0); - quadIndex++; - } else if (RadixSort.useRadixSort(count)) { + } else { final var keys = new int[count]; + final var perm = new int[count]; - for (int q = 0; q < count; q++) { - 
keys[q] = MathUtil.floatToComparableInt(quads[quadIndex++].getAccurateDotProduct()); + for (int idx = 0; idx < count; idx++) { + keys[idx] = MathUtil.floatToComparableInt(quads[idx].getAccurateDotProduct()); + perm[idx] = idx; } - var indices = RadixSort.sort(keys); - - for (int i = 0; i < count; i++) { - TranslucentData.writeQuadVertexIndexes(indexBuffer, indices[i]); - } - } else { - for (int i = 0; i < count; i++) { - var quad = quads[quadIndex++]; - int dotProductComponent = MathUtil.floatToComparableInt(quad.getAccurateDotProduct()); - sortData[i] = (long) dotProductComponent << 32 | i; - } - - if (count > 1) { - Arrays.sort(sortData, 0, count); - } + RadixSort.sortIndirect(perm, keys); - for (int i = 0; i < count; i++) { - TranslucentData.writeQuadVertexIndexes(indexBuffer, (int) sortData[i]); + for (int idx = 0; idx < count; idx++) { + TranslucentData.writeQuadVertexIndexes(indexBuffer, perm[idx]); } } } diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/AbstractSort.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/AbstractSort.java deleted file mode 100644 index 93fd652f6e..0000000000 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/AbstractSort.java +++ /dev/null @@ -1,14 +0,0 @@ -package net.caffeinemc.mods.sodium.client.util.sorting; - - -public class AbstractSort { - protected static int[] createIndexBuffer(int length) { - var indices = new int[length]; - - for (int i = 0; i < length; i++) { - indices[i] = i; - } - - return indices; - } -} diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/InsertionSort.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/InsertionSort.java deleted file mode 100644 index 08dfbc4ef9..0000000000 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/InsertionSort.java +++ /dev/null @@ -1,27 +0,0 @@ -package net.caffeinemc.mods.sodium.client.util.sorting; - -public class 
InsertionSort extends AbstractSort { - public static void insertionSort(final int[] indices, final int fromIndex, final int toIndex, final float[] keys) { - int index = fromIndex; - - while (++index < toIndex) { - int t = indices[index]; - int j = index; - - int u = indices[j - 1]; - - while (keys[u] < keys[t]) { - indices[j] = u; - - if (fromIndex == j - 1) { - --j; - break; - } - - u = indices[--j - 1]; - } - - indices[j] = t; - } - } -} diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/MergeSort.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/MergeSort.java deleted file mode 100644 index 5083da0274..0000000000 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/MergeSort.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (C) 2002-2017 Sebastiano Vigna - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * - * - * For the sorting and binary search code: - * - * Copyright (C) 1999 CERN - European Organization for Nuclear Research. - * - * Permission to use, copy, modify, distribute and sell this software and - * its documentation for any purpose is hereby granted without fee, - * provided that the above copyright notice appear in all copies and that - * both that copyright notice and this permission notice appear in - * supporting documentation. CERN makes no representations about the - * suitability of this software for any purpose. 
It is provided "as is" - * without expressed or implied warranty. - */ -package net.caffeinemc.mods.sodium.client.util.sorting; - - -/** - * Based upon {@link it.unimi.dsi.fastutil.ints.IntArrays} implementation, but it eliminates the use of a user-supplied - * function and instead sorts an array of floats directly. This helps to improve runtime performance. - */ -public class MergeSort extends AbstractSort { - private static final int INSERTION_SORT_THRESHOLD = 16; - - public static int[] mergeSort(float[] keys) { - var indices = createIndexBuffer(keys.length); - mergeSort(indices, keys); - - return indices; - } - - private static void mergeSort(final int[] indices, final float[] keys) { - mergeSort(indices, keys, 0, indices.length, null); - } - - private static void mergeSort(final int[] indices, final float[] keys, final int fromIndex, final int toIndex, int[] supp) { - int len = toIndex - fromIndex; - - // Insertion sort on smallest arrays - if (len < INSERTION_SORT_THRESHOLD) { - InsertionSort.insertionSort(indices, fromIndex, toIndex, keys); - return; - } - - if (supp == null) { - supp = indices.clone(); - } - - // Recursively sort halves of a into supp - final int mid = (fromIndex + toIndex) >>> 1; - mergeSort(supp, keys, fromIndex, mid, indices); - mergeSort(supp, keys, mid, toIndex, indices); - - // If list is already sorted, just copy from supp to indices. This is an - // optimization that results in faster sorts for nearly ordered lists. 
- if (keys[supp[mid]] <= keys[supp[mid - 1]]) { - System.arraycopy(supp, fromIndex, indices, fromIndex, len); - return; - } - - // Merge sorted halves (now in supp) into indices - int i = fromIndex, p = fromIndex, q = mid; - - while (i < toIndex) { - if (q >= toIndex || p < mid && keys[supp[q]] <= keys[supp[p]]) { - indices[i] = supp[p++]; - } else { - indices[i] = supp[q++]; - } - - i++; - } - } -} diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/RadixSort.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/RadixSort.java index 00aedcb426..f96a056572 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/RadixSort.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/RadixSort.java @@ -1,7 +1,9 @@ package net.caffeinemc.mods.sodium.client.util.sorting; -public class RadixSort extends AbstractSort { - public static final int RADIX_SORT_THRESHOLD = 64; +import it.unimi.dsi.fastutil.ints.IntArrays; + +public class RadixSort { + private static final int RADIX_SORT_THRESHOLD = 80; private static final int DIGIT_BITS = 8; private static final int RADIX_KEY_BITS = Integer.BYTES * 8; @@ -9,27 +11,15 @@ public class RadixSort extends AbstractSort { private static final int DIGIT_COUNT = (RADIX_KEY_BITS + DIGIT_BITS - 1) / DIGIT_BITS; private static final int DIGIT_MASK = (1 << DIGIT_BITS) - 1; - public static int[] sort(int[] keys) { - if (keys.length <= 1) { - return new int[keys.length]; - } - - return radixSort(keys, createHistogram(keys)); - } - - private static int[][] createHistogram(int[] keys) { - var histogram = new int[DIGIT_COUNT][BUCKET_COUNT]; - + private static void getHistogram(int[][] histogram, int[] keys) { for (final int key : keys) { for (int digit = 0; digit < DIGIT_COUNT; digit++) { histogram[digit][extractDigit(key, digit)] += 1; } } - - return histogram; } - private static void prefixSum(int[][] offsets) { + private static void 
prefixSums(int[][] offsets) { for (int digit = 0; digit < DIGIT_COUNT; digit++) { final var buckets = offsets[digit]; var sum = 0; @@ -42,13 +32,55 @@ private static void prefixSum(int[][] offsets) { } } - private static int[] radixSort(int[] keys, int[][] offsets) { - prefixSum(offsets); + /** + *

Sorts the specified array according to the natural ascending order using an unstable, out-of-place, indirect, + * 256-way LSD radix sort.

+ * + *

This algorithm is well suited for large arrays of integers, especially when they are uniformly distributed + * over the entire range of the data type.

+ * + *

This method implements an indirect sort. The elements of {@code perm} (which must be + * exactly the numbers in the interval {@code [0..perm.length)}) will be permuted so that + * {@code keys[perm[i]] <= keys[perm[i + 1]]}.

+ * + *

While this radix sort is very fast on larger arrays, there is a certain amount of fixed cost involved in + * computing the histogram and prefix sums. Because of this, a fallback algorithm (currently quick sort) is used + * for small arrays (at most {@code RADIX_SORT_THRESHOLD} elements) so that this method performs well for inputs of all sizes.

+ * + * @param perm a permutation array indexing {@param keys}. + * @param keys the array of elements to be sorted. + */ + public static void sortIndirect(final int[] perm, final int[] keys) { + if (perm.length <= RADIX_SORT_THRESHOLD) { + smallSort(perm, keys); + return; + } + + int[][] offsets; + int[] next; + + try { + offsets = new int[DIGIT_COUNT][BUCKET_COUNT]; + next = new int[perm.length]; + } catch (OutOfMemoryError oom) { + // Not enough memory to perform an out-of-place sort, so use an in-place alternative. + fallbackInPlaceSort(perm, keys); + return; + } + + sortIndirect(perm, keys, offsets, next); + } - final var length = keys.length; + private static void sortIndirect(final int[] perm, + final int[] keys, + final int[][] offsets, + int[] next) + { + final int length = perm.length; + getHistogram(offsets, keys); + prefixSums(offsets); - int[] cur = createIndexBuffer(length); - int[] next = new int[length]; + int[] cur = perm; for (int digit = 0; digit < DIGIT_COUNT; digit++) { final var buckets = offsets[digit]; @@ -68,15 +100,22 @@ private static int[] radixSort(int[] keys, int[][] offsets) { cur = temp; } } + } + + private static void smallSort(int[] perm, int[] keys) { + if (perm.length <= 1) { + return; + } - return cur; + fallbackInPlaceSort(perm, keys); } - private static int extractDigit(int key, int digit) { - return ((key >>> (digit * DIGIT_BITS)) & DIGIT_MASK); + // Fallback sorting method which is guaranteed to be in-place and not require additional memory. 
+ private static void fallbackInPlaceSort(int[] perm, int[] keys) { + IntArrays.quickSortIndirect(perm, keys); } - public static boolean useRadixSort(int length) { - return length >= RADIX_SORT_THRESHOLD; + private static int extractDigit(int key, int digit) { + return ((key >>> (digit * DIGIT_BITS)) & DIGIT_MASK); } } diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/VertexSorters.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/VertexSorters.java index 5a36dd1783..fab533b5cc 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/VertexSorters.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/VertexSorters.java @@ -1,45 +1,141 @@ package net.caffeinemc.mods.sodium.client.util.sorting; import com.mojang.blaze3d.vertex.VertexSorting; +import net.caffeinemc.mods.sodium.client.util.MathUtil; +import org.apache.commons.lang3.Validate; +import org.jetbrains.annotations.NotNull; import org.joml.Vector3f; +import org.lwjgl.system.MemoryUtil; + +import java.nio.ByteBuffer; public class VertexSorters { - public static VertexSorting sortByDistance(Vector3f origin) { - return new SortByDistance(origin); + public static VertexSortingExtended distance(float x, float y, float z) { + if (x == 0.0f && y == 0.0f && z == 0.0f) { + return SortByDistanceToOrigin.INSTANCE; + } + + return new SortByDistanceToPoint(x, y, z); + } + + public static VertexSortingExtended orthographicZ() { + return SortByOrthographicZ.INSTANCE; } - private static class SortByDistance extends AbstractVertexSorter { - private final Vector3f origin; + // Slow, should only be used when none of the other classes apply. 
+ public static VertexSortingExtended fallback(VertexSorting.DistanceFunction metric) { + return new SortByFallback(metric); + } + + private abstract static class AbstractSorter implements VertexSortingExtended { + @Override + public final int @NotNull [] sort(Vector3f[] centroids) { + final int length = centroids.length; + final var keys = new int[length]; + final var perm = new int[length]; + + for (int index = 0; index < length; index++) { + keys[index] = ~MathUtil.floatToComparableInt(this.applyMetric(centroids[index])); + perm[index] = index; + } - private SortByDistance(Vector3f origin) { - this.origin = origin; + RadixSort.sortIndirect(perm, keys); + + return perm; + } + } + + private static class SortByDistanceToPoint extends AbstractSorter { + private final float x, y, z; + + private SortByDistanceToPoint(float x, float y, float z) { + this.x = x; + this.y = y; + this.z = z; } @Override - protected float getKey(Vector3f position) { - return this.origin.distanceSquared(position); + public float applyMetric(float x, float y, float z) { + float dx = this.x - x; + float dy = this.y - y; + float dz = this.z - z; + + return (dx * dx) + (dy * dy) + (dz * dz); } } - /** - * Sorts the keys given by the subclass by descending value. 
- */ - private static abstract class AbstractVertexSorter implements VertexSorting { + private static class SortByDistanceToOrigin extends AbstractSorter { + private static final SortByDistanceToOrigin INSTANCE = new SortByDistanceToOrigin(); + @Override - public final int[] sort(Vector3f[] positions) { - return this.mergeSort(positions); + public float applyMetric(float x, float y, float z) { + return (x * x) + (y * y) + (z * z); } + } - private int[] mergeSort(Vector3f[] positions) { - final var keys = new float[positions.length]; + private static class SortByOrthographicZ extends AbstractSorter { + private static final SortByOrthographicZ INSTANCE = new SortByOrthographicZ(); - for (int index = 0; index < positions.length; index++) { - keys[index] = this.getKey(positions[index]); - } + @Override + public float applyMetric(float x, float y, float z) { + return -z; + } + } + + private static class SortByFallback extends AbstractSorter { + private final DistanceFunction function; + private final Vector3f scratch = new Vector3f(); + + private SortByFallback(DistanceFunction function) { + this.function = function; + } - return MergeSort.mergeSort(keys); + @Override + public float applyMetric(float x, float y, float z) { + return this.function.apply(this.scratch.set(x, y, z)); } + } + + public static int[] sort(ByteBuffer buffer, int vertexCount, int vertexStride, VertexSortingExtended sorting) { + Validate.isTrue(buffer.remaining() >= vertexStride * vertexCount, + "Vertex buffer is not large enough to contain all vertices"); + + long pVertex0 = MemoryUtil.memAddress(buffer); + long pVertex2 = MemoryUtil.memAddress(buffer, vertexStride * 2); + + int primitiveCount = vertexCount / 4; + int primitiveStride = vertexStride * 4; + + final int[] keys = new int[primitiveCount]; + final int[] perm = new int[primitiveCount]; + + for (int primitiveId = 0; primitiveId < primitiveCount; primitiveId++) { + // Position of vertex[0] + float v0x = MemoryUtil.memGetFloat(pVertex0 + 
0L); + float v0y = MemoryUtil.memGetFloat(pVertex0 + 4L); + float v0z = MemoryUtil.memGetFloat(pVertex0 + 8L); + + // Position of vertex[2] + float v2x = MemoryUtil.memGetFloat(pVertex2 + 0L); + float v2y = MemoryUtil.memGetFloat(pVertex2 + 4L); + float v2z = MemoryUtil.memGetFloat(pVertex2 + 8L); + + // The centroid of the quad is calculated using the mid-point of the diagonal edge. This will not work + // for degenerate quads, but those are not sortable anyway. + float cx = (v0x + v2x) * 0.5F; + float cy = (v0y + v2y) * 0.5F; + float cz = (v0z + v2z) * 0.5F; + + // The sign bit of the metric is negated as we need back-to-front (descending) ordering. + keys[primitiveId] = MathUtil.floatToComparableInt(-sorting.applyMetric(cx, cy, cz)); + perm[primitiveId] = primitiveId; + + pVertex0 += primitiveStride; + pVertex2 += primitiveStride; + } + + RadixSort.sortIndirect(perm, keys); - protected abstract float getKey(Vector3f object); + return perm; } } diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/VertexSortingExtended.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/VertexSortingExtended.java new file mode 100644 index 0000000000..b04bc9eb99 --- /dev/null +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/util/sorting/VertexSortingExtended.java @@ -0,0 +1,12 @@ +package net.caffeinemc.mods.sodium.client.util.sorting; + +import com.mojang.blaze3d.vertex.VertexSorting; +import org.joml.Vector3f; + +public interface VertexSortingExtended extends VertexSorting { + float applyMetric(float x, float y, float z); + + default float applyMetric(Vector3f vector) { + return this.applyMetric(vector.x, vector.y, vector.z); + } +} diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/MeshDataAccessor.java b/common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/MeshDataAccessor.java new file mode 100644 
index 0000000000..096dfd9437 --- /dev/null +++ b/common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/MeshDataAccessor.java @@ -0,0 +1,12 @@ +package net.caffeinemc.mods.sodium.mixin.features.render.immediate.buffer_builder.sorting; + +import com.mojang.blaze3d.vertex.ByteBufferBuilder; +import com.mojang.blaze3d.vertex.MeshData; +import org.spongepowered.asm.mixin.Mixin; +import org.spongepowered.asm.mixin.gen.Accessor; + +@Mixin(MeshData.class) +public interface MeshDataAccessor { + @Accessor + void setIndexBuffer(ByteBufferBuilder.Result buffer); +} diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/MeshDataMixin.java b/common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/MeshDataMixin.java deleted file mode 100644 index d05d3a9f06..0000000000 --- a/common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/MeshDataMixin.java +++ /dev/null @@ -1,47 +0,0 @@ - -package net.caffeinemc.mods.sodium.mixin.features.render.immediate.buffer_builder.sorting; - -import com.mojang.blaze3d.vertex.MeshData; -import org.jetbrains.annotations.Nullable; -import org.joml.Vector3f; -import org.lwjgl.system.MemoryUtil; -import org.spongepowered.asm.mixin.Mixin; -import org.spongepowered.asm.mixin.Overwrite; -import org.spongepowered.asm.mixin.Shadow; -import org.spongepowered.asm.mixin.Unique; -import com.mojang.blaze3d.vertex.BufferBuilder; -import com.mojang.blaze3d.vertex.VertexFormat; -import com.mojang.blaze3d.vertex.VertexSorting; -import java.nio.ByteBuffer; - -@Mixin(MeshData.class) -public abstract class MeshDataMixin { - /** - * @author JellySquid - * @reason Avoid slow memory accesses - */ - @Overwrite - private static Vector3f[] unpackQuadCentroids(ByteBuffer buffer, int vertices, VertexFormat format) { - int vertexStride = format.getVertexSize(); - int 
primitiveCount = vertices / 4; - - Vector3f[] centers = new Vector3f[primitiveCount]; - - for (int index = 0; index < primitiveCount; ++index) { - long v1 = MemoryUtil.memAddress(buffer, (((index * 4) + 0) * vertexStride)); - long v2 = MemoryUtil.memAddress(buffer, (((index * 4) + 2) * vertexStride)); - - float x1 = MemoryUtil.memGetFloat(v1 + 0); - float y1 = MemoryUtil.memGetFloat(v1 + 4); - float z1 = MemoryUtil.memGetFloat(v1 + 8); - - float x2 = MemoryUtil.memGetFloat(v2 + 0); - float y2 = MemoryUtil.memGetFloat(v2 + 4); - float z2 = MemoryUtil.memGetFloat(v2 + 8); - - centers[index] = new Vector3f((x1 + x2) * 0.5F, (y1 + y2) * 0.5F, (z1 + z2) * 0.5F); - } - - return centers; - } -} \ No newline at end of file diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/MultiBufferSourceMixin.java b/common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/MultiBufferSourceMixin.java new file mode 100644 index 0000000000..be8650c005 --- /dev/null +++ b/common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/MultiBufferSourceMixin.java @@ -0,0 +1,80 @@ +package net.caffeinemc.mods.sodium.mixin.features.render.immediate.buffer_builder.sorting; + +import com.llamalad7.mixinextras.injector.wrapoperation.Operation; +import com.llamalad7.mixinextras.injector.wrapoperation.WrapOperation; +import com.mojang.blaze3d.vertex.ByteBufferBuilder; +import com.mojang.blaze3d.vertex.MeshData; +import com.mojang.blaze3d.vertex.VertexFormat; +import com.mojang.blaze3d.vertex.VertexSorting; +import net.caffeinemc.mods.sodium.client.util.sorting.VertexSorters; +import net.caffeinemc.mods.sodium.client.util.sorting.VertexSortingExtended; +import net.minecraft.client.renderer.MultiBufferSource; +import org.lwjgl.system.MemoryUtil; +import org.spongepowered.asm.mixin.Mixin; +import org.spongepowered.asm.mixin.Unique; +import 
org.spongepowered.asm.mixin.injection.At;
+
+@Mixin(MultiBufferSource.BufferSource.class)
+public class MultiBufferSourceMixin {
+    @Unique
+    private static final int VERTICES_PER_QUAD = 6; // indices written per quad: corners (0, 1, 2) and (2, 3, 0)
+
+    @WrapOperation(
+            method = "endBatch(Lnet/minecraft/client/renderer/RenderType;Lcom/mojang/blaze3d/vertex/BufferBuilder;)V",
+            at = @At(
+                    value = "INVOKE",
+                    target = "Lcom/mojang/blaze3d/vertex/MeshData;sortQuads(Lcom/mojang/blaze3d/vertex/ByteBufferBuilder;Lcom/mojang/blaze3d/vertex/VertexSorting;)Lcom/mojang/blaze3d/vertex/MeshData$SortState;"
+            )
+    )
+    private MeshData.SortState redirectSortQuads(MeshData meshData, ByteBufferBuilder bufferBuilder, VertexSorting sorting, Operation<MeshData.SortState> original) {
+        // Defer to the wrapped call (and return its result) for other sorters and for non-quad meshes: the index writers assume 4 vertices per primitive.
+        if (!(sorting instanceof VertexSortingExtended sortingExtended) || meshData.drawState().mode() != VertexFormat.Mode.QUADS) {
+            return original.call(meshData, bufferBuilder, sorting);
+        }
+
+        var sortedPrimitiveIds = VertexSorters.sort(meshData.vertexBuffer(), meshData.drawState().vertexCount(), meshData.drawState().format().getVertexSize(), sortingExtended);
+        var sortedIndexBuffer = buildSortedIndexBuffer(meshData, bufferBuilder, sortedPrimitiveIds);
+        ((MeshDataAccessor) meshData).setIndexBuffer(sortedIndexBuffer);
+        return null; // this path never builds a MeshData.SortState
+    }
+
+    @Unique
+    private static ByteBufferBuilder.Result buildSortedIndexBuffer(MeshData meshData, ByteBufferBuilder bufferBuilder, int[] primitiveIds) {
+        final var indexType = meshData.drawState().indexType();
+        final var ptr = bufferBuilder.reserve((primitiveIds.length * VERTICES_PER_QUAD) * indexType.bytes); // six indices per primitive, indexType.bytes each
+
+        if (indexType == VertexFormat.IndexType.SHORT) {
+            writeShortIndexBuffer(ptr, primitiveIds);
+        } else if (indexType == VertexFormat.IndexType.INT) {
+            writeIntIndexBuffer(ptr, primitiveIds);
+        }
+
+        return bufferBuilder.build();
+    }
+
+    @Unique
+    private static void writeIntIndexBuffer(long ptr, int[] primitiveIds) {
+        for (int primitiveId : primitiveIds) {
+            MemoryUtil.memPutInt(ptr + 0L, (primitiveId * 4) + 0); // first triangle: corners 0, 1, 2
+            MemoryUtil.memPutInt(ptr + 4L, (primitiveId * 4) + 1);
+            MemoryUtil.memPutInt(ptr + 8L, (primitiveId * 4) + 2);
+            MemoryUtil.memPutInt(ptr + 12L, (primitiveId * 4) + 2); // second triangle: corners 2, 3, 0
+            MemoryUtil.memPutInt(ptr + 16L, (primitiveId * 4) + 3);
+            MemoryUtil.memPutInt(ptr + 20L, (primitiveId * 4) + 0);
+            ptr += 24L; // six 4-byte indices
+        }
+    }
+
+    @Unique
+    private static void writeShortIndexBuffer(long ptr, int[] primitiveIds) {
+        for (int primitiveId : primitiveIds) {
+            MemoryUtil.memPutShort(ptr + 0L, (short) ((primitiveId * 4) + 0)); // first triangle: corners 0, 1, 2
+            MemoryUtil.memPutShort(ptr + 2L, (short) ((primitiveId * 4) + 1));
+            MemoryUtil.memPutShort(ptr + 4L, (short) ((primitiveId * 4) + 2));
+            MemoryUtil.memPutShort(ptr + 6L, (short) ((primitiveId * 4) + 2)); // second triangle: corners 2, 3, 0
+            MemoryUtil.memPutShort(ptr + 8L, (short) ((primitiveId * 4) + 3));
+            MemoryUtil.memPutShort(ptr + 10L, (short) ((primitiveId * 4) + 0));
+            ptr += 12L; // six 2-byte indices
+        }
+    }
+}
diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/VertexSortingMixin.java b/common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/VertexSortingMixin.java
index fddfd57503..98b719ab37 100644
--- a/common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/VertexSortingMixin.java
+++ b/common/src/main/java/net/caffeinemc/mods/sodium/mixin/features/render/immediate/buffer_builder/sorting/VertexSortingMixin.java
@@ -1,19 +1,46 @@
 package net.caffeinemc.mods.sodium.mixin.features.render.immediate.buffer_builder.sorting;
 
+import com.llamalad7.mixinextras.injector.ModifyExpressionValue;
 import com.mojang.blaze3d.vertex.VertexSorting;
 import net.caffeinemc.mods.sodium.client.util.sorting.VertexSorters;
-import org.joml.Vector3f;
+import org.objectweb.asm.Opcodes;
 import org.spongepowered.asm.mixin.Mixin;
 import org.spongepowered.asm.mixin.Overwrite;
+import org.spongepowered.asm.mixin.injection.At;
 
 @Mixin(VertexSorting.class)
 public interface VertexSortingMixin {
+    /**
+     * Replaces the value about to be stored into {@code ORTHOGRAPHIC_Z} by the static initializer
+     * with {@link VertexSorters#orthographicZ()}. The {@code original} value is discarded.
+     */
+    @SuppressWarnings("DiscouragedShift") // Not currently avoidable.
+    @ModifyExpressionValue(
+            method = "<clinit>",
+            at = @At(
+                    value = "FIELD",
+                    target = "Lcom/mojang/blaze3d/vertex/VertexSorting;ORTHOGRAPHIC_Z:Lcom/mojang/blaze3d/vertex/VertexSorting;",
+                    opcode = Opcodes.PUTSTATIC,
+                    shift = At.Shift.BEFORE))
+    private static VertexSorting modifyVertexSorting(VertexSorting original) {
+        return VertexSorters.orthographicZ();
+    }
+
     /**
      * @author JellySquid
      * @reason Optimize vertex sorting
      */
     @Overwrite
     static VertexSorting byDistance(float x, float y, float z) {
-        return VertexSorters.sortByDistance(new Vector3f(x, y, z));
+        return VertexSorters.distance(x, y, z);
+    }
+
+    /**
+     * @author JellySquid
+     * @reason Optimize vertex sorting
+     */
+    @Overwrite
+    static VertexSorting byDistance(VertexSorting.DistanceFunction function) {
+        return VertexSorters.fallback(function);
     }
 }
diff --git a/common/src/main/resources/sodium-common.mixins.json b/common/src/main/resources/sodium-common.mixins.json
index 5261c22829..a899db2231 100644
--- a/common/src/main/resources/sodium-common.mixins.json
+++ b/common/src/main/resources/sodium-common.mixins.json
@@ -57,7 +57,8 @@
     "features.render.gui.outlines.LevelRendererMixin",
     "features.render.immediate.DirectionMixin",
     "features.render.immediate.buffer_builder.intrinsics.BufferBuilderMixin",
-    "features.render.immediate.buffer_builder.sorting.MeshDataMixin",
+    "features.render.immediate.buffer_builder.sorting.MeshDataAccessor",
+    "features.render.immediate.buffer_builder.sorting.MultiBufferSourceMixin",
     "features.render.immediate.buffer_builder.sorting.VertexSortingMixin",
     "features.render.immediate.matrix_stack.PoseStackMixin",
     "features.render.immediate.matrix_stack.VertexConsumerMixin",