Skip to content

Commit

Permalink
Aggressively optimize entity rendering
Browse files Browse the repository at this point in the history
This is anywhere from 10 to 15% faster depending
on what entities are being rendered.

Most of the improvements come from more efficiently
laying out the cuboid data and coalescing neighboring
32-bit values into 64-bit words.

Furthermore, vertex positions are calculated by
extracting the axis vectors from the pose matrix
(scaled by the cuboid's size) and adding them to the
transformed origin vertex, which avoids many
matrix multiplications.

Co-authored-by: MoePus <[email protected]>
  • Loading branch information
jellysquid3 and MoePus committed Jan 2, 2025
1 parent 07ce020 commit 7f25220
Show file tree
Hide file tree
Showing 6 changed files with 250 additions and 167 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

public class ColorAttribute {
public static void set(long ptr, int color) {
MemoryUtil.memPutInt(ptr + 0, color);
MemoryUtil.memPutInt(ptr, color);
}

public static int get(long ptr) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

public class NormalAttribute {
public static void set(long ptr, int normal) {
MemoryUtil.memPutInt(ptr + 0, normal);
MemoryUtil.memPutInt(ptr, normal);
}

public static int get(long ptr) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@

public class PositionAttribute {
public static void put(long ptr, float x, float y, float z) {
MemoryUtil.memPutFloat(ptr + 0, x);
MemoryUtil.memPutFloat(ptr + 4, y);
MemoryUtil.memPutFloat(ptr + 8, z);
MemoryUtil.memPutFloat(ptr + 0L, x);
MemoryUtil.memPutFloat(ptr + 4L, y);
MemoryUtil.memPutFloat(ptr + 8L, z);
}

public static float getX(long ptr) {
return MemoryUtil.memGetFloat(ptr + 0);
return MemoryUtil.memGetFloat(ptr + 0L);
}

public static float getY(long ptr) {
return MemoryUtil.memGetFloat(ptr + 4);
return MemoryUtil.memGetFloat(ptr + 4L);
}

public static float getZ(long ptr) {
return MemoryUtil.memGetFloat(ptr + 8);
return MemoryUtil.memGetFloat(ptr + 8L);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,172 +4,152 @@
import net.caffeinemc.mods.sodium.api.math.MatrixHelper;
import net.caffeinemc.mods.sodium.api.vertex.buffer.VertexBufferWriter;
import net.caffeinemc.mods.sodium.api.vertex.format.common.EntityVertex;
import net.caffeinemc.mods.sodium.client.util.Int2;
import net.minecraft.core.Direction;
import org.joml.Matrix3f;
import org.joml.Matrix4f;
import org.joml.Vector2f;
import org.joml.Vector3f;
import org.lwjgl.system.MemoryStack;
import org.lwjgl.system.MemoryUtil;

import static net.caffeinemc.mods.sodium.client.render.immediate.model.ModelCuboid.*;

public class EntityRenderer {
private static final int NUM_CUBE_VERTICES = 8;
private static final int NUM_CUBE_FACES = 6;
private static final int NUM_FACE_VERTICES = 4;

private static final int
VERTEX_X1_Y1_Z1 = 0,
VERTEX_X2_Y1_Z1 = 1,
VERTEX_X2_Y2_Z1 = 2,
VERTEX_X1_Y2_Z1 = 3,
VERTEX_X1_Y1_Z2 = 4,
VERTEX_X2_Y1_Z2 = 5,
VERTEX_X2_Y2_Z2 = 6,
VERTEX_X1_Y2_Z2 = 7;

private static final Matrix3f lastMatrix = new Matrix3f();
private static final Matrix3f prevNormalMatrix = new Matrix3f();

private static final int VERTEX_BUFFER_BYTES = NUM_CUBE_FACES * NUM_FACE_VERTICES * EntityVertex.STRIDE;

private static final Vector3f[] CUBE_CORNERS = new Vector3f[NUM_CUBE_VERTICES];
private static final int[][] CUBE_VERTICES = new int[NUM_CUBE_FACES][];
private static final long[] CUBE_VERTEX_XY = new long[NUM_CUBE_VERTICES]; // (pos.x, pos.y)
private static final long[] CUBE_VERTEX_ZW = new long[NUM_CUBE_VERTICES]; // (pos.z, color)

private static final Vector3f[][] VERTEX_POSITIONS = new Vector3f[NUM_CUBE_FACES][NUM_FACE_VERTICES];
private static final Vector3f[][] VERTEX_POSITIONS_MIRRORED = new Vector3f[NUM_CUBE_FACES][NUM_FACE_VERTICES];

private static final Vector2f[][] VERTEX_TEXTURES = new Vector2f[NUM_CUBE_FACES][NUM_FACE_VERTICES];
private static final Vector2f[][] VERTEX_TEXTURES_MIRRORED = new Vector2f[NUM_CUBE_FACES][NUM_FACE_VERTICES];

private static final int[] CUBE_NORMALS = new int[NUM_CUBE_FACES];
private static final int[] CUBE_NORMALS_MIRRORED = new int[NUM_CUBE_FACES];

static {
CUBE_VERTICES[FACE_NEG_Y] = new int[] { VERTEX_X2_Y1_Z2, VERTEX_X1_Y1_Z2, VERTEX_X1_Y1_Z1, VERTEX_X2_Y1_Z1 };
CUBE_VERTICES[FACE_POS_Y] = new int[] { VERTEX_X2_Y2_Z1, VERTEX_X1_Y2_Z1, VERTEX_X1_Y2_Z2, VERTEX_X2_Y2_Z2 };
CUBE_VERTICES[FACE_NEG_Z] = new int[] { VERTEX_X2_Y1_Z1, VERTEX_X1_Y1_Z1, VERTEX_X1_Y2_Z1, VERTEX_X2_Y2_Z1 };
CUBE_VERTICES[FACE_POS_Z] = new int[] { VERTEX_X1_Y1_Z2, VERTEX_X2_Y1_Z2, VERTEX_X2_Y2_Z2, VERTEX_X1_Y2_Z2 };
CUBE_VERTICES[FACE_NEG_X] = new int[] { VERTEX_X2_Y1_Z2, VERTEX_X2_Y1_Z1, VERTEX_X2_Y2_Z1, VERTEX_X2_Y2_Z2 };
CUBE_VERTICES[FACE_POS_X] = new int[] { VERTEX_X1_Y1_Z1, VERTEX_X1_Y1_Z2, VERTEX_X1_Y2_Z2, VERTEX_X1_Y2_Z1 };

for (int cornerIndex = 0; cornerIndex < NUM_CUBE_VERTICES; cornerIndex++) {
CUBE_CORNERS[cornerIndex] = new Vector3f();
}

for (int quadIndex = 0; quadIndex < NUM_CUBE_FACES; quadIndex++) {
for (int vertexIndex = 0; vertexIndex < NUM_FACE_VERTICES; vertexIndex++) {
VERTEX_TEXTURES[quadIndex][vertexIndex] = new Vector2f();
VERTEX_POSITIONS[quadIndex][vertexIndex] = CUBE_CORNERS[CUBE_VERTICES[quadIndex][vertexIndex]];
}
}

for (int quadIndex = 0; quadIndex < NUM_CUBE_FACES; quadIndex++) {
for (int vertexIndex = 0; vertexIndex < NUM_FACE_VERTICES; vertexIndex++) {
VERTEX_TEXTURES_MIRRORED[quadIndex][vertexIndex] = VERTEX_TEXTURES[quadIndex][3 - vertexIndex];
VERTEX_POSITIONS_MIRRORED[quadIndex][vertexIndex] = VERTEX_POSITIONS[quadIndex][3 - vertexIndex];
}
}
}
private static final int[] CUBE_FACE_NORMAL = new int[NUM_CUBE_FACES];

public static void renderCuboid(PoseStack.Pose matrices, VertexBufferWriter writer, ModelCuboid cuboid, int light, int overlay, int color) {
prepareVertices(matrices, cuboid, color);
prepareNormalsIfChanged(matrices);
prepareVertices(matrices, cuboid);

try (MemoryStack stack = MemoryStack.stackPush()) {
final var vertexBuffer = stack.nmalloc(16, VERTEX_BUFFER_BYTES);
final var vertexCount = emitQuads(vertexBuffer, cuboid, color, overlay, light);
final var vertexBuffer = stack.nmalloc(64, VERTEX_BUFFER_BYTES);
final var vertexCount = emitQuads(vertexBuffer, cuboid, overlay, light);

if (vertexCount > 0) {
writer.push(stack, vertexBuffer, vertexCount, EntityVertex.FORMAT);
}
}
}

private static int emitQuads(final long buffer, ModelCuboid cuboid, int color, int overlay, int light) {
final var positions = cuboid.mirror ? VERTEX_POSITIONS_MIRRORED : VERTEX_POSITIONS;
final var textures = cuboid.mirror ? VERTEX_TEXTURES_MIRRORED : VERTEX_TEXTURES;
final var normals = cuboid.mirror ? CUBE_NORMALS_MIRRORED : CUBE_NORMALS;

var vertexCount = 0;
private static int emitQuads(final long buffer, ModelCuboid cuboid, int overlay, int light) {
// Pack the Overlay and Light coordinates into a 64-bit integer as they are next to each other
// in the vertex format. This eliminates another 32-bit memory write in the hot path.
final long packedOverlayLight = Int2.pack(overlay, light);

long ptr = buffer;

for (int quadIndex = 0; quadIndex < NUM_CUBE_FACES; quadIndex++) {
if (!cuboid.shouldDrawFace(quadIndex)) {
continue;
}

emitVertex(ptr, positions[quadIndex][0], color, textures[quadIndex][0], overlay, light, normals[quadIndex]);
ptr += EntityVertex.STRIDE;

emitVertex(ptr, positions[quadIndex][1], color, textures[quadIndex][1], overlay, light, normals[quadIndex]);
ptr += EntityVertex.STRIDE;
final int[] normals = cuboid.normals;
final int[] positions = cuboid.positions;
final long[] textures = cuboid.textures;

emitVertex(ptr, positions[quadIndex][2], color, textures[quadIndex][2], overlay, light, normals[quadIndex]);
ptr += EntityVertex.STRIDE;
int vertexCount = 0;

emitVertex(ptr, positions[quadIndex][3], color, textures[quadIndex][3], overlay, light, normals[quadIndex]);
ptr += EntityVertex.STRIDE;
for (int faceIndex = 0; faceIndex < NUM_CUBE_FACES; faceIndex++) {
if (!cuboid.shouldDrawFace(faceIndex)) {
continue;
}

final int elementOffset = faceIndex * NUM_FACE_VERTICES;
final int packedNormal = CUBE_FACE_NORMAL[normals[faceIndex]];
ptr = writeVertex(ptr, positions[elementOffset + 0], textures[elementOffset + 0], packedOverlayLight, packedNormal);
ptr = writeVertex(ptr, positions[elementOffset + 1], textures[elementOffset + 1], packedOverlayLight, packedNormal);
ptr = writeVertex(ptr, positions[elementOffset + 2], textures[elementOffset + 2], packedOverlayLight, packedNormal);
ptr = writeVertex(ptr, positions[elementOffset + 3], textures[elementOffset + 3], packedOverlayLight, packedNormal);

vertexCount += 4;
}

return vertexCount;
}

private static void emitVertex(long ptr, Vector3f pos, int color, Vector2f tex, int overlay, int light, int normal) {
EntityVertex.write(ptr, pos.x, pos.y, pos.z, color, tex.x, tex.y, overlay, light, normal);
}
private static long writeVertex(long ptr, int vertexIndex, long packedUv, long packedOverlayLight, int packedNormal) {
MemoryUtil.memPutLong(ptr + 0L, CUBE_VERTEX_XY[vertexIndex]);
MemoryUtil.memPutLong(ptr + 8L, CUBE_VERTEX_ZW[vertexIndex]); // overlaps with color attribute
MemoryUtil.memPutLong(ptr + 16L, packedUv);
MemoryUtil.memPutLong(ptr + 24L, packedOverlayLight);
MemoryUtil.memPutInt(ptr + 32L, packedNormal);

private static void prepareVertices(PoseStack.Pose matrices, ModelCuboid cuboid) {
buildVertexPosition(CUBE_CORNERS[VERTEX_X1_Y1_Z1], cuboid.x1, cuboid.y1, cuboid.z1, matrices.pose());
buildVertexPosition(CUBE_CORNERS[VERTEX_X2_Y1_Z1], cuboid.x2, cuboid.y1, cuboid.z1, matrices.pose());
buildVertexPosition(CUBE_CORNERS[VERTEX_X2_Y2_Z1], cuboid.x2, cuboid.y2, cuboid.z1, matrices.pose());
buildVertexPosition(CUBE_CORNERS[VERTEX_X1_Y2_Z1], cuboid.x1, cuboid.y2, cuboid.z1, matrices.pose());
buildVertexPosition(CUBE_CORNERS[VERTEX_X1_Y1_Z2], cuboid.x1, cuboid.y1, cuboid.z2, matrices.pose());
buildVertexPosition(CUBE_CORNERS[VERTEX_X2_Y1_Z2], cuboid.x2, cuboid.y1, cuboid.z2, matrices.pose());
buildVertexPosition(CUBE_CORNERS[VERTEX_X2_Y2_Z2], cuboid.x2, cuboid.y2, cuboid.z2, matrices.pose());
buildVertexPosition(CUBE_CORNERS[VERTEX_X1_Y2_Z2], cuboid.x1, cuboid.y2, cuboid.z2, matrices.pose());

buildVertexTexCoord(VERTEX_TEXTURES[FACE_NEG_Y], cuboid.u1, cuboid.v0, cuboid.u2, cuboid.v1);
buildVertexTexCoord(VERTEX_TEXTURES[FACE_POS_Y], cuboid.u2, cuboid.v1, cuboid.u3, cuboid.v0);
buildVertexTexCoord(VERTEX_TEXTURES[FACE_NEG_Z], cuboid.u1, cuboid.v1, cuboid.u2, cuboid.v2);
buildVertexTexCoord(VERTEX_TEXTURES[FACE_POS_Z], cuboid.u4, cuboid.v1, cuboid.u5, cuboid.v2);
buildVertexTexCoord(VERTEX_TEXTURES[FACE_NEG_X], cuboid.u2, cuboid.v1, cuboid.u4, cuboid.v2);
buildVertexTexCoord(VERTEX_TEXTURES[FACE_POS_X], cuboid.u0, cuboid.v1, cuboid.u1, cuboid.v2);
return ptr + EntityVertex.STRIDE;
}

public static void prepareNormalsIfChanged(PoseStack.Pose matrices) {
if (!matrices.normal().equals(lastMatrix)) {
lastMatrix.set(matrices.normal());

CUBE_NORMALS[FACE_NEG_Y] = MatrixHelper.transformNormal(matrices.normal(), matrices.trustedNormals, Direction.DOWN);
CUBE_NORMALS[FACE_POS_Y] = MatrixHelper.transformNormal(matrices.normal(), matrices.trustedNormals, Direction.UP);
CUBE_NORMALS[FACE_NEG_Z] = MatrixHelper.transformNormal(matrices.normal(), matrices.trustedNormals, Direction.NORTH);
CUBE_NORMALS[FACE_POS_Z] = MatrixHelper.transformNormal(matrices.normal(), matrices.trustedNormals, Direction.SOUTH);
CUBE_NORMALS[FACE_POS_X] = MatrixHelper.transformNormal(matrices.normal(), matrices.trustedNormals, Direction.WEST);
CUBE_NORMALS[FACE_NEG_X] = MatrixHelper.transformNormal(matrices.normal(), matrices.trustedNormals, Direction.EAST);

// When mirroring is used, the normals for EAST and WEST are swapped.
CUBE_NORMALS_MIRRORED[FACE_NEG_Y] = CUBE_NORMALS[FACE_NEG_Y];
CUBE_NORMALS_MIRRORED[FACE_POS_Y] = CUBE_NORMALS[FACE_POS_Y];
CUBE_NORMALS_MIRRORED[FACE_NEG_Z] = CUBE_NORMALS[FACE_NEG_Z];
CUBE_NORMALS_MIRRORED[FACE_POS_Z] = CUBE_NORMALS[FACE_POS_Z];
CUBE_NORMALS_MIRRORED[FACE_POS_X] = CUBE_NORMALS[FACE_NEG_X]; // mirrored
CUBE_NORMALS_MIRRORED[FACE_NEG_X] = CUBE_NORMALS[FACE_POS_X]; // mirrored
}
/**
 * Computes the eight transformed corners of {@code cuboid} and records each one, together with
 * {@code color}, via {@code setVertex}.
 * <p>
 * Only the origin corner goes through the full position transform. The remaining seven corners are
 * obtained by adding pre-scaled pose-matrix vectors to corners that were already computed.
 * <p>
 * NOTE(review): {@code setVertex} stores the results in static arrays, so this presumably must only
 * run on a single thread — confirm against the callers.
 *
 * @param matrices the pose whose {@code pose()} matrix is applied to the cuboid
 * @param cuboid   supplies the origin ({@code originX/Y/Z}) and the extent ({@code sizeX/Y/Z})
 * @param color    value handed unchanged to {@code setVertex} for every corner
 */
private static void prepareVertices(PoseStack.Pose matrices, ModelCuboid cuboid, int color) {
var pose = matrices.pose();

// Scale three vectors taken from the pose matrix by the cuboid's size along each axis.
// NOTE(review): with JOML's mCR accessor naming (column, then row) these would be matrix columns 0-2,
// i.e. the directions the local X/Y/Z axes map to — confirm against the JOML documentation.
float vxx = (pose.m00() * cuboid.sizeX), vxy = (pose.m01() * cuboid.sizeX), vxz = (pose.m02() * cuboid.sizeX);
float vyx = (pose.m10() * cuboid.sizeY), vyy = (pose.m11() * cuboid.sizeY), vyz = (pose.m12() * cuboid.sizeY);
float vzx = (pose.m20() * cuboid.sizeZ), vzy = (pose.m21() * cuboid.sizeZ), vzz = (pose.m22() * cuboid.sizeZ);

// Compute the transformed origin point of the cuboid
float c000x = MatrixHelper.transformPositionX(pose, cuboid.originX, cuboid.originY, cuboid.originZ);
float c000y = MatrixHelper.transformPositionY(pose, cuboid.originX, cuboid.originY, cuboid.originZ);
float c000z = MatrixHelper.transformPositionZ(pose, cuboid.originX, cuboid.originY, cuboid.originZ);
setVertex(VERTEX_X0_Y0_Z0, c000x, c000y, c000z, color);

// Add the pre-multiplied vectors to find the other 7 vertices
// This avoids needing to multiply each vertex position against the pose matrix, which eliminates many
// floating-point operations (going from 21 flops/vert to 3 flops/vert).
// Originally suggested by MoePus on GitHub in this pull request:
// https://github.com/CaffeineMC/sodium/pull/2960
// origin + X
float c100x = c000x + vxx;
float c100y = c000y + vxy;
float c100z = c000z + vxz;
setVertex(VERTEX_X1_Y0_Z0, c100x, c100y, c100z, color);

// origin + X + Y (built on the c100 corner)
float c110x = c100x + vyx;
float c110y = c100y + vyy;
float c110z = c100z + vyz;
setVertex(VERTEX_X1_Y1_Z0, c110x, c110y, c110z, color);

// origin + Y
float c010x = c000x + vyx;
float c010y = c000y + vyy;
float c010z = c000z + vyz;
setVertex(VERTEX_X0_Y1_Z0, c010x, c010y, c010z, color);

// origin + Z
float c001x = c000x + vzx;
float c001y = c000y + vzy;
float c001z = c000z + vzz;
setVertex(VERTEX_X0_Y0_Z1, c001x, c001y, c001z, color);

// origin + X + Z (built on the c100 corner)
float c101x = c100x + vzx;
float c101y = c100y + vzy;
float c101z = c100z + vzz;
setVertex(VERTEX_X1_Y0_Z1, c101x, c101y, c101z, color);

// origin + X + Y + Z (built on the c110 corner)
float c111x = c110x + vzx;
float c111y = c110y + vzy;
float c111z = c110z + vzz;
setVertex(VERTEX_X1_Y1_Z1, c111x, c111y, c111z, color);

// origin + Y + Z (built on the c010 corner)
float c011x = c010x + vzx;
float c011y = c010y + vzy;
float c011z = c010z + vzz;
setVertex(VERTEX_X0_Y1_Z1, c011x, c011y, c011z, color);
}

private static void buildVertexPosition(Vector3f vector, float x, float y, float z, Matrix4f matrix) {
vector.x = MatrixHelper.transformPositionX(matrix, x, y, z);
vector.y = MatrixHelper.transformPositionY(matrix, x, y, z);
vector.z = MatrixHelper.transformPositionZ(matrix, x, y, z);
/**
 * Stores one transformed corner in the per-corner scratch arrays as two 64-bit words.
 * <p>
 * The raw bit patterns of {@code x} and {@code y} share one word; {@code z} shares the other with
 * {@code color}. {@code writeVertex} later copies each word with a single 64-bit store, so the color
 * travels with the position instead of needing its own 32-bit write in the hot path.
 * <p>
 * NOTE(review): this relies on {@code Int2.pack} placing its first argument at the lower address
 * once the word is written to memory — confirm against {@code Int2}.
 *
 * @param vertexIndex slot in {@code CUBE_VERTEX_XY} / {@code CUBE_VERTEX_ZW} to overwrite
 * @param x           transformed x coordinate
 * @param y           transformed y coordinate
 * @param z           transformed z coordinate
 * @param color       color value packed next to {@code z}
 */
private static void setVertex(int vertexIndex, float x, float y, float z, int color) {
    final int xBits = Float.floatToRawIntBits(x);
    final int yBits = Float.floatToRawIntBits(y);
    final int zBits = Float.floatToRawIntBits(z);

    // The word holding z has a spare 32-bit half; the color rides along in it.
    CUBE_VERTEX_XY[vertexIndex] = Int2.pack(xBits, yBits);
    CUBE_VERTEX_ZW[vertexIndex] = Int2.pack(zBits, color);
}

private static void buildVertexTexCoord(Vector2f[] uvs, float u1, float v1, float u2, float v2) {
uvs[0].set(u2, v1);
uvs[1].set(u1, v1);
uvs[2].set(u1, v2);
uvs[3].set(u2, v2);
// Set once CUBE_FACE_NORMAL has been filled. Without it, the freshly constructed prevNormalMatrix
// (identity in JOML) would count as a cache hit for a first call that passes an identity normal
// matrix, and the face normals would be left at their zero-initialised values.
private static boolean faceNormalsPrepared;

/**
 * Refreshes the six entries of {@code CUBE_FACE_NORMAL} for the given pose, skipping the work when
 * the normal matrix equals the one used for the previous refresh.
 * <p>
 * NOTE(review): {@code FACE_POS_X} is paired with {@code Direction.WEST} and {@code FACE_NEG_X} with
 * {@code Direction.EAST}. The code this replaces used the same pairing, so it looks intentional —
 * confirm against {@code ModelCuboid}.
 * <p>
 * NOTE(review): only the matrix is used as the cache key; {@code matrices.trustedNormals} also feeds
 * the computation but is not compared. Confirm that the flag cannot change while the matrix stays equal.
 *
 * @param matrices the pose whose {@code normal()} matrix and {@code trustedNormals} flag are used
 */
private static void prepareNormalsIfChanged(PoseStack.Pose matrices) {
    final var normalMatrix = matrices.normal();

    // The cached normals are only valid after at least one refresh has actually run.
    if (faceNormalsPrepared && normalMatrix.equals(prevNormalMatrix)) {
        return;
    }

    CUBE_FACE_NORMAL[FACE_NEG_Y] = MatrixHelper.transformNormal(normalMatrix, matrices.trustedNormals, Direction.DOWN);
    CUBE_FACE_NORMAL[FACE_POS_Y] = MatrixHelper.transformNormal(normalMatrix, matrices.trustedNormals, Direction.UP);
    CUBE_FACE_NORMAL[FACE_NEG_Z] = MatrixHelper.transformNormal(normalMatrix, matrices.trustedNormals, Direction.NORTH);
    CUBE_FACE_NORMAL[FACE_POS_Z] = MatrixHelper.transformNormal(normalMatrix, matrices.trustedNormals, Direction.SOUTH);
    CUBE_FACE_NORMAL[FACE_POS_X] = MatrixHelper.transformNormal(normalMatrix, matrices.trustedNormals, Direction.WEST);
    CUBE_FACE_NORMAL[FACE_NEG_X] = MatrixHelper.transformNormal(normalMatrix, matrices.trustedNormals, Direction.EAST);

    // Remember the matrix only after the table is complete.
    prevNormalMatrix.set(normalMatrix);
    faceNormalsPrepared = true;
}
}
Loading

0 comments on commit 7f25220

Please sign in to comment.