From 4575c8a4d825d0d23506866e5fe90a5ecd479649 Mon Sep 17 00:00:00 2001 From: Webster Sheets Date: Tue, 7 Nov 2023 20:54:56 -0500 Subject: [PATCH] Improve perlin noise performance - On my machine compiled for SSE2, this provides an approximately 20% performance improvement to noise generation functions (33 -> 26 MCycles) - Replaces simplex offset computation if statements with a 100% identical bitwise computation - Also rearrange the gradient dot functions for better readability (no performance change) --- src/perlin.cpp | 88 ++++++++++++-------------------------------------- 1 file changed, 21 insertions(+), 67 deletions(-) diff --git a/src/perlin.cpp b/src/perlin.cpp index c18557ccc15..dbbe31bae84 100644 --- a/src/perlin.cpp +++ b/src/perlin.cpp @@ -132,57 +132,18 @@ double noise(const vector3d &p) int i1, j1, k1; // Offsets for second corner of simplex in (i,j,k) coords int i2, j2, k2; // Offsets for third corner of simplex in (i,j,k) coords - if (x0 >= y0) { - if (y0 >= z0) { - i1 = 1; - j1 = 0; - k1 = 0; - i2 = 1; - j2 = 1; - k2 = 0; - } // X Y Z order - else if (x0 >= z0) { - i1 = 1; - j1 = 0; - k1 = 0; - i2 = 1; - j2 = 0; - k2 = 1; - } // X Z Y order - else { - i1 = 0; - j1 = 0; - k1 = 1; - i2 = 1; - j2 = 0; - k2 = 1; - } // Z X Y order - } else { // x0<y0 - if (y0 < z0) { - i1 = 0; - j1 = 0; - k1 = 1; - i2 = 0; - j2 = 1; - k2 = 1; - } // Z Y X order - else if (x0 < z0) { - i1 = 0; - j1 = 1; - k1 = 0; - i2 = 0; - j2 = 1; - k2 = 1; - } // Y Z X order - else { - i1 = 0; - j1 = 1; - k1 = 0; - i2 = 1; - j2 = 1; - k2 = 0; - } // Y X Z order - } + int x_ge_y = x0 >= y0; + int y_ge_z = y0 >= z0; + int x_ge_z = x0 >= z0; + + // Compute simplex offsets - all values will be 0 or 1 + i1 = x_ge_y & x_ge_z; + j1 = y_ge_z & (!x_ge_y); + k1 = (!x_ge_z) & (!y_ge_z); + + i2 = x_ge_y | x_ge_z; + j2 = (!x_ge_y) | y_ge_z; + k2 = !(x_ge_z & y_ge_z); // A step of (1,0,0) in (i,j,k) means a step of (1-c,-c,-c) in (x,y,z), // a step of (0,1,0) in (i,j,k) means a step of (-c,1-c,-c) in (x,y,z), and @@ -208,37 +169,30 @@ double noise(const vector3d &p) const int gi3 = mod12[perm[ii + 1 + perm[jj + 1 + perm[kk + 1]]]]; // Noise contributions from the four corners - double n0, n1, n2, n3; + double n0 = 0.0, n1 = 0.0, n2 = 0.0, n3 = 0.0; // Calculate the contribution from the four corners 
double t0 = 0.6 - x0 * x0 - y0 * y0 - z0 * z0; - if (t0 < 0) - n0 = 0.0; - else { + double t1 = 0.6 - x1 * x1 - y1 * y1 - z1 * z1; + double t2 = 0.6 - x2 * x2 - y2 * y2 - z2 * z2; + double t3 = 0.6 - x3 * x3 - y3 * y3 - z3 * z3; + + if (t0 > 0.0) { t0 *= t0; n0 = t0 * t0 * dot(grad3[gi0], x0, y0, z0); } - double t1 = 0.6 - x1 * x1 - y1 * y1 - z1 * z1; - if (t1 < 0) - n1 = 0.0; - else { + if (t1 > 0.0) { t1 *= t1; n1 = t1 * t1 * dot(grad3[gi1], x1, y1, z1); } - double t2 = 0.6 - x2 * x2 - y2 * y2 - z2 * z2; - if (t2 < 0) - n2 = 0.0; - else { + if (t2 > 0.0) { t2 *= t2; n2 = t2 * t2 * dot(grad3[gi2], x2, y2, z2); } - double t3 = 0.6 - x3 * x3 - y3 * y3 - z3 * z3; - if (t3 < 0) - n3 = 0.0; - else { + if (t3 > 0.0) { t3 *= t3; n3 = t3 * t3 * dot(grad3[gi3], x3, y3, z3); }