Merge branch 'master' into for-0.55.0/sync

DaemonEngine · May 27, 2024 · 610f950 · 610f950
2 parents 78fc730 + e8779b8
commit 610f950
Show file tree

Hide file tree

Showing 21 changed files with 125 additions and 71 deletions.
diff --git a/cmake/DaemonFlags.cmake b/cmake/DaemonFlags.cmake
@@ -116,6 +116,8 @@ macro(try_linker_flag PROP FLAG)
     endif()
 endmacro()
 
+option(USE_CPU_RECOMMENDED_FEATURES "Use some common hardware features like SSE2, NEON, VFP, MCX16, etc." ON)
+
 if(MINGW AND USE_BREAKPAD)
     set_linker_flag("-Wl,--build-id")
 endif()
@@ -140,7 +142,11 @@ if (MSVC)
     set_c_cxx_flag("/W4")
 
     if (ARCH STREQUAL "i686")
-        set_c_cxx_flag("/arch:SSE2")
+        if (USE_CPU_RECOMMENDED_FEATURES)
+            set_c_cxx_flag("/arch:SSE2") # This is the default
+        else()
+            set_c_cxx_flag("/arch:IA32") # minimum
+        endif()
     endif()
 
     if (USE_LTO)
@@ -234,7 +240,6 @@ else()
         endif()
     endif()
 
-    option(USE_CPU_RECOMMENDED_FEATURES "Enforce usage of hardware features like SSE, NEON, VFP, MCX16, etc." ON)
     if (USE_CPU_RECOMMENDED_FEATURES)
         if (ARCH STREQUAL "amd64")
             # CMPXCHG16B minimum (x86-64-v2): AMD64 revision F.

diff --git a/src/common/Compiler.h b/src/common/Compiler.h
@@ -70,9 +70,6 @@ int CountTrailingZeroes(unsigned long long x);
 // Marks this function as memory allocator
 #define ALLOCATOR
 
-// Align the address of a variable to a certain value
-#define ALIGNED(a, x) x __attribute__((__aligned__(a)))
-
 // Shared library function import/export
 #ifdef _WIN32
 #define DLLEXPORT __attribute__((__dllexport__))
@@ -144,6 +141,8 @@ inline int CountTrailingZeroes(unsigned long long x) { return __builtin_ctzll(x)
 #pragma warning(disable : 4244) // 'XXX': conversion from 'YYY' to 'ZZZ', possible loss of data
 #pragma warning(disable : 4267) // 'initializing' : conversion from 'size_t' to 'int', possible loss of data
 
+#pragma warning(disable : 4324) // 'refBone_t': structure was padded due to alignment specifier
+
 #pragma warning(disable : 4458) // declaration of 'XXX' hides class member
 #pragma warning(disable : 4459) // declaration of 'XXX' hides global declaration
 
@@ -167,7 +166,6 @@ inline int CountTrailingZeroes(unsigned long long x) { return __builtin_ctzll(x)
 #define ALLOCATOR
 #endif
 #define MALLOC_LIKE ALLOCATOR __declspec(restrict)
-#define ALIGNED(a,x) __declspec(align(a)) x
 #define DLLEXPORT __declspec(dllexport)
 #define DLLIMPORT __declspec(dllimport)
 #define BREAKPOINT() __debugbreak()
@@ -205,7 +203,6 @@ inline int CountTrailingZeroes(unsigned long long x)
 #define PRINTF_TRANSLATE_ARG(a)
 #define MALLOC_LIKE
 #define ALLOCATOR
-#define ALIGNED(a,x) x
 #define DLLEXPORT
 #define DLLIMPORT
 #define BREAKPOINT()

diff --git a/src/common/cm/unittest.cpp b/src/common/cm/unittest.cpp
@@ -29,12 +29,16 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
 #include <gtest/gtest.h>
+#include <gmock/gmock.h>
 
 #include "cm_public.h"
 #include "common/FileSystem.h"
 
 namespace {
 
+using ::testing::FloatNear;
+using ::testing::Pointwise;
+
 constexpr int contentmask = ~0;
 constexpr int skipmask = 0;
 
@@ -139,6 +143,11 @@ TEST_F(TraceTest, AllInPatch)
     EXPECT_EQ(1.0f, tr.fraction);
 }
 
+// The patch planes (produced from a cGrid_t) come out fairly differently if 80-bit x87 math is used
+constexpr float PATCH_PLANE_NORMAL_ATOL = 5.0e-6;
+constexpr float PATCH_PLANE_DIST_ATOL = 8.0e-3;
+constexpr float PATCH_TRACE_FRACTION_ATOL = 3.0e-6;
+
 TEST_F(TraceTest, PointHitPatch)
 {
     trace_t tr;
@@ -149,8 +158,11 @@ TEST_F(TraceTest, PointHitPatch)
     EXPECT_EQ(CM_CheckTraceConsistency(start, end, contentmask, skipmask, tr), "");
 
     EXPECT_FALSE(tr.startsolid);
-    EXPECT_FLOAT_EQ(tr.fraction, 0.4261826);
+    EXPECT_NEAR(tr.fraction, 0.426183, PATCH_TRACE_FRACTION_ATOL);
     EXPECT_EQ(tr.contents, CONTENTS_SOLID);
+    const vec3_t expectedPlaneNormal = {0, .2425355, -0.970142};
+    EXPECT_THAT(tr.plane.normal, Pointwise(FloatNear(PATCH_PLANE_NORMAL_ATOL), expectedPlaneNormal));
+    EXPECT_NEAR(tr.plane.dist, 325.9677, PATCH_PLANE_DIST_ATOL);
 }
 
 TEST_F(TraceTest, BoxHitPatch)
@@ -165,8 +177,11 @@ TEST_F(TraceTest, BoxHitPatch)
     EXPECT_EQ(CM_CheckTraceConsistency(start, end, contentmask, skipmask, tr), "");
 
     EXPECT_FALSE(tr.startsolid);
-    EXPECT_FLOAT_EQ(tr.fraction, 0.1921389);
+    EXPECT_NEAR(tr.fraction, 0.192139, PATCH_TRACE_FRACTION_ATOL);
     EXPECT_EQ(tr.contents, CONTENTS_SOLID);
+    const vec3_t expectedPlaneNormal = {0, .2425355, -0.970142};
+    EXPECT_THAT(tr.plane.normal, Pointwise(FloatNear(PATCH_PLANE_NORMAL_ATOL), expectedPlaneNormal));
+    EXPECT_NEAR(tr.plane.dist, 362.105, PATCH_PLANE_DIST_ATOL);
 }
 
 } // namespace
diff --git a/src/engine/framework/System.cpp b/src/engine/framework/System.cpp
@@ -584,10 +584,14 @@ static void Init(int argc, char** argv)
 	cmdlineArgs_t cmdlineArgs;
 
 #ifdef _WIN32
-	// If we were launched from a console, make our output visible on it
-	if (AttachConsole(ATTACH_PARENT_PROCESS)) {
-		(void)freopen("CONOUT$", "w", stdout);
-		(void)freopen("CONOUT$", "w", stderr);
+	// Detect an MSYS2 terminal via the MSYSTEM environment variable: there, the AttachConsole code makes output not appear.
+	const char* msystem = getenv("MSYSTEM");
+	if (!msystem || !Str::IsPrefix("MINGW", msystem)) {
+		// If we were launched from a console, make our output visible on it
+		if (AttachConsole(ATTACH_PARENT_PROCESS)) {
+			(void)freopen("CONOUT$", "w", stdout);
+			(void)freopen("CONOUT$", "w", stderr);
+		}
 	}
 #endif
 

diff --git a/src/engine/qcommon/common.cpp b/src/engine/qcommon/common.cpp
@@ -878,25 +878,39 @@ void Com_Frame()
 	}
 
 	Com_EventLoop();
+
+	// IN_Frame() must be called at least once, even when the wait loop below does not run.
+	IN_Frame();
+
 	com_frameTime = Sys::Milliseconds();
 
-	if ( lastTime > com_frameTime )
-	{
-		lastTime = com_frameTime; // possible on first frame
-	}
+	// lastTime can be greater than com_frameTime on first frame.
+	lastTime = std::min( lastTime, com_frameTime );
 
 	msec = com_frameTime - lastTime;
 
-	IN_Frame(); // must be called at least once
+	// For framerates up to 250fps, sleep until 1ms is remaining;
+	// use an extra margin of 2ms when targeting a higher framerate.
+	int margin = minMsec > 3 ? 1 : 2;
 
 	while ( msec < minMsec )
 	{
-		//give cycles back to the OS
-		Sys::SleepFor(std::chrono::milliseconds(std::min(minMsec - msec, 50)));
-		IN_Frame();
+		// Never sleep more than 50ms.
+		// Never sleep when only “margin” milliseconds or less remain.
+		int sleep = std::min( std::max( minMsec - msec - margin, 0 ), 50 );
+
+		if ( sleep )
+		{
+			// Give cycles back to the OS.
+			Sys::SleepFor( std::chrono::milliseconds( sleep ) );
+		}
 
 		Com_EventLoop();
+
+		IN_Frame();
+
 		com_frameTime = Sys::Milliseconds();
+
 		msec = com_frameTime - lastTime;
 	}
 

diff --git a/src/engine/qcommon/q_math.cpp b/src/engine/qcommon/q_math.cpp
@@ -751,8 +751,8 @@ int BoxOnPlaneSide( const vec3_t emins, const vec3_t emaxs, const cplane_t *p )
 	auto pmax = _mm_max_ps( prod0, prod1 );
 	auto pmin = _mm_min_ps( prod0, prod1 );
 
-	ALIGNED( 16, vec4_t pmaxv );
-	ALIGNED( 16, vec4_t pminv );
+	alignas(16) vec4_t pmaxv;
+	alignas(16) vec4_t pminv;
 	_mm_store_ps( pmaxv, pmax );
 	_mm_store_ps( pminv, pmin );
 

diff --git a/src/engine/qcommon/q_shared.h b/src/engine/qcommon/q_shared.h
@@ -256,7 +256,7 @@ void  Com_Free_Aligned( void *ptr );
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wpedantic"
 #endif
-	ALIGNED(16, union transform_t {
+	union alignas(16) transform_t {
 		struct {
 			quat_t rot;
 			vec3_t trans;
@@ -266,16 +266,16 @@ void  Com_Free_Aligned( void *ptr );
 			__m128 sseRot;
 			__m128 sseTransScale;
 		};
-	});
+	};
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
 #else
-	ALIGNED(16, struct transform_t {
+	struct alignas(16) transform_t {
 		quat_t rot;
 		vec3_t trans;
 		vec_t  scale;
-	});
+	};
 #endif
 
 	using fixed4_t = int;
@@ -378,14 +378,6 @@ extern const quat_t   quatIdentity;
 #if defined( idx86_sse )
 		// SSE rsqrt relative error bound: 3.7 * 10^-4
 		_mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &number ) ) );
-#elif idppc
-
-		// error bound: ???
-#ifdef __GNUC__
-		asm( "frsqrte %0, %1" : "=f"( y ) : "f"( number ) );
-#else
-		y = __frsqrte( number );
-#endif
 #else
 		y = Q_uintBitsToFloat( 0x5f3759df - (Q_floatBitsToUint( number ) >> 1) );
 		y *= ( 1.5f - ( x * y * y ) ); // initial iteration
@@ -1041,29 +1033,29 @@ inline float DotProduct( const vec3_t x, const vec3_t y )
 		return *(__m128 *)vec;
 	}
 	inline __m128 mask_0000() {
-		static const ALIGNED(16, int vec[4]) = {  0,  0,  0,  0 };
-		return sseLoadInts( vec );
+		alignas(16) static const std::array<int, 4> vec = { 0, 0, 0, 0 };
+		return sseLoadInts( vec.data() );
 	}
 	inline __m128 mask_000W() {
-		static const ALIGNED(16, int vec[4]) = {  0,  0,  0, -1 };
-		return sseLoadInts( vec );
+		alignas(16) static const std::array<int, 4> vec = { 0, 0, 0, -1 };
+		return sseLoadInts( vec.data() );
 	}
 	inline __m128 mask_XYZ0() {
-		static const ALIGNED(16, int vec[4]) = { -1, -1, -1,  0 };
-		return sseLoadInts( vec );
+		alignas(16) static const std::array<int, 4> vec = { -1, -1, -1, 0 };
+		return sseLoadInts( vec.data() );
 	}
 
 	inline __m128 sign_000W() {
-		static const ALIGNED(16, int vec[4]) = { 0, 0, 0, 1<<31 };
-		return sseLoadInts( vec );
+		alignas(16) static const std::array<int, 4> vec = { 0, 0, 0, 1<<31 };
+		return sseLoadInts( vec.data() );
 	}
 	inline __m128 sign_XYZ0() {
-		static const ALIGNED(16, int vec[4]) = { 1<<31, 1<<31, 1<<31,  0 };
-		return sseLoadInts( vec );
+		alignas(16) static const std::array<int, 4> vec = { 1<<31, 1<<31, 1<<31, 0 };
+		return sseLoadInts( vec.data() );
 	}
 	inline __m128 sign_XYZW() {
-		static const ALIGNED(16, int vec[4]) = { 1<<31, 1<<31, 1<<31, 1<<31 };
-		return sseLoadInts( vec );
+		alignas(16) static const std::array<int, 4> vec = { 1<<31, 1<<31, 1<<31, 1<<31 };
+		return sseLoadInts( vec.data() );
 	}
 
 	inline __m128 sseDot4( __m128 a, __m128 b ) {

diff --git a/src/engine/renderer/gl_shader.cpp b/src/engine/renderer/gl_shader.cpp
@@ -1710,6 +1710,7 @@ GLShader_forwardLighting_omniXYZ::GLShader_forwardLighting_omniXYZ( GLShaderMana
 	u_ViewOrigin( this ),
 	u_LightOrigin( this ),
 	u_LightColor( this ),
+	u_InverseLightFactor( this ),
 	u_LightRadius( this ),
 	u_LightScale( this ),
 	u_LightAttenuationMatrix( this ),
@@ -1768,6 +1769,7 @@ GLShader_forwardLighting_projXYZ::GLShader_forwardLighting_projXYZ( GLShaderMana
 	u_ViewOrigin( this ),
 	u_LightOrigin( this ),
 	u_LightColor( this ),
+	u_InverseLightFactor( this ),
 	u_LightRadius( this ),
 	u_LightScale( this ),
 	u_LightAttenuationMatrix( this ),
@@ -1828,6 +1830,7 @@ GLShader_forwardLighting_directionalSun::GLShader_forwardLighting_directionalSun
 	u_ViewOrigin( this ),
 	u_LightDir( this ),
 	u_LightColor( this ),
+	u_InverseLightFactor( this ),
 	u_LightRadius( this ),
 	u_LightScale( this ),
 	u_LightAttenuationMatrix( this ),

diff --git a/src/engine/renderer/gl_shader.h b/src/engine/renderer/gl_shader.h
@@ -2372,6 +2372,7 @@ class GLShader_forwardLighting_omniXYZ :
 	public u_ViewOrigin,
 	public u_LightOrigin,
 	public u_LightColor,
+	public u_InverseLightFactor,
 	public u_LightRadius,
 	public u_LightScale,
 	public u_LightAttenuationMatrix,
@@ -2409,6 +2410,7 @@ class GLShader_forwardLighting_projXYZ :
 	public u_ViewOrigin,
 	public u_LightOrigin,
 	public u_LightColor,
+	public u_InverseLightFactor,
 	public u_LightRadius,
 	public u_LightScale,
 	public u_LightAttenuationMatrix,
@@ -2447,6 +2449,7 @@ class GLShader_forwardLighting_directionalSun :
 	public u_ViewOrigin,
 	public u_LightDir,
 	public u_LightColor,
+	public u_InverseLightFactor,
 	public u_LightRadius,
 	public u_LightScale,
 	public u_LightAttenuationMatrix,

diff --git a/src/engine/renderer/glsl_source/forwardLighting_fp.glsl b/src/engine/renderer/glsl_source/forwardLighting_fp.glsl
@@ -65,6 +65,7 @@ uniform vec3		u_LightDir;
 uniform vec3		u_LightOrigin;
 #endif
 uniform vec3		u_LightColor;
+uniform float u_InverseLightFactor;
 uniform float		u_LightRadius;
 uniform float       u_LightScale;
 uniform float		u_AlphaThreshold;
@@ -1006,6 +1007,7 @@ void	main()
 	color.rgb *= attenuationZ;
 #endif
 	color.rgb *= abs(u_LightScale);
+	color.rgb *= u_InverseLightFactor;
 	color.rgb *= shadow;
 
 	color.rgb *= var_Color.rgb;

diff --git a/src/engine/renderer/tr_backend.cpp b/src/engine/renderer/tr_backend.cpp
@@ -57,10 +57,10 @@ void GL_Bind( image_t *image )
 		texnum = tr.blackImage->texnum;
 	}
 
-	if ( glState.currenttextures[ glState.currenttmu ] != texnum )
+	if ( tr.currenttextures[ glState.currenttmu ] != texnum )
 	{
 		image->frameUsed = tr.frameCount;
-		glState.currenttextures[ glState.currenttmu ] = texnum;
+		tr.currenttextures[ glState.currenttmu ] = texnum;
 		glBindTexture( image->type, texnum );
 	}
 }
@@ -69,7 +69,7 @@ void GL_Unbind( image_t *image )
 {
 	GLimp_LogComment( "--- GL_Unbind() ---\n" );
 
-	glState.currenttextures[ glState.currenttmu ] = 0;
+	tr.currenttextures[ glState.currenttmu ] = 0;
 	glBindTexture( image->type, 0 );
 }
 
@@ -148,7 +148,7 @@ void GL_SelectTexture( int unit )
 		return;
 	}
 
-	if ( unit >= 0 && unit <= 31 )
+	if ( unit >= 0 && unit < glConfig2.maxTextureUnits )
 	{
 		glActiveTexture( GL_TEXTURE0 + unit );
 
@@ -175,12 +175,12 @@ void GL_BindToTMU( int unit, image_t *image )
 
 	int texnum = image->texnum;
 
-	if ( unit < 0 || unit > 31 )
+	if ( unit < 0 || unit >= glConfig2.maxTextureUnits )
 	{
 		Sys::Drop( "GL_BindToTMU: unit %i is out of range\n", unit );
 	}
 
-	if ( glState.currenttextures[ unit ] == texnum )
+	if ( tr.currenttextures[ unit ] == texnum )
 	{
 		return;
 	}