From a8d61b58e9d4c6f05f996b65c7406b237bc346e6 Mon Sep 17 00:00:00 2001
From: toxieainc
Date: Thu, 11 Jan 2024 10:23:13 +0100
Subject: [PATCH] some more tiny SSE optimizations, and increase precision of the (unused)

---
Review notes (this section is ignored when the patch is applied):
 - The SSE paths pass an explicitly zeroed register (_mm_setzero_ps) as the
   first operand of _mm_cvt_si2ss instead of an uninitialized __m128 local.
   Only lane 0 of the result is consumed, so the output is unchanged.
 - The comment on the >>7 pre-shift in src_int_to_float_array now describes
   what the shift does (exact conversion via truncation of the low 7 bits).
 - 8.0 * 0x10000000 and 1.0 * 0x80000000 are the same value (2^31), so the
   src_float_to_int_array edits are purely cosmetic.

 ext/libsamplerate/samplerate.c | 32 +++++++++++++++++++++++++++-----
 src/libpinmame/libpinmame.cpp  |  6 +++---
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/ext/libsamplerate/samplerate.c b/ext/libsamplerate/samplerate.c
index fb5e8f757..bf2e95aab 100644
--- a/ext/libsamplerate/samplerate.c
+++ b/ext/libsamplerate/samplerate.c
@@ -501,7 +501,14 @@ src_char_to_float_array(const signed char * const __restrict in, float * const _
 {
 	int i;
 	for (i = 0 ; i < len ; i++)
+#ifdef RESAMPLER_SSE_OPT
+	{
+		const __m128 zero = _mm_setzero_ps(); // defined upper lanes; never read an uninitialized __m128
+		out[i] = _mm_cvtss_f32(_mm_mul_ss(_mm_cvt_si2ss(zero,in[i]),_mm_set_ss((float)(1.0 / 0x80))));
+	}
+#else
 		out[i] = (float)in[i] * (float)(1.0 / 0x80); // (float)(in[i] / (1.0 * 0x80));
+#endif
 } /* src_char_to_float_array */
 
 void
@@ -509,7 +516,14 @@ src_short_to_float_array (const short * const __restrict in, float * const __res
 {
 	int i;
 	for (i = 0 ; i < len ; i++)
-		out [i] = (float)in[i] * (float)(1.0 / 0x8000); // (float) (in [i] / (1.0 * 0x8000)) ;
+#ifdef RESAMPLER_SSE_OPT
+	{
+		const __m128 zero = _mm_setzero_ps(); // defined upper lanes; never read an uninitialized __m128
+		out[i] = _mm_cvtss_f32(_mm_mul_ss(_mm_cvt_si2ss(zero,in[i]),_mm_set_ss((float)(1.0 / 0x8000))));
+	}
+#else
+		out[i] = (float)in[i] * (float)(1.0 / 0x8000); // (float)(in[i] / (1.0 * 0x8000));
+#endif
 } /* src_short_to_float_array */
 
 void
@@ -535,9 +549,17 @@ src_float_to_short_array (const float * const __restrict in, short * const __res
 void
 src_int_to_float_array (const int * const __restrict in, float * const __restrict out, int len)
 {
+	// >>7 drops the low 7 bits (truncation, not rounding; relies on an arithmetic shift for negative input) so the remaining value fits the 24bit fp32 mantissa and converts exactly
 	int i;
 	for (i = 0 ; i < len ; i++)
-		out [i] = (float)in[i] * (float)(1.0 / (8.0 * 0x10000000)); // (float) (in [i] / (8.0 * 0x10000000)) ;
+#ifdef RESAMPLER_SSE_OPT
+	{
+		const __m128 zero = _mm_setzero_ps(); // defined upper lanes; never read an uninitialized __m128
+		out[i] = _mm_cvtss_f32(_mm_mul_ss(_mm_cvt_si2ss(zero,in[i]>>7),_mm_set_ss((float)(1.0 / (1.0 * (0x80000000>>7))))));
+	}
+#else
+		out[i] = (float)(in[i]>>7) * (float)(1.0 / (1.0 * (0x80000000>>7))); // (float)(in[i] / (1.0 * (0x80000000>>7)));
+#endif
 } /* src_int_to_float_array */
 
 void
@@ -546,15 +568,15 @@ src_float_to_int_array (const float * const __restrict in, int * const __restric
 	int i;
 	for (i = 0 ; i < len ; i++)
 #ifdef RESAMPLER_SSE_OPT
-		out[i] = _mm_cvtss_si32(_mm_max_ss(_mm_min_ss(_mm_set_ss(in[i] * (float)(8.0 * 0x10000000)), _mm_set_ss(2147483520.f)), _mm_set_ss((float)(-8.0 * 0x10000000)))); //!! cannot output INT_MAX like this (max is 2147483520 instead of 2147483647), but who cares!
+		out[i] = _mm_cvtss_si32(_mm_max_ss(_mm_min_ss(_mm_set_ss(in[i] * (float)(1.0 * 0x80000000)), _mm_set_ss(2147483520.f)), _mm_set_ss((float)(-1.0 * 0x80000000)))); //!! cannot output INT_MAX like this (max is 2147483520 instead of 2147483647), but who cares!
 #else
 	{	double scaled_value;
-		scaled_value = in [i] * (8.0 * 0x10000000) ;
+		scaled_value = in [i] * (1.0 * 0x80000000) ;
 		if (CPU_CLIPS_POSITIVE == 0 && scaled_value >= (1.0 * 0x7FFFFFFF))
 		{	out [i] = 0x7fffffff ;
 			continue ;
 		}
-		if (CPU_CLIPS_NEGATIVE == 0 && scaled_value <= (-8.0 * 0x10000000))
+		if (CPU_CLIPS_NEGATIVE == 0 && scaled_value <= (-1.0 * 0x80000000))
 		{	out [i] = -1 - 0x7fffffff ;
 			continue ;
 		}
diff --git a/src/libpinmame/libpinmame.cpp b/src/libpinmame/libpinmame.cpp
index 004f6105d..7a9756cdf 100644
--- a/src/libpinmame/libpinmame.cpp
+++ b/src/libpinmame/libpinmame.cpp
@@ -2,6 +2,8 @@
 
 #include "libpinmame.h"
 
+#include "../../ext/libsamplerate/samplerate.h"
+
 #include
 #include
 
@@ -339,9 +341,7 @@ extern "C" int osd_update_audio_stream(INT16* p_buffer)
 	if (_p_Config->audioFormat == PINMAME_AUDIO_FORMAT_INT16)
 		return (*(_p_Config->cb_OnAudioUpdated))((void*)p_buffer, samplesThisFrame, _p_userData);
 
-	const int samplesEnd = samplesThisFrame * _audioInfo.channels;
-	for (int i = 0; i < samplesEnd; i++)
-		_audioData[i] = (float)p_buffer[i] * (float)(1.0/32768.0);
+	src_short_to_float_array(p_buffer, _audioData, samplesThisFrame * _audioInfo.channels);
 	return (*(_p_Config->cb_OnAudioUpdated))((void*)_audioData, samplesThisFrame, _p_userData);
 }
 