From b36e2558b08de97fa16417d30f2c5b109b4e60c9 Mon Sep 17 00:00:00 2001 From: Adam Wieckowski Date: Wed, 24 Jan 2024 16:08:25 +0100 Subject: [PATCH 1/8] Cleanup * remove unused macros * remove unused functions --- source/Lib/CommonLib/RdCost.cpp | 4 ++-- source/Lib/CommonLib/Rom.cpp | 28 +------------------------- source/Lib/CommonLib/Slice.cpp | 7 ------- source/Lib/CommonLib/Slice.h | 5 ----- source/Lib/CommonLib/TypeDef.h | 25 +---------------------- source/Lib/CommonLib/x86/RdCostX86.h | 14 ++++++------- source/Lib/DecoderLib/DecLibParser.cpp | 4 +--- 7 files changed, 12 insertions(+), 75 deletions(-) diff --git a/source/Lib/CommonLib/RdCost.cpp b/source/Lib/CommonLib/RdCost.cpp index 0e14f083..c03b29a3 100644 --- a/source/Lib/CommonLib/RdCost.cpp +++ b/source/Lib/CommonLib/RdCost.cpp @@ -132,7 +132,7 @@ Distortion RdCost::xGetSAD8( const DistParam& rcDtParam ) } uiSum <<= iSubShift; - return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); + return uiSum; } Distortion RdCost::xGetSAD16( const DistParam& rcDtParam ) @@ -171,7 +171,7 @@ Distortion RdCost::xGetSAD16( const DistParam& rcDtParam ) } uiSum <<= iSubShift; - return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); + return uiSum; } void RdCost::xGetSAD8X5(const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos) { diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp index 723fc6fc..c44f92dc 100644 --- a/source/Lib/CommonLib/Rom.cpp +++ b/source/Lib/CommonLib/Rom.cpp @@ -203,35 +203,9 @@ const uint16_t g_log2SbbSize[MAX_LOG2_TU_SIZE_PLUS_ONE][MAX_LOG2_TU_SIZE_PLUS_ON // initialize ROM variables void initROM() { -#if RExt__HIGH_BIT_DEPTH_SUPPORT || !( ENABLE_SIMD_LOG2 && defined( TARGET_SIMD_X86 ) ) +#if !( ENABLE_SIMD_LOG2 && defined( TARGET_SIMD_X86 ) ) int c; -#endif -#if RExt__HIGH_BIT_DEPTH_SUPPORT - { - c = 64; - const double s = sqrt((double)c) * (64 << COM16_C806_TRANS_PREC); - - - for (int k = 0; k < c; k++) - { - for (int n = 0; n < c; 
n++) - { - double w0, v; - const double PI = 3.14159265358979323846; - - // DCT-II - w0 = k == 0 ? sqrt(0.5) : 1; - v = cos(PI*(n + 0.5)*k / c) * w0 * sqrt(2.0 / c); - short sv = (short)(s * v + (v > 0 ? 0.5 : -0.5)); - if (g_aiT64[0][0][c*c + k*c + n] != sv) - { - msg(WARNING, "trap"); - } - } - } - } - #endif #if !( ENABLE_SIMD_LOG2 && defined( TARGET_SIMD_X86 ) ) // g_aucConvertToBit[ x ]: log2(x/4), if x=4 -> 0, x=8 -> 1, x=16 -> 2, ... diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp index a673ec64..58ff972e 100644 --- a/source/Lib/CommonLib/Slice.cpp +++ b/source/Lib/CommonLib/Slice.cpp @@ -787,8 +787,6 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll) m_colFromL0Flag = pSrc->m_colFromL0Flag; m_colRefIdx = pSrc->m_colRefIdx; - if( cpyAlmostAll ) setLambdas(pSrc->getLambdas()); - m_uiTLayer = pSrc->m_uiTLayer; m_bTLayerSwitchingFlag = pSrc->m_bTLayerSwitchingFlag; @@ -2213,11 +2211,6 @@ ProfileLevelTierFeatures::extractPTLInformation(const SPS &sps) } } -double ProfileLevelTierFeatures::getMinCr() const -{ - return (m_pLevelTier!=0 && m_pProfile!=0) ? (m_pProfile->minCrScaleFactorx100 * m_pLevelTier->minCrBase[m_tier?1:0])/100.0 : 0.0 ; -} - uint64_t ProfileLevelTierFeatures::getCpbSizeInBits() const { return (m_pLevelTier!=0 && m_pProfile!=0) ? 
uint64_t(m_pProfile->cpbVclFactor) * m_pLevelTier->maxCpb[m_tier?1:0] : uint64_t(0); diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index 9084c95a..7ffe397f 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -2584,7 +2584,6 @@ class Slice bool m_colFromL0Flag = true; // collocated picture from List0 flag uint32_t m_colRefIdx = 0; - double m_lambdas[MAX_NUM_COMPONENT] = { 0.0, 0.0, 0.0 }; uint32_t m_maxNumIBCMergeCand = 0; bool m_disBdofDmvrFlag = false; @@ -2752,9 +2751,6 @@ class Slice bool isInterB() const { return m_eSliceType == B_SLICE; } bool isInterP() const { return m_eSliceType == P_SLICE; } - void setLambdas( const double lambdas[MAX_NUM_COMPONENT] ) { for (int component = 0; component < MAX_NUM_COMPONENT; component++) m_lambdas[component] = lambdas[component]; } - const double* getLambdas() const { return m_lambdas; } - uint32_t getCuQpDeltaSubdiv() const { return this->isIntra() ? m_pcPicHeader->getCuQpDeltaSubdivIntra() : m_pcPicHeader->getCuQpDeltaSubdivInter(); } uint32_t getCuChromaQpOffsetSubdiv() const { return this->isIntra() ? 
m_pcPicHeader->getCuChromaQpOffsetSubdivIntra() : m_pcPicHeader->getCuChromaQpOffsetSubdivInter(); } @@ -2984,7 +2980,6 @@ class ProfileLevelTierFeatures const LevelTierFeatures *getLevelTierFeatures() const { return m_pLevelTier; } Tier getTier() const { return m_tier; } uint64_t getCpbSizeInBits() const; - double getMinCr() const; uint32_t getMaxDpbSize( uint32_t picSizeMaxInSamplesY ) const; }; diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index b7198fc7..9d046105 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -133,15 +133,9 @@ namespace vvdec // Tool Switches // ==================================================================================================================== - -// This can be enabled by the makefile -#ifndef RExt__HIGH_BIT_DEPTH_SUPPORT -#define RExt__HIGH_BIT_DEPTH_SUPPORT 0 ///< 0 (default) use data type definitions for 8-10 bit video, 1 = use larger data types to allow for up to 16-bit video (originally developed as part of N0188) -#endif - // SIMD optimizations #define SIMD_ENABLE 1 -#define ENABLE_SIMD_OPT ( SIMD_ENABLE && !RExt__HIGH_BIT_DEPTH_SUPPORT ) ///< SIMD optimizations, no impact on RD performance +#define ENABLE_SIMD_OPT ( SIMD_ENABLE ) ///< SIMD optimizations, no impact on RD performance #define ENABLE_SIMD_OPT_MCIF ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the interpolation filter, no impact on RD performance #define ENABLE_SIMD_OPT_BUFFER ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the buffer operations, no impact on RD performance #define ENABLE_SIMD_OPT_DIST ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the distortion calculations(SAD,SSE,HADAMARD), no impact on RD performance @@ -166,23 +160,6 @@ namespace vvdec #define LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET 1 /// JVET-L0414 (CE11.2.2) with explicit signalling of num interval, threshold and qpOffset -// 
==================================================================================================================== -// Derived macros -// ==================================================================================================================== - -#if RExt__HIGH_BIT_DEPTH_SUPPORT -#define FULL_NBIT 1 ///< When enabled, use distortion measure derived from all bits of source data, otherwise discard (bitDepth - 8) least-significant bits of distortion -#else -#define FULL_NBIT 1 ///< When enabled, use distortion measure derived from all bits of source data, otherwise discard (bitDepth - 8) least-significant bits of distortion -#endif - -#if FULL_NBIT -#define DISTORTION_PRECISION_ADJUSTMENT(x) 0 -#else -#define DISTORTION_ESTIMATION_BITS 8 -#define DISTORTION_PRECISION_ADJUSTMENT(x) ((x>DISTORTION_ESTIMATION_BITS)? ((x)-DISTORTION_ESTIMATION_BITS) : 0) -#endif - // ==================================================================================================================== // Error checks // ==================================================================================================================== diff --git a/source/Lib/CommonLib/x86/RdCostX86.h b/source/Lib/CommonLib/x86/RdCostX86.h index 4b055502..6c38c134 100644 --- a/source/Lib/CommonLib/x86/RdCostX86.h +++ b/source/Lib/CommonLib/x86/RdCostX86.h @@ -196,7 +196,7 @@ Distortion xGetSAD_MxN_SIMD( const DistParam &rcDtParam ) } uiSum <<= iSubShift; - return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth); + return uiSum; } template @@ -276,8 +276,8 @@ void xGetSADX5_8xN_SIMDImp(const DistParam& rcDtParam, Distortion* cost) { sum0 = _mm_slli_epi32(sum0, iSubShift); if (isCalCentrePos) sum2 = _mm_slli_epi32(sum2, iSubShift); - sum0 = _mm_srli_epi32(sum0, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)))); - if (isCalCentrePos) sum2 = _mm_srli_epi32(sum2, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)))); + sum0 = _mm_srli_epi32(sum0, 1); + if (isCalCentrePos) 
sum2 = _mm_srli_epi32(sum2, 1); _mm_storeu_si64( ( __m128i* ) &cost[0], sum0 ); if (isCalCentrePos) cost[2] = (_mm_cvtsi128_si32(sum2)); @@ -502,13 +502,13 @@ void xGetSADX5_16xN_SIMDImp(const DistParam& rcDtParam, Distortion* cost) { sum0134 = _mm_slli_epi32(sum0134, iSubShift); - sum0134 = _mm_srli_epi32(sum0134, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)))); + sum0134 = _mm_srli_epi32(sum0134, 1); _mm_storeu_si64( ( __m128i* ) &cost[0], sum0134 ); if (isCalCentrePos) { int tmp = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum2)) + _mm256_extract_epi32(sum2, 4); tmp <<= iSubShift; - tmp >>= (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth))); + tmp >>= 1; cost[2] = tmp; } _mm_storeu_si64( ( __m128i* ) &cost[3], _mm_unpackhi_epi64( sum0134, sum0134 ) ); @@ -586,8 +586,8 @@ void xGetSADX5_16xN_SIMDImp(const DistParam& rcDtParam, Distortion* cost) { sum0 = _mm_slli_epi32(sum0, iSubShift); if (isCalCentrePos) sum2 = _mm_slli_epi32(sum2, iSubShift); - sum0 = _mm_srli_epi32(sum0, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)))); - if (isCalCentrePos) sum2 = _mm_srli_epi32(sum2, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)))); + sum0 = _mm_srli_epi32(sum0, 1); + if (isCalCentrePos) sum2 = _mm_srli_epi32(sum2, 1); _mm_storeu_si64( ( __m128i* ) &cost[0], sum0 ); if (isCalCentrePos) cost[2] = (_mm_cvtsi128_si32(sum2)); diff --git a/source/Lib/DecoderLib/DecLibParser.cpp b/source/Lib/DecoderLib/DecLibParser.cpp index 11d8b1bc..9e55187e 100644 --- a/source/Lib/DecoderLib/DecLibParser.cpp +++ b/source/Lib/DecoderLib/DecLibParser.cpp @@ -1077,12 +1077,10 @@ void DecLibParser::xActivateParameterSets( const int layerId ) xParsePrefixSEImessages(); -#if RExt__HIGH_BIT_DEPTH_SUPPORT == 0 - if( /* sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag() ||*/ sps->getBitDepth( CHANNEL_TYPE_LUMA ) > 12 || sps->getBitDepth( CHANNEL_TYPE_CHROMA ) > 12 ) + if( sps->getBitDepth( CHANNEL_TYPE_LUMA ) > 12 || sps->getBitDepth( 
CHANNEL_TYPE_CHROMA ) > 12 ) { THROW( "High bit depth support must be enabled at compile-time in order to decode this bitstream\n" ); } -#endif applyReferencePictureListBasedMarking( m_apcSlicePilot, layerId, *pps ); From 6628035c101401659ca4a06e18ebbe6a21bee478 Mon Sep 17 00:00:00 2001 From: Adam Wieckowski Date: Wed, 24 Jan 2024 16:10:55 +0100 Subject: [PATCH 2/8] Added parse delay scaling and limiting (#164) --- source/Lib/CommonLib/TypeDef.h | 5 +++++ source/Lib/DecoderLib/DecLib.cpp | 1 + 2 files changed, 6 insertions(+) diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 9d046105..f5a7daa3 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -68,6 +68,11 @@ namespace vvdec #define RECO_WHILE_PARSE 1 #define ALLOW_MIDER_LF_DURING_PICEXT 1 +#define MAX_OUT_OF_ORDER_PICS 3 // maximum number of pictures, that are reconstructed out of order +#define DEFAULT_PARSE_DELAY_FACTOR 24 // factor to set default parse delay based on number of threads (4-bit fixed point) +#define DEFAULT_PARSE_DELAY_MAX 48 // maximum parse delay derived from thread count, when not set explicitly +#define MAX_OUT_OF_ORDER_PICS 3 // maximum number of pictures, that are reconstructed out of order + #define MAX_OUT_OF_ORDER_PICS 3 // maximum number of pictures, that are reconstructed out of order #define JVET_O1170_CHECK_BV_AT_DECODER 0 // For decoder to check if a BV is valid or not diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp index a4987e8d..a641bc81 100644 --- a/source/Lib/DecoderLib/DecLib.cpp +++ b/source/Lib/DecoderLib/DecLib.cpp @@ -132,6 +132,7 @@ void DecLib::create( int numDecThreads, int parserFrameDelay, const UserAllocato { CHECK( numDecThreads < 0, "invalid number of threads" ); parserFrameDelay = numDecThreads; + parserFrameDelay = std::min( ( numDecThreads * DEFAULT_PARSE_DELAY_FACTOR ) >> 4, DEFAULT_PARSE_DELAY_MAX ); } m_parseFrameDelay = parserFrameDelay; From
015965693395f6b0826cc2f8045d526e5d7f9ec2 Mon Sep 17 00:00:00 2001 From: Adam Wieckowski Date: Wed, 24 Jan 2024 16:12:23 +0100 Subject: [PATCH 3/8] remove doubled macros --- source/Lib/CommonLib/TypeDef.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index f5a7daa3..bb461b3b 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -71,9 +71,6 @@ namespace vvdec #define MAX_OUT_OF_ORDER_PICS 3 // maximum number of pictures, that are reconstructed out of order #define DEFAULT_PARSE_DELAY_FACTOR 24 // factor to set default parse delay based on number of threads (4-bit fixed point) #define DEFAULT_PARSE_DELAY_MAX 48 // maximum parse delay derived from thread count, when not set explicitly -#define MAX_OUT_OF_ORDER_PICS 3 // maximum number of pictures, that are reconstructed out of order - -#define MAX_OUT_OF_ORDER_PICS 3 // maximum number of pictures, that are reconstructed out of order #define JVET_O1170_CHECK_BV_AT_DECODER 0 // For decoder to check if a BV is valid or not From 010dd6e75bfa768abde875090155c2f34b6542ab Mon Sep 17 00:00:00 2001 From: Adam Wieckowski Date: Wed, 24 Jan 2024 16:16:46 +0100 Subject: [PATCH 4/8] Cleanup ARM as well --- source/Lib/CommonLib/arm/RdCostARM.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/Lib/CommonLib/arm/RdCostARM.h b/source/Lib/CommonLib/arm/RdCostARM.h index 5a5b4cbd..eb4f8798 100644 --- a/source/Lib/CommonLib/arm/RdCostARM.h +++ b/source/Lib/CommonLib/arm/RdCostARM.h @@ -148,7 +148,7 @@ Distortion xGetSAD_MxN_SIMD( const DistParam& rcDtParam ) uiSum = vaddlvq_s16( vsum16 ); uiSum <<= iSubShift; - return uiSum >> DISTORTION_PRECISION_ADJUSTMENT( rcDtParam.bitDepth ); + return uiSum; } template @@ -217,9 +217,9 @@ void xGetSADX5_16xN_SIMDImp( const DistParam& rcDtParam, Distortion* cost ) if( isCalCentrePos ) sumTwo = vshlq_s32( sumTwo, vdupq_n_s32( iSubShift ) ); - sum = vshrq_n_s32( sum, ( 1 + ( 
DISTORTION_PRECISION_ADJUSTMENT( rcDtParam.bitDepth ) ) ) ); + sum = vshrq_n_s32( sum, 1 ); if( isCalCentrePos ) - sumTwo = vshrq_n_s32( sumTwo, ( 1 + ( DISTORTION_PRECISION_ADJUSTMENT( rcDtParam.bitDepth ) ) ) ); + sumTwo = vshrq_n_s32( sumTwo, 1 ); vst1q_lane_u64( (uint64_t*) &cost[ 0 ], (uint64x2_t) sum, 0 ); if( isCalCentrePos ) From cec36b3d18c455cde911904a68847fd8318bc4ca Mon Sep 17 00:00:00 2001 From: Adam Wieckowski Date: Wed, 24 Jan 2024 16:30:51 +0100 Subject: [PATCH 5/8] add comment --- source/Lib/CommonLib/TypeDef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index bb461b3b..3c4d46a0 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -69,7 +69,7 @@ namespace vvdec #define ALLOW_MIDER_LF_DURING_PICEXT 1 #define MAX_OUT_OF_ORDER_PICS 3 // maximum number of pictures, that are reconstructed out of order -#define DEFAULT_PARSE_DELAY_FACTOR 24 // factor to set default parse delay based on number of threads (4-bit fixed point) +#define DEFAULT_PARSE_DELAY_FACTOR 24 // factor to set default parse delay based on number of threads (4-bit fixed point), equals a 1.5 slope #define DEFAULT_PARSE_DELAY_MAX 48 // maximum parse delay derived from thread count, when not set explicitly #define JVET_O1170_CHECK_BV_AT_DECODER 0 // For decoder to check if a BV is valid or not From 4769a14a67a8f4cd1f4ce0c7118439ba7c5448b5 Mon Sep 17 00:00:00 2001 From: Adam Wieckowski Date: Wed, 24 Jan 2024 16:36:35 +0100 Subject: [PATCH 6/8] Remove old code --- source/Lib/DecoderLib/DecLib.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp index a641bc81..c78f13d0 100644 --- a/source/Lib/DecoderLib/DecLib.cpp +++ b/source/Lib/DecoderLib/DecLib.cpp @@ -131,7 +131,6 @@ void DecLib::create( int numDecThreads, int parserFrameDelay, const UserAllocato if( parserFrameDelay < 0 ) { CHECK( numDecThreads < 0,
"invalid number of threads" ); - parserFrameDelay = numDecThreads; parserFrameDelay = std::min( ( numDecThreads * DEFAULT_PARSE_DELAY_FACTOR ) >> 4, DEFAULT_PARSE_DELAY_MAX ); } m_parseFrameDelay = parserFrameDelay; From 472d5bcda08505076caa959ad331d8ef35457861 Mon Sep 17 00:00:00 2001 From: Adam Wieckowski Date: Wed, 24 Jan 2024 16:37:13 +0100 Subject: [PATCH 7/8] remove obsolete comments --- source/Lib/CommonLib/TypeDef.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 3c4d46a0..7620c5eb 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -137,11 +137,11 @@ namespace vvdec // SIMD optimizations #define SIMD_ENABLE 1 -#define ENABLE_SIMD_OPT ( SIMD_ENABLE ) ///< SIMD optimizations, no impact on RD performance -#define ENABLE_SIMD_OPT_MCIF ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the interpolation filter, no impact on RD performance -#define ENABLE_SIMD_OPT_BUFFER ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the buffer operations, no impact on RD performance -#define ENABLE_SIMD_OPT_DIST ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the distortion calculations(SAD,SSE,HADAMARD), no impact on RD performance -#define ENABLE_SIMD_OPT_ALF ( 1 && ENABLE_SIMD_OPT /*&& !ALF_FIX*/ ) ///< SIMD optimization for ALF +#define ENABLE_SIMD_OPT ( SIMD_ENABLE ) ///< SIMD optimizations +#define ENABLE_SIMD_OPT_MCIF ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the interpolation filter +#define ENABLE_SIMD_OPT_BUFFER ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the buffer operations +#define ENABLE_SIMD_OPT_DIST ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the distortion calculations (SAD) +#define ENABLE_SIMD_OPT_ALF ( 1 && ENABLE_SIMD_OPT /*&& !ALF_FIX*/ ) ///< SIMD optimization for ALF #define ENABLE_SIMD_OPT_INTRAPRED ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for Intra Prediction #define
ENABLE_SIMD_OPT_QUANT ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for Quant/Dequant #if ENABLE_SIMD_OPT_BUFFER From 8b0e5276ea854ba2e37eb89297170d80b0d10a72 Mon Sep 17 00:00:00 2001 From: Adam Wieckowski Date: Wed, 24 Jan 2024 16:59:40 +0100 Subject: [PATCH 8/8] Reduce default slope on 32-bit machines to 1.0 --- source/Lib/CommonLib/TypeDef.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 7620c5eb..d24664a6 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -69,7 +69,11 @@ namespace vvdec #define ALLOW_MIDER_LF_DURING_PICEXT 1 #define MAX_OUT_OF_ORDER_PICS 3 // maximum number of pictures, that are reconstructed out of order +#if INTPTR_MAX == INT64_MAX #define DEFAULT_PARSE_DELAY_FACTOR 24 // factor to set default parse delay based on number of threads (4-bit fixed point), equals a 1.5 slope +#else +#define DEFAULT_PARSE_DELAY_FACTOR 16 // factor to set default parse delay based on number of threads (4-bit fixed point), equals a 1.0 slope +#endif #define DEFAULT_PARSE_DELAY_MAX 48 // maximum parse delay derived from thread count, when not set explicitly #define JVET_O1170_CHECK_BV_AT_DECODER 0 // For decoder to check if a BV is valid or not