From d15b4aaf05035c62942ddb418fe8af2aebf601f0 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Thu, 9 Jan 2025 15:39:45 +0200 Subject: [PATCH 01/24] QoSReport -> QoSManager --- .../finalization_consensus_test.go | 80 +++++----- protocol/lavaprotocol/request_builder.go | 4 +- .../lavaprotocol/response_builder_test.go | 5 +- protocol/lavaprotocol/reuqest_builder_test.go | 3 +- protocol/lavasession/common.go | 10 +- .../lavasession/consumer_session_manager.go | 18 ++- protocol/lavasession/consumer_types.go | 17 --- protocol/lavasession/consumer_types_test.go | 18 +-- .../lavasession/single_consumer_session.go | 72 +-------- protocol/qos/common.go | 15 ++ protocol/qos/qos_manager.go | 137 ++++++++++++++++++ protocol/rpcconsumer/rpcconsumer_server.go | 10 +- .../reliability_manager_test.go | 9 +- 13 files changed, 241 insertions(+), 157 deletions(-) create mode 100644 protocol/qos/common.go create mode 100644 protocol/qos/qos_manager.go diff --git a/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go b/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go index 194265384c..3b87a9485f 100644 --- a/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go +++ b/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go @@ -282,54 +282,54 @@ func TestQoS(t *testing.T) { currentLatency := time.Millisecond expectedLatency := time.Millisecond latestServicedBlock := expectedBH - singleConsumerSession.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) - require.Equal(t, uint64(1), singleConsumerSession.QoSInfo.AnsweredRelays) - require.Equal(t, uint64(1), singleConsumerSession.QoSInfo.TotalRelays) - require.Equal(t, int64(1), singleConsumerSession.QoSInfo.SyncScoreSum) - require.Equal(t, int64(1), singleConsumerSession.QoSInfo.TotalSyncScore) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSInfo.LastQoSReport.Availability) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSInfo.LastQoSReport.Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSInfo.LastQoSReport.Latency) + singleConsumerSession.QoSManager.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) + require.Equal(t, uint64(1), singleConsumerSession.QoSManager.GetAnsweredRelays()) + require.Equal(t, uint64(1), singleConsumerSession.QoSManager.GetTotalRelays()) + require.Equal(t, int64(1), singleConsumerSession.QoSManager.GetSyncScoreSum()) + require.Equal(t, int64(1), singleConsumerSession.QoSManager.GetTotalSyncScore()) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Availability) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Sync) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Latency) latestServicedBlock = expectedBH + 1 - singleConsumerSession.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) - require.Equal(t, uint64(2), singleConsumerSession.QoSInfo.AnsweredRelays) - require.Equal(t, uint64(2), singleConsumerSession.QoSInfo.TotalRelays) - require.Equal(t, int64(2), singleConsumerSession.QoSInfo.SyncScoreSum) - require.Equal(t, int64(2), singleConsumerSession.QoSInfo.TotalSyncScore) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSInfo.LastQoSReport.Availability) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSInfo.LastQoSReport.Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSInfo.LastQoSReport.Latency) - - singleConsumerSession.QoSInfo.TotalRelays++ // this is how we add a failure - singleConsumerSession.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) - require.Equal(t, uint64(3), singleConsumerSession.QoSInfo.AnsweredRelays) - require.Equal(t, uint64(4), singleConsumerSession.QoSInfo.TotalRelays) - require.Equal(t, int64(3), singleConsumerSession.QoSInfo.SyncScoreSum) - require.Equal(t, int64(3), singleConsumerSession.QoSInfo.TotalSyncScore) - - require.Equal(t, sdk.ZeroDec(), singleConsumerSession.QoSInfo.LastQoSReport.Availability) // because availability below 95% is 0 - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSInfo.LastQoSReport.Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSInfo.LastQoSReport.Latency) + singleConsumerSession.QoSManager.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) + require.Equal(t, uint64(2), singleConsumerSession.QoSManager.GetAnsweredRelays()) + require.Equal(t, uint64(2), singleConsumerSession.QoSManager.GetTotalRelays()) + require.Equal(t, int64(2), singleConsumerSession.QoSManager.GetSyncScoreSum()) + require.Equal(t, int64(2), singleConsumerSession.QoSManager.GetTotalSyncScore()) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Availability) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Sync) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Latency) + + singleConsumerSession.QoSManager.IncTotalRelays() // this is how we add a failure + singleConsumerSession.QoSManager.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) + require.Equal(t, uint64(3), singleConsumerSession.QoSManager.GetAnsweredRelays()) + require.Equal(t, uint64(4), singleConsumerSession.QoSManager.GetTotalRelays()) + require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetSyncScoreSum()) + require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetTotalSyncScore()) + + require.Equal(t, sdk.ZeroDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Availability) // because availability below 95% is 0 + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Sync) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Latency) latestServicedBlock = expectedBH - 1 // is one block below threshold - singleConsumerSession.CalculateQoS(currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) - require.Equal(t, uint64(4), singleConsumerSession.QoSInfo.AnsweredRelays) - require.Equal(t, uint64(5), singleConsumerSession.QoSInfo.TotalRelays) - require.Equal(t, int64(3), singleConsumerSession.QoSInfo.SyncScoreSum) - require.Equal(t, int64(4), singleConsumerSession.QoSInfo.TotalSyncScore) - - require.Equal(t, sdk.ZeroDec(), singleConsumerSession.QoSInfo.LastQoSReport.Availability) // because availability below 95% is 0 - require.Equal(t, sdk.MustNewDecFromStr("0.75"), singleConsumerSession.QoSInfo.LastQoSReport.Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSInfo.LastQoSReport.Latency) + singleConsumerSession.QoSManager.CalculateQoS(currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) + require.Equal(t, uint64(4), singleConsumerSession.QoSManager.GetAnsweredRelays()) + require.Equal(t, uint64(5), singleConsumerSession.QoSManager.GetTotalRelays()) + require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetSyncScoreSum()) + require.Equal(t, int64(4), singleConsumerSession.QoSManager.GetTotalSyncScore()) + + require.Equal(t, sdk.ZeroDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Availability) // because availability below 95% is 0 + require.Equal(t, sdk.MustNewDecFromStr("0.75"), singleConsumerSession.QoSManager.GetLastQoSReport().Sync) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Latency) latestServicedBlock = expectedBH + 1 // add in a loop so availability goes above 95% for i := 5; i < 100; i++ { - singleConsumerSession.CalculateQoS(currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) + singleConsumerSession.QoSManager.CalculateQoS(currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) } - require.Equal(t, sdk.MustNewDecFromStr("0.8"), singleConsumerSession.QoSInfo.LastQoSReport.Availability) // because availability below 95% is 0 - require.Equal(t, sdk.MustNewDecFromStr("0.989898989898989898"), singleConsumerSession.QoSInfo.LastQoSReport.Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSInfo.LastQoSReport.Latency) + require.Equal(t, sdk.MustNewDecFromStr("0.8"), singleConsumerSession.QoSManager.GetLastQoSReport().Availability) // because availability below 95% is 0 + require.Equal(t, sdk.MustNewDecFromStr("0.989898989898989898"), singleConsumerSession.QoSManager.GetLastQoSReport().Sync) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Latency) finalizationInsertionsSpreadBlocks := []finalizationTestInsertion{ finalizationInsertionForProviders(chainID, epoch, 200, 0, 1, true, "", blocksInFinalizationProof, blockDistanceForFinalizedData)[0], diff --git a/protocol/lavaprotocol/request_builder.go b/protocol/lavaprotocol/request_builder.go index 194d04aba8..260d4ae941 100644 --- a/protocol/lavaprotocol/request_builder.go +++ b/protocol/lavaprotocol/request_builder.go @@ -71,8 +71,8 @@ func ConstructRelaySession(lavaChainID string, relayRequestData *pairingtypes.Re return nil } - copiedQOS := copyQoSServiceReport(singleConsumerSession.QoSInfo.LastQoSReport) - copiedExcellenceQOS := copyQoSServiceReport(singleConsumerSession.QoSInfo.LastExcellenceQoSReportRaw) // copy raw report for the node + copiedQOS := copyQoSServiceReport(singleConsumerSession.QoSManager.GetLastQoSReport()) + copiedExcellenceQOS := copyQoSServiceReport(singleConsumerSession.QoSManager.GetLastExcellenceQoSReportRaw()) // copy raw report for the node return &pairingtypes.RelaySession{ SpecId: chainID, diff --git a/protocol/lavaprotocol/response_builder_test.go b/protocol/lavaprotocol/response_builder_test.go index e8d545c706..7693035417 100644 --- a/protocol/lavaprotocol/response_builder_test.go +++ b/protocol/lavaprotocol/response_builder_test.go @@ -8,6 +8,7 @@ import ( "github.com/lavanet/lava/v4/protocol/lavaprotocol/finalizationverification" "github.com/lavanet/lava/v4/protocol/lavasession" + "github.com/lavanet/lava/v4/protocol/qos" "github.com/lavanet/lava/v4/utils/sigs" pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" spectypes "github.com/lavanet/lava/v4/x/spec/types" @@ -29,7 +30,7 @@ func TestSignAndExtractResponse(t *testing.T) { singleConsumerSession := &lavasession.SingleConsumerSession{ CuSum: 20, LatestRelayCu: 10, // set by GetSessions cuNeededForSession - QoSInfo: lavasession.QoSReport{LastQoSReport: &pairingtypes.QualityOfServiceReport{}}, + QoSManager: qos.NewQoSManager(), SessionId: 123, Parent: nil, RelayNum: 1, @@ -77,7 +78,7 @@ func TestSignAndExtractResponseLatest(t *testing.T) { singleConsumerSession := &lavasession.SingleConsumerSession{ CuSum: 20, LatestRelayCu: 10, // set by GetSessions cuNeededForSession - QoSInfo: lavasession.QoSReport{LastQoSReport: &pairingtypes.QualityOfServiceReport{}}, + QoSManager: qos.NewQoSManager(), SessionId: 123, Parent: nil, RelayNum: 1, diff --git a/protocol/lavaprotocol/reuqest_builder_test.go b/protocol/lavaprotocol/reuqest_builder_test.go index 8f49fb9add..fd6c7c61b0 100644 --- a/protocol/lavaprotocol/reuqest_builder_test.go +++ b/protocol/lavaprotocol/reuqest_builder_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/lavanet/lava/v4/protocol/lavasession" + "github.com/lavanet/lava/v4/protocol/qos" "github.com/lavanet/lava/v4/utils/sigs" pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" "github.com/stretchr/testify/require" @@ -18,7 +19,7 @@ func TestSignAndExtract(t *testing.T) { singleConsumerSession := &lavasession.SingleConsumerSession{ CuSum: 20, LatestRelayCu: 10, // set by GetSessions cuNeededForSession - QoSInfo: lavasession.QoSReport{LastQoSReport: &pairingtypes.QualityOfServiceReport{}}, + QoSManager: qos.NewQoSManager(), SessionId: 123, Parent: nil, RelayNum: 1, diff --git a/protocol/lavasession/common.go b/protocol/lavasession/common.go index 90f65e7b1e..8a1358dc13 100644 --- a/protocol/lavasession/common.go +++ b/protocol/lavasession/common.go @@ -50,12 +50,10 @@ const ( var AvailabilityPercentage sdk.Dec = sdk.NewDecWithPrec(1, 1) // TODO move to params pairing const ( - PercentileToCalculateLatency = 0.9 - MinProvidersForSync = 0.6 - OptimizerPerturbation = 0.10 - LatencyThresholdStatic = 1 * time.Second - LatencyThresholdSlope = 1 * time.Millisecond - StaleEpochDistance = 3 // relays done 3 epochs back are ready to be rewarded + OptimizerPerturbation = 0.10 + LatencyThresholdStatic = 1 * time.Second + LatencyThresholdSlope = 1 * time.Millisecond + StaleEpochDistance = 3 // relays done 3 epochs back are ready to be rewarded ) diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index efa4d43352..11c5d45293 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -935,7 +935,7 @@ func (csm *ConsumerSessionManager) OnSessionFailure(consumerSession *SingleConsu consumerSession.BlockListed = true } - consumerSession.QoSInfo.TotalRelays++ + consumerSession.QoSManager.IncTotalRelays() consumerSession.ConsecutiveErrors = append(consumerSession.ConsecutiveErrors, errorReceived) // copy consecutive errors for report. errorsForConsumerSession := consumerSession.ConsecutiveErrors @@ -1046,7 +1046,7 @@ func (csm *ConsumerSessionManager) OnSessionDone( consumerSession.ConsecutiveErrors = []error{} consumerSession.LatestBlock = latestServicedBlock // update latest serviced block // calculate QoS - consumerSession.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, int64(providersCount)) + consumerSession.QoSManager.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, int64(providersCount)) go csm.providerOptimizer.AppendRelayData(consumerSession.Parent.PublicLavaAddress, currentLatency, isHangingApi, specComputeUnits, uint64(latestServicedBlock)) csm.updateMetricsManager(consumerSession, currentLatency, !isHangingApi) // apply latency only for non hanging apis return nil @@ -1061,14 +1061,18 @@ func (csm *ConsumerSessionManager) updateMetricsManager(consumerSession *SingleC info := csm.RPCEndpoint() apiInterface := info.ApiInterface chainId := info.ChainID + var lastQos *pairingtypes.QualityOfServiceReport - var lastQosExcellence *pairingtypes.QualityOfServiceReport - if consumerSession.QoSInfo.LastQoSReport != nil { - qos := *consumerSession.QoSInfo.LastQoSReport + lastQoSReport := consumerSession.QoSManager.GetLastQoSReport() + if lastQoSReport != nil { + qos := *lastQoSReport lastQos = &qos } - if consumerSession.QoSInfo.LastExcellenceQoSReport != nil { - qosEx := *consumerSession.QoSInfo.LastExcellenceQoSReport + + var lastQosExcellence *pairingtypes.QualityOfServiceReport + lastExcellenceQoSReport := consumerSession.QoSManager.GetLastExcellenceQoSReportRaw() + if lastExcellenceQoSReport != nil { + qosEx := *lastExcellenceQoSReport lastQosExcellence = &qosEx } publicProviderAddress := consumerSession.Parent.PublicLavaAddress diff --git a/protocol/lavasession/consumer_types.go b/protocol/lavasession/consumer_types.go index 22c6bed45c..e305dbc948 100644 --- a/protocol/lavasession/consumer_types.go +++ b/protocol/lavasession/consumer_types.go @@ -84,17 +84,6 @@ type ignoredProviders struct { currentEpoch uint64 } -type QoSReport struct { - LastQoSReport *pairingtypes.QualityOfServiceReport - LastExcellenceQoSReport *pairingtypes.QualityOfServiceReport - LastExcellenceQoSReportRaw *pairingtypes.QualityOfServiceReport - LatencyScoreList []sdk.Dec - SyncScoreSum int64 - TotalSyncScore int64 - TotalRelays uint64 - AnsweredRelays uint64 -} - type DataReliabilitySession struct { SingleConsumerSession *SingleConsumerSession Epoch uint64 @@ -593,12 +582,6 @@ func (cswp *ConsumerSessionsWithProvider) fetchEndpointConnectionFromConsumerSes return connected, endpointsList, cswp.PublicLavaAddress, nil } -func CalculateAvailabilityScore(qosReport *QoSReport) (downtimePercentageRet, scaledAvailabilityScoreRet sdk.Dec) { - downtimePercentage := sdk.NewDecWithPrec(int64(qosReport.TotalRelays-qosReport.AnsweredRelays), 0).Quo(sdk.NewDecWithPrec(int64(qosReport.TotalRelays), 0)) - scaledAvailabilityScore := sdk.MaxDec(sdk.ZeroDec(), AvailabilityPercentage.Sub(downtimePercentage).Quo(AvailabilityPercentage)) - return downtimePercentage, scaledAvailabilityScore -} - func CalcWeightsByStake(providers map[uint64]*ConsumerSessionsWithProvider) (weights map[string]int64) { weights = make(map[string]int64) staticProviders := make([]*ConsumerSessionsWithProvider, 0) diff --git a/protocol/lavasession/consumer_types_test.go b/protocol/lavasession/consumer_types_test.go index 931226cfed..38838592a3 100644 --- a/protocol/lavasession/consumer_types_test.go +++ b/protocol/lavasession/consumer_types_test.go @@ -4,6 +4,7 @@ import ( "testing" sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/lavanet/lava/v4/protocol/qos" "github.com/stretchr/testify/require" ) @@ -11,21 +12,18 @@ func TestCalculateAvailabilityScore(t *testing.T) { avialabilityAsFloat, err := AvailabilityPercentage.Float64() require.NoError(t, err) precision := uint64(10000) - qosReport := &QoSReport{ - TotalRelays: precision, - AnsweredRelays: precision - uint64(avialabilityAsFloat*float64(precision)), - } - downTime, availabilityScore := CalculateAvailabilityScore(qosReport) + qosManager := qos.NewQoSManager() + qosManager.SetTotalRelays(precision) + qosManager.SetAnsweredRelays(precision - uint64(avialabilityAsFloat*float64(precision))) + downTime, availabilityScore := qosManager.CalculateAvailabilityScore() downTimeFloat, err := downTime.Float64() require.NoError(t, err) require.Equal(t, downTimeFloat, avialabilityAsFloat) require.Zero(t, availabilityScore.BigInt().Uint64()) - qosReport = &QoSReport{ - TotalRelays: 2 * precision, - AnsweredRelays: 2*precision - uint64(avialabilityAsFloat*float64(precision)), - } - downTime, availabilityScore = CalculateAvailabilityScore(qosReport) + qosManager.SetTotalRelays(2 * precision) + qosManager.SetAnsweredRelays(2*precision - uint64(avialabilityAsFloat*float64(precision))) + downTime, availabilityScore = qosManager.CalculateAvailabilityScore() downTimeFloat, err = downTime.Float64() require.NoError(t, err) halfDec, err := sdk.NewDecFromStr("0.5") diff --git a/protocol/lavasession/single_consumer_session.go b/protocol/lavasession/single_consumer_session.go index 221cd70f20..07d2c32ebe 100644 --- a/protocol/lavasession/single_consumer_session.go +++ b/protocol/lavasession/single_consumer_session.go @@ -1,12 +1,10 @@ package lavasession import ( - "math" - "sort" - "strconv" "time" sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/lavanet/lava/v4/protocol/qos" "github.com/lavanet/lava/v4/utils" pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" ) @@ -14,7 +12,7 @@ import ( type SingleConsumerSession struct { CuSum uint64 LatestRelayCu uint64 // set by GetSessions cuNeededForSession - QoSInfo QoSReport + QoSManager *qos.QoSManager SessionId int64 Parent *ConsumerSessionsWithProvider lock utils.LavaMutex @@ -39,78 +37,24 @@ func (cs *SingleConsumerSession) CalculateExpectedLatency(timeoutGivenToRelay ti // cs should be locked here to use this method, returns the computed qos or zero if last qos is nil or failed to compute. func (cs *SingleConsumerSession) getQosComputedResultOrZero() sdk.Dec { - if cs.QoSInfo.LastExcellenceQoSReport != nil { - qosComputed, errComputing := cs.QoSInfo.LastExcellenceQoSReport.ComputeQoSExcellence() + lastExcellenceQoSReport := cs.QoSManager.GetLastExcellenceQoSReportRaw() + if lastExcellenceQoSReport != nil { + qosComputed, errComputing := lastExcellenceQoSReport.ComputeQoSExcellence() if errComputing == nil { // if we failed to compute the qos will be 0 so this provider wont be picked to return the error in case we get it return qosComputed } - utils.LavaFormatDebug("Failed computing QoS used for error parsing, could happen if we have no sync data or one of the fields is zero", utils.LogAttr("Report", cs.QoSInfo.LastExcellenceQoSReport), utils.LogAttr("error", errComputing)) + utils.LavaFormatDebug("Failed computing QoS used for error parsing, could happen if we have no sync data or one of the fields is zero", utils.LogAttr("Report", cs.QoSManager.GetLastExcellenceQoSReportRaw()), utils.LogAttr("error", errComputing)) } return sdk.ZeroDec() } -func (cs *SingleConsumerSession) CalculateQoS(latency, expectedLatency time.Duration, blockHeightDiff int64, numOfProviders int, servicersToCount int64) { - // Add current Session QoS - cs.QoSInfo.TotalRelays++ // increase total relays - cs.QoSInfo.AnsweredRelays++ // increase answered relays - - if cs.QoSInfo.LastQoSReport == nil { - cs.QoSInfo.LastQoSReport = &pairingtypes.QualityOfServiceReport{} - } - - downtimePercentage, scaledAvailabilityScore := CalculateAvailabilityScore(&cs.QoSInfo) - cs.QoSInfo.LastQoSReport.Availability = scaledAvailabilityScore - if sdk.OneDec().GT(cs.QoSInfo.LastQoSReport.Availability) { - utils.LavaFormatDebug("QoS Availability report", utils.Attribute{Key: "Availability", Value: cs.QoSInfo.LastQoSReport.Availability}, utils.Attribute{Key: "down percent", Value: downtimePercentage}) - } - - latencyScore := sdk.MinDec(sdk.OneDec(), sdk.NewDecFromInt(sdk.NewInt(int64(expectedLatency))).Quo(sdk.NewDecFromInt(sdk.NewInt(int64(latency))))) - - insertSorted := func(list []sdk.Dec, value sdk.Dec) []sdk.Dec { - index := sort.Search(len(list), func(i int) bool { - return list[i].GTE(value) - }) - if len(list) == index { // nil or empty slice or after last element - return append(list, value) - } - list = append(list[:index+1], list[index:]...) // index < len(a) - list[index] = value - return list - } - cs.QoSInfo.LatencyScoreList = insertSorted(cs.QoSInfo.LatencyScoreList, latencyScore) - cs.QoSInfo.LastQoSReport.Latency = cs.QoSInfo.LatencyScoreList[int(float64(len(cs.QoSInfo.LatencyScoreList))*PercentileToCalculateLatency)] - - // checking if we have enough information to calculate the sync score for the providers, if we haven't talked - // with enough providers we don't have enough information and we will wait to have more information before setting the sync score - shouldCalculateSyncScore := int64(numOfProviders) > int64(math.Ceil(float64(servicersToCount)*MinProvidersForSync)) - if shouldCalculateSyncScore { // - if blockHeightDiff <= 0 { // if the diff is bigger than 0 than the block is too old (blockHeightDiff = expected - allowedLag - blockHeight) and we don't give him the score - cs.QoSInfo.SyncScoreSum++ - } - cs.QoSInfo.TotalSyncScore++ - cs.QoSInfo.LastQoSReport.Sync = sdk.NewDec(cs.QoSInfo.SyncScoreSum).QuoInt64(cs.QoSInfo.TotalSyncScore) - if sdk.OneDec().GT(cs.QoSInfo.LastQoSReport.Sync) { - utils.LavaFormatDebug("QoS Sync report", - utils.Attribute{Key: "Sync", Value: cs.QoSInfo.LastQoSReport.Sync}, - utils.Attribute{Key: "block diff", Value: blockHeightDiff}, - utils.Attribute{Key: "sync score", Value: strconv.FormatInt(cs.QoSInfo.SyncScoreSum, 10) + "/" + strconv.FormatInt(cs.QoSInfo.TotalSyncScore, 10)}, - utils.Attribute{Key: "session_id", Value: cs.SessionId}, - utils.Attribute{Key: "provider", Value: cs.Parent.PublicLavaAddress}, - ) - } - } else { - // we prefer to give them a score of 1 when there is no other data, since otherwise we damage their payments - cs.QoSInfo.LastQoSReport.Sync = sdk.NewDec(1) - } -} - func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, qoSExcellenceReport *pairingtypes.QualityOfServiceReport, rawQoSExcellenceReport *pairingtypes.QualityOfServiceReport, usedProviders UsedProvidersInf, routerKey RouterKey) error { scs.LatestRelayCu = cuNeededForSession // set latestRelayCu scs.RelayNum += RelayNumberIncrement // increase relayNum if scs.RelayNum > 1 { // we only set excellence for sessions with more than one successful relays, this guarantees data within the epoch exists - scs.QoSInfo.LastExcellenceQoSReport = qoSExcellenceReport - scs.QoSInfo.LastExcellenceQoSReportRaw = rawQoSExcellenceReport + scs.QoSManager.SetLastExcellenceQoSReportRaw(qoSExcellenceReport) + scs.QoSManager.SetLastExcellenceQoSReportRaw(rawQoSExcellenceReport) } scs.usedProviders = usedProviders scs.routerKey = routerKey diff --git a/protocol/qos/common.go b/protocol/qos/common.go new file mode 100644 index 0000000000..f4c18c1a64 --- /dev/null +++ b/protocol/qos/common.go @@ -0,0 +1,15 @@ +package qos + +import sdk "github.com/cosmos/cosmos-sdk/types" + +var AvailabilityPercentage sdk.Dec = sdk.NewDecWithPrec(1, 1) // TODO move to params pairing +const ( + PercentileToCalculateLatency = 0.9 + MinProvidersForSync = 0.6 +) + +type DegradeAvailabilityReputation interface { +} + +type SendQoSUpdate interface { +} diff --git a/protocol/qos/qos_manager.go b/protocol/qos/qos_manager.go new file mode 100644 index 0000000000..7d753edc77 --- /dev/null +++ b/protocol/qos/qos_manager.go @@ -0,0 +1,137 @@ +package qos + +import ( + "math" + "sort" + "strconv" + "time" + + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/lavanet/lava/v4/utils" + pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" +) + +type QoSManager struct { + lastQoSReport *pairingtypes.QualityOfServiceReport + lastExcellenceQoSReport *pairingtypes.QualityOfServiceReport + lastExcellenceQoSReportRaw *pairingtypes.QualityOfServiceReport + latencyScoreList []sdk.Dec + syncScoreSum int64 + totalSyncScore int64 + totalRelays uint64 + answeredRelays uint64 +} + +func NewQoSManager() *QoSManager { + return &QoSManager{ + lastQoSReport: &pairingtypes.QualityOfServiceReport{}, + lastExcellenceQoSReport: &pairingtypes.QualityOfServiceReport{}, + lastExcellenceQoSReportRaw: &pairingtypes.QualityOfServiceReport{}, + latencyScoreList: []sdk.Dec{}, + syncScoreSum: 0, + totalSyncScore: 0, + totalRelays: 0, + answeredRelays: 0, + } +} + +func (qosReport *QoSManager) CalculateAvailabilityScore() (downtimePercentageRet, scaledAvailabilityScoreRet sdk.Dec) { + downtimePercentage := sdk.NewDecWithPrec(int64(qosReport.totalRelays-qosReport.answeredRelays), 0).Quo(sdk.NewDecWithPrec(int64(qosReport.totalRelays), 0)) + scaledAvailabilityScore := sdk.MaxDec(sdk.ZeroDec(), AvailabilityPercentage.Sub(downtimePercentage).Quo(AvailabilityPercentage)) + return downtimePercentage, scaledAvailabilityScore +} + +func (qosReport *QoSManager) GetLastQoSReport() *pairingtypes.QualityOfServiceReport { + return qosReport.lastQoSReport +} + +func (qosReport *QoSManager) GetLastExcellenceQoSReportRaw() *pairingtypes.QualityOfServiceReport { + return qosReport.lastExcellenceQoSReportRaw +} + +func (qosReport *QoSManager) SetLastExcellenceQoSReportRaw(report *pairingtypes.QualityOfServiceReport) { + qosReport.lastExcellenceQoSReportRaw = report +} + +func (qosReport *QoSManager) IncTotalRelays() { + qosReport.totalRelays++ +} + +func (cs *QoSManager) CalculateQoS(latency, expectedLatency time.Duration, blockHeightDiff int64, numOfProviders int, servicersToCount int64) { + // Add current Session QoS + cs.totalRelays++ // increase total relays + cs.answeredRelays++ // increase answered relays + + if cs.lastQoSReport == nil { + cs.lastQoSReport = &pairingtypes.QualityOfServiceReport{} + } + + downtimePercentage, scaledAvailabilityScore := cs.CalculateAvailabilityScore() + cs.lastQoSReport.Availability = scaledAvailabilityScore + if sdk.OneDec().GT(cs.lastQoSReport.Availability) { + utils.LavaFormatDebug("QoS Availability report", utils.Attribute{Key: "Availability", Value: cs.lastQoSReport.Availability}, utils.Attribute{Key: "down percent", Value: downtimePercentage}) + } + + latencyScore := sdk.MinDec(sdk.OneDec(), sdk.NewDecFromInt(sdk.NewInt(int64(expectedLatency))).Quo(sdk.NewDecFromInt(sdk.NewInt(int64(latency))))) + + insertSorted := func(list []sdk.Dec, value sdk.Dec) []sdk.Dec { + index := sort.Search(len(list), func(i int) bool { + return list[i].GTE(value) + }) + if len(list) == index { // nil or empty slice or after last element + return append(list, value) + } + list = append(list[:index+1], list[index:]...) // index < len(a) + list[index] = value + return list + } + cs.latencyScoreList = insertSorted(cs.latencyScoreList, latencyScore) + cs.lastQoSReport.Latency = cs.latencyScoreList[int(float64(len(cs.latencyScoreList))*PercentileToCalculateLatency)] + + // checking if we have enough information to calculate the sync score for the providers, if we haven't talked + // with enough providers we don't have enough information and we will wait to have more information before setting the sync score + shouldCalculateSyncScore := int64(numOfProviders) > int64(math.Ceil(float64(servicersToCount)*MinProvidersForSync)) + if shouldCalculateSyncScore { // + if blockHeightDiff <= 0 { // if the diff is bigger than 0 than the block is too old (blockHeightDiff = expected - allowedLag - blockHeight) and we don't give him the score + cs.syncScoreSum++ + } + cs.totalSyncScore++ + cs.lastQoSReport.Sync = sdk.NewDec(cs.syncScoreSum).QuoInt64(cs.totalSyncScore) + if sdk.OneDec().GT(cs.lastQoSReport.Sync) { + utils.LavaFormatDebug("QoS Sync report", + utils.Attribute{Key: "Sync", Value: cs.lastQoSReport.Sync}, + utils.Attribute{Key: "block diff", Value: blockHeightDiff}, + utils.Attribute{Key: "sync score", Value: strconv.FormatInt(cs.syncScoreSum, 10) + "/" + strconv.FormatInt(cs.totalSyncScore, 10)}, + // utils.Attribute{Key: "session_id", Value: cs.SessionId}, + // utils.Attribute{Key: "provider", Value: cs.Parent.PublicLavaAddress}, + ) + } + } else { + // we prefer to give them a score of 1 when there is no other data, since otherwise we damage their payments + cs.lastQoSReport.Sync = sdk.NewDec(1) + } +} + +func (qosReport *QoSManager) GetAnsweredRelays() uint64 { + return qosReport.answeredRelays +} + +func (qosReport *QoSManager) SetAnsweredRelays(answeredRelays uint64) { + qosReport.answeredRelays = answeredRelays +} + +func (qosReport *QoSManager) GetTotalRelays() uint64 { + return qosReport.totalRelays +} + +func (qosReport *QoSManager) SetTotalRelays(totalRelays uint64) { + qosReport.totalRelays = totalRelays +} + +func (qosReport *QoSManager) GetSyncScoreSum() int64 { + return qosReport.syncScoreSum +} + +func (qosReport *QoSManager) GetTotalSyncScore() int64 { + return qosReport.totalSyncScore +} diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 8592da8408..296fcbe0bf 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -862,16 +862,18 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( utils.Attribute{Key: "providersCount", Value: pairingAddressesLen}, ) } - if rpccs.debugRelays && singleConsumerSession.QoSInfo.LastQoSReport != nil && - singleConsumerSession.QoSInfo.LastQoSReport.Sync.BigInt() != nil && - singleConsumerSession.QoSInfo.LastQoSReport.Sync.LT(sdk.MustNewDecFromStr("0.9")) { + + lastQoSReport := singleConsumerSession.QoSManager.GetLastQoSReport() + if rpccs.debugRelays && lastQoSReport != nil && + lastQoSReport.Sync.BigInt() != nil && + lastQoSReport.Sync.LT(sdk.MustNewDecFromStr("0.9")) { utils.LavaFormatDebug("identified QoS mismatch", utils.Attribute{Key: "expectedBH", Value: expectedBH}, utils.Attribute{Key: "latestServicedBlock", Value: latestBlock}, utils.Attribute{Key: "session_id", Value: singleConsumerSession.SessionId}, utils.Attribute{Key: "provider_address", Value: singleConsumerSession.Parent.PublicLavaAddress}, utils.Attribute{Key: "providersCount", Value: pairingAddressesLen}, - utils.Attribute{Key: "singleConsumerSession.QoSInfo", Value: singleConsumerSession.QoSInfo}, + utils.Attribute{Key: "singleConsumerSession.QoSInfo", Value: singleConsumerSession.QoSManager}, ) } diff --git a/protocol/rpcprovider/reliabilitymanager/reliability_manager_test.go b/protocol/rpcprovider/reliabilitymanager/reliability_manager_test.go index 29ad44c3f6..45b4768f00 100644 --- a/protocol/rpcprovider/reliabilitymanager/reliability_manager_test.go +++ b/protocol/rpcprovider/reliabilitymanager/reliability_manager_test.go @@ -16,6 +16,7 @@ import ( "github.com/lavanet/lava/v4/protocol/lavaprotocol" "github.com/lavanet/lava/v4/protocol/lavaprotocol/finalizationverification" "github.com/lavanet/lava/v4/protocol/lavasession" + "github.com/lavanet/lava/v4/protocol/qos" "github.com/lavanet/lava/v4/protocol/rpcprovider/reliabilitymanager" "github.com/lavanet/lava/v4/protocol/statetracker" testkeeper "github.com/lavanet/lava/v4/testutil/keeper" @@ -47,7 +48,7 @@ func TestFullFlowReliabilityCompare(t *testing.T) { singleConsumerSession := &lavasession.SingleConsumerSession{ CuSum: 20, LatestRelayCu: 10, // set by GetSessions cuNeededForSession - QoSInfo: lavasession.QoSReport{LastQoSReport: &pairingtypes.QualityOfServiceReport{}}, + QoSManager: qos.NewQoSManager(), SessionId: 123, Parent: nil, RelayNum: 1, @@ -58,7 +59,7 @@ func TestFullFlowReliabilityCompare(t *testing.T) { singleConsumerSession2 := &lavasession.SingleConsumerSession{ CuSum: 200, LatestRelayCu: 100, // set by GetSessions cuNeededForSession - QoSInfo: lavasession.QoSReport{LastQoSReport: &pairingtypes.QualityOfServiceReport{}}, + QoSManager: qos.NewQoSManager(), SessionId: 456, Parent: nil, RelayNum: 5, @@ -200,7 +201,7 @@ func TestFullFlowReliabilityConflict(t *testing.T) { singleConsumerSession := &lavasession.SingleConsumerSession{ CuSum: 20, LatestRelayCu: 10, // set by GetSessions cuNeededForSession - QoSInfo: lavasession.QoSReport{LastQoSReport: &pairingtypes.QualityOfServiceReport{}}, + QoSManager: qos.NewQoSManager(), SessionId: 123, Parent: nil, RelayNum: 1, @@ -212,7 +213,7 @@ func TestFullFlowReliabilityConflict(t *testing.T) { singleConsumerSession2 := &lavasession.SingleConsumerSession{ CuSum: 200, LatestRelayCu: 100, // set by GetSessions cuNeededForSession - QoSInfo: lavasession.QoSReport{LastQoSReport: &pairingtypes.QualityOfServiceReport{}}, + QoSManager: qos.NewQoSManager(), SessionId: 456, Parent: consumerSessionWithProvider, RelayNum: 5, From aa9c827920fd8e99815d98f5ef11ef9d4115df26 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Thu, 9 Jan 2025 15:49:52 +0200 Subject: [PATCH 02/24] =?UTF-8?q?Rename=20QoSExcellence/Excellence=20?= =?UTF-8?q?=E2=86=92=20Reputation=20in=20protocol?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- protocol/common/endpoints.go | 6 +- protocol/lavaprotocol/request_builder.go | 4 +- .../lavasession/consumer_session_manager.go | 14 +-- protocol/lavasession/consumer_types.go | 2 +- .../lavasession/single_consumer_session.go | 18 +-- protocol/metrics/consumer_metrics_manager.go | 22 ++-- .../provideroptimizer/provider_optimizer.go | 8 +- .../provider_optimizer_test.go | 6 +- protocol/qos/qos_manager.go | 16 +-- protocol/rpcconsumer/relay_errors.go | 4 +- protocol/rpcconsumer/relay_errors_test.go | 108 +++++++++--------- protocol/rpcconsumer/relay_processor.go | 4 +- protocol/rpcconsumer/rpcconsumer_server.go | 4 +- 13 files changed, 108 insertions(+), 108 deletions(-) diff --git a/protocol/common/endpoints.go b/protocol/common/endpoints.go index 2379512708..764b50ba60 100644 --- a/protocol/common/endpoints.go +++ b/protocol/common/endpoints.go @@ -247,9 +247,9 @@ type ConflictHandlerInterface interface { } type ProviderInfo struct { - ProviderAddress string - ProviderQoSExcellenceSummery sdk.Dec // the number represents the average qos for this provider session - ProviderStake sdk.Coin + ProviderAddress string + ProviderReputationSummery sdk.Dec // the number represents the average qos for this provider session + ProviderStake sdk.Coin } type RelayResult struct { diff --git a/protocol/lavaprotocol/request_builder.go b/protocol/lavaprotocol/request_builder.go index 260d4ae941..c02cdf5964 100644 --- a/protocol/lavaprotocol/request_builder.go +++ b/protocol/lavaprotocol/request_builder.go @@ -72,7 +72,7 @@ func ConstructRelaySession(lavaChainID string, relayRequestData *pairingtypes.Re } copiedQOS := copyQoSServiceReport(singleConsumerSession.QoSManager.GetLastQoSReport()) - copiedExcellenceQOS := copyQoSServiceReport(singleConsumerSession.QoSManager.GetLastExcellenceQoSReportRaw()) // copy raw report for the node + copiedReputation := copyQoSServiceReport(singleConsumerSession.QoSManager.GetLastReputationQoSReportRaw()) // copy raw report for the node return &pairingtypes.RelaySession{ SpecId: chainID, @@ -87,7 +87,7 @@ func ConstructRelaySession(lavaChainID string, relayRequestData *pairingtypes.Re LavaChainId: lavaChainID, Sig: nil, Badge: nil, - QosExcellenceReport: copiedExcellenceQOS, + QosExcellenceReport: copiedReputation, } } diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index 11c5d45293..b7545c5897 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -562,7 +562,7 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS sessionInfo.QoSSummeryResult = consumerSession.getQosComputedResultOrZero() sessions[providerAddress] = sessionInfo - qosReport, rawQosReport := csm.providerOptimizer.GetExcellenceQoSReportForProvider(providerAddress) + qosReport, rawQosReport := csm.providerOptimizer.GetReputationReportForProvider(providerAddress) if csm.rpcEndpoint.Geolocation != uint64(endpoint.endpoint.Geolocation) { // rawQosReport is used only when building the relay payment message to be used to update // the provider's reputation on-chain. If the consumer and provider don't share geolocation @@ -1069,17 +1069,17 @@ func (csm *ConsumerSessionManager) updateMetricsManager(consumerSession *SingleC lastQos = &qos } - var lastQosExcellence *pairingtypes.QualityOfServiceReport - lastExcellenceQoSReport := consumerSession.QoSManager.GetLastExcellenceQoSReportRaw() - if lastExcellenceQoSReport != nil { - qosEx := *lastExcellenceQoSReport - lastQosExcellence = &qosEx + var lastReputation *pairingtypes.QualityOfServiceReport + lastReputationReport := consumerSession.QoSManager.GetLastReputationQoSReportRaw() + if lastReputationReport != nil { + qosRep := *lastReputationReport + lastReputation = &qosRep } publicProviderAddress := consumerSession.Parent.PublicLavaAddress publicProviderEndpoint := consumerSession.Parent.Endpoints[0].NetworkAddress go func() { - csm.consumerMetricsManager.SetQOSMetrics(chainId, apiInterface, publicProviderAddress, publicProviderEndpoint, lastQos, lastQosExcellence, consumerSession.LatestBlock, consumerSession.RelayNum, relayLatency, sessionSuccessful) + csm.consumerMetricsManager.SetQOSMetrics(chainId, apiInterface, publicProviderAddress, publicProviderEndpoint, lastQos, lastReputation, consumerSession.LatestBlock, consumerSession.RelayNum, relayLatency, sessionSuccessful) }() } diff --git a/protocol/lavasession/consumer_types.go b/protocol/lavasession/consumer_types.go index e305dbc948..0191f3a092 100644 --- a/protocol/lavasession/consumer_types.go +++ b/protocol/lavasession/consumer_types.go @@ -74,7 +74,7 @@ type ProviderOptimizer interface { AppendRelayFailure(providerAddress string) AppendRelayData(providerAddress string, latency time.Duration, isHangingApi bool, cu, syncBlock uint64) ChooseProvider(allAddresses []string, ignoredProviders map[string]struct{}, cu uint64, requestedBlock int64) (addresses []string, tier int) - GetExcellenceQoSReportForProvider(string) (*pairingtypes.QualityOfServiceReport, *pairingtypes.QualityOfServiceReport) + GetReputationReportForProvider(string) (*pairingtypes.QualityOfServiceReport, *pairingtypes.QualityOfServiceReport) Strategy() provideroptimizer.Strategy UpdateWeights(map[string]int64, uint64) } diff --git a/protocol/lavasession/single_consumer_session.go b/protocol/lavasession/single_consumer_session.go index 07d2c32ebe..d8bb085a53 100644 --- a/protocol/lavasession/single_consumer_session.go +++ b/protocol/lavasession/single_consumer_session.go @@ -37,24 +37,24 @@ func (cs *SingleConsumerSession) CalculateExpectedLatency(timeoutGivenToRelay ti // cs should be locked here to use this method, returns the computed qos or zero if last qos is nil or failed to compute. func (cs *SingleConsumerSession) getQosComputedResultOrZero() sdk.Dec { - lastExcellenceQoSReport := cs.QoSManager.GetLastExcellenceQoSReportRaw() - if lastExcellenceQoSReport != nil { - qosComputed, errComputing := lastExcellenceQoSReport.ComputeQoSExcellence() + lastReputationReport := cs.QoSManager.GetLastReputationQoSReportRaw() + if lastReputationReport != nil { + computedReputation, errComputing := lastReputationReport.ComputeQoSExcellence() if errComputing == nil { // if we failed to compute the qos will be 0 so this provider wont be picked to return the error in case we get it - return qosComputed + return computedReputation } - utils.LavaFormatDebug("Failed computing QoS used for error parsing, could happen if we have no sync data or one of the fields is zero", utils.LogAttr("Report", cs.QoSManager.GetLastExcellenceQoSReportRaw()), utils.LogAttr("error", errComputing)) + utils.LavaFormatDebug("Failed computing QoS used for error parsing, could happen if we have no sync data or one of the fields is zero", utils.LogAttr("Report", cs.QoSManager.GetLastReputationQoSReportRaw()), utils.LogAttr("error", errComputing)) } return sdk.ZeroDec() } -func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, qoSExcellenceReport *pairingtypes.QualityOfServiceReport, rawQoSExcellenceReport *pairingtypes.QualityOfServiceReport, usedProviders UsedProvidersInf, routerKey RouterKey) error { +func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, reputationReport *pairingtypes.QualityOfServiceReport, rawReputationReport *pairingtypes.QualityOfServiceReport, usedProviders UsedProvidersInf, routerKey RouterKey) error { scs.LatestRelayCu = cuNeededForSession // set latestRelayCu scs.RelayNum += RelayNumberIncrement // increase relayNum if scs.RelayNum > 1 { - // we only set excellence for sessions with more than one successful relays, this guarantees data within the epoch exists - scs.QoSManager.SetLastExcellenceQoSReportRaw(qoSExcellenceReport) - scs.QoSManager.SetLastExcellenceQoSReportRaw(rawQoSExcellenceReport) + // we only set reputation for sessions with more than one successful relays, this guarantees data within the epoch exists + scs.QoSManager.SetLastReputationQoSReportRaw(reputationReport) + scs.QoSManager.SetLastReputationQoSReportRaw(rawReputationReport) } scs.usedProviders = usedProviders scs.routerKey = routerKey diff --git a/protocol/metrics/consumer_metrics_manager.go b/protocol/metrics/consumer_metrics_manager.go index bb17287a47..838491d0c3 100644 --- a/protocol/metrics/consumer_metrics_manager.go +++ b/protocol/metrics/consumer_metrics_manager.go @@ -53,7 +53,7 @@ type ConsumerMetricsManager struct { blockMetric *prometheus.GaugeVec latencyMetric *prometheus.GaugeVec qosMetric *MappedLabelsGaugeVec - qosExcellenceMetric *MappedLabelsGaugeVec + providerReputationMetric *MappedLabelsGaugeVec LatestBlockMetric *MappedLabelsGaugeVec LatestProviderRelay *prometheus.GaugeVec virtualEpochMetric *prometheus.GaugeVec @@ -163,14 +163,14 @@ func NewConsumerMetricsManager(options ConsumerMetricsManagerOptions) *ConsumerM Labels: qosMetricLabels, }) - qosExcellenceMetricLabels := []string{"spec", "provider_address", "qos_metric"} + providerReputationMetricLabels := []string{"spec", "provider_address", "qos_metric"} if ShowProviderEndpointInMetrics { - qosExcellenceMetricLabels = append(qosExcellenceMetricLabels, "provider_endpoint") + providerReputationMetricLabels = append(providerReputationMetricLabels, "provider_endpoint") } - qosExcellenceMetric := NewMappedLabelsGaugeVec(MappedLabelsMetricOpts{ - Name: "lava_consumer_qos_excellence_metrics", - Help: "The QOS metrics per provider excellence", - Labels: qosExcellenceMetricLabels, + providerReputationMetric := NewMappedLabelsGaugeVec(MappedLabelsMetricOpts{ + Name: "lava_consumer_provider_reputation_metrics", + Help: "The provider reputation metrics per provider", + Labels: providerReputationMetricLabels, }) latestBlockMetricLabels := []string{"spec", "provider_address", "apiInterface"} @@ -289,7 +289,7 @@ func NewConsumerMetricsManager(options ConsumerMetricsManagerOptions) *ConsumerM blockMetric: blockMetric, latencyMetric: latencyMetric, qosMetric: qosMetric, - qosExcellenceMetric: qosExcellenceMetric, + providerReputationMetric: providerReputationMetric, LatestBlockMetric: latestBlockMetric, LatestProviderRelay: latestProviderRelay, providerRelays: map[string]uint64{}, @@ -463,7 +463,7 @@ func (pme *ConsumerMetricsManager) getKeyForProcessingLatency(chainId string, ap return header + "_" + chainId + "_" + apiInterface } -func (pme *ConsumerMetricsManager) SetQOSMetrics(chainId string, apiInterface string, providerAddress string, providerEndpoint string, qos *pairingtypes.QualityOfServiceReport, qosExcellence *pairingtypes.QualityOfServiceReport, latestBlock int64, relays uint64, relayLatency time.Duration, sessionSuccessful bool) { +func (pme *ConsumerMetricsManager) SetQOSMetrics(chainId string, apiInterface string, providerAddress string, providerEndpoint string, qos *pairingtypes.QualityOfServiceReport, reputation *pairingtypes.QualityOfServiceReport, latestBlock int64, relays uint64, relayLatency time.Duration, sessionSuccessful bool) { if pme == nil { return } @@ -521,7 +521,7 @@ func (pme *ConsumerMetricsManager) SetQOSMetrics(chainId string, apiInterface st } } setMetricsForQos(qos, pme.qosMetric, apiInterface, providerEndpoint) - setMetricsForQos(qosExcellence, pme.qosExcellenceMetric, "", providerEndpoint) // it's one api interface for all of them + setMetricsForQos(reputation, pme.providerReputationMetric, "", providerEndpoint) // it's one api interface for all of them labels := map[string]string{"spec": chainId, "provider_address": providerAddress, "apiInterface": apiInterface, "provider_endpoint": providerEndpoint} pme.LatestBlockMetric.WithLabelValues(labels).Set(float64(latestBlock)) @@ -565,7 +565,7 @@ func (pme *ConsumerMetricsManager) ResetSessionRelatedMetrics() { pme.lock.Lock() defer pme.lock.Unlock() pme.qosMetric.Reset() - pme.qosExcellenceMetric.Reset() + pme.providerReputationMetric.Reset() pme.providerRelays = map[string]uint64{} } diff --git a/protocol/provideroptimizer/provider_optimizer.go b/protocol/provideroptimizer/provider_optimizer.go index 2e34963c34..a722efc35f 100644 --- a/protocol/provideroptimizer/provider_optimizer.go +++ b/protocol/provideroptimizer/provider_optimizer.go @@ -476,7 +476,7 @@ func (po *ProviderOptimizer) updateProbeEntrySync(providerData ProviderData, syn syncScoreStore, syncRawScoreStore := score.CalculateTimeDecayFunctionUpdate(oldScore, newScore, halfTime, RELAY_UPDATE_WEIGHT, sampleTime) providerData.Sync = syncScoreStore if !isHangingApi { - // use raw qos excellence reports updates for non-hanging API only + // use raw reputation reports updates for non-hanging API only providerData.SyncRaw = syncRawScoreStore } return providerData @@ -502,7 +502,7 @@ func (po *ProviderOptimizer) updateProbeEntryLatency(providerData ProviderData, latencyScoreStore, latencyRawScoreStore := score.CalculateTimeDecayFunctionUpdate(oldScore, newScore, halfTime, weight, sampleTime) providerData.Latency = latencyScoreStore if isHangingApi { - // use raw qos excellence reports updates for non-hanging API only + // use raw reputation reports updates for non-hanging API only providerData.LatencyRaw = latencyRawScoreStore } return providerData @@ -603,7 +603,7 @@ func pertrubWithNormalGaussian(orig, percentage float64) float64 { return orig + perturb } -func (po *ProviderOptimizer) GetExcellenceQoSReportForProvider(providerAddress string) (qosReport *pairingtypes.QualityOfServiceReport, rawQosReport *pairingtypes.QualityOfServiceReport) { +func (po *ProviderOptimizer) GetReputationReportForProvider(providerAddress string) (qosReport *pairingtypes.QualityOfServiceReport, rawQosReport *pairingtypes.QualityOfServiceReport) { providerData, found := po.getProviderData(providerAddress) if !found { return nil, nil @@ -630,7 +630,7 @@ func (po *ProviderOptimizer) GetExcellenceQoSReportForProvider(providerAddress s Sync: syncScoreRaw, } - utils.LavaFormatTrace("QoS Excellence for provider", + utils.LavaFormatTrace("Reputation for provider", utils.LogAttr("address", providerAddress), utils.LogAttr("Report", ret), utils.LogAttr("raw_report", rawQosReport), diff --git a/protocol/provideroptimizer/provider_optimizer_test.go b/protocol/provideroptimizer/provider_optimizer_test.go index 0e857ec16b..36c383c708 100644 --- a/protocol/provideroptimizer/provider_optimizer_test.go +++ b/protocol/provideroptimizer/provider_optimizer_test.go @@ -543,7 +543,7 @@ func TestProviderOptimizerStrategiesScoring(t *testing.T) { require.Equal(t, providersGen.providersAddresses[0], tier0[0].Address) } -func TestExcellence(t *testing.T) { +func TestReputation(t *testing.T) { floatVal := 0.25 dec := turnFloatToDec(floatVal, 8) floatNew, err := dec.Float64() @@ -563,10 +563,10 @@ func TestExcellence(t *testing.T) { } time.Sleep(4 * time.Millisecond) } - report, rawReport := providerOptimizer.GetExcellenceQoSReportForProvider(providersGen.providersAddresses[0]) + report, rawReport := providerOptimizer.GetReputationReportForProvider(providersGen.providersAddresses[0]) require.NotNil(t, report) require.NotNil(t, rawReport) - report2, rawReport2 := providerOptimizer.GetExcellenceQoSReportForProvider(providersGen.providersAddresses[1]) + report2, rawReport2 := providerOptimizer.GetReputationReportForProvider(providersGen.providersAddresses[1]) require.NotNil(t, report2) require.Equal(t, report, report2) require.NotNil(t, rawReport2) diff --git a/protocol/qos/qos_manager.go b/protocol/qos/qos_manager.go index 7d753edc77..d53793e17d 100644 --- a/protocol/qos/qos_manager.go +++ b/protocol/qos/qos_manager.go @@ -13,8 +13,8 @@ import ( type QoSManager struct { lastQoSReport *pairingtypes.QualityOfServiceReport - lastExcellenceQoSReport *pairingtypes.QualityOfServiceReport - lastExcellenceQoSReportRaw *pairingtypes.QualityOfServiceReport + lastReputationQoSReport *pairingtypes.QualityOfServiceReport + lastReputationQoSReportRaw *pairingtypes.QualityOfServiceReport latencyScoreList []sdk.Dec syncScoreSum int64 totalSyncScore int64 @@ -25,8 +25,8 @@ type QoSManager struct { func NewQoSManager() *QoSManager { return &QoSManager{ lastQoSReport: &pairingtypes.QualityOfServiceReport{}, - lastExcellenceQoSReport: &pairingtypes.QualityOfServiceReport{}, - lastExcellenceQoSReportRaw: &pairingtypes.QualityOfServiceReport{}, + lastReputationQoSReport: &pairingtypes.QualityOfServiceReport{}, + lastReputationQoSReportRaw: &pairingtypes.QualityOfServiceReport{}, latencyScoreList: []sdk.Dec{}, syncScoreSum: 0, totalSyncScore: 0, @@ -45,12 +45,12 @@ func (qosReport *QoSManager) GetLastQoSReport() *pairingtypes.QualityOfServiceRe return qosReport.lastQoSReport } -func (qosReport *QoSManager) GetLastExcellenceQoSReportRaw() *pairingtypes.QualityOfServiceReport { - return qosReport.lastExcellenceQoSReportRaw +func (qosReport *QoSManager) GetLastReputationQoSReportRaw() *pairingtypes.QualityOfServiceReport { + return qosReport.lastReputationQoSReportRaw } -func (qosReport *QoSManager) SetLastExcellenceQoSReportRaw(report *pairingtypes.QualityOfServiceReport) { - qosReport.lastExcellenceQoSReportRaw = report +func (qosReport *QoSManager) SetLastReputationQoSReportRaw(report *pairingtypes.QualityOfServiceReport) { + qosReport.lastReputationQoSReportRaw = report } func (qosReport *QoSManager) IncTotalRelays() { diff --git a/protocol/rpcconsumer/relay_errors.go b/protocol/rpcconsumer/relay_errors.go index 2f8edbdfbd..cb13d609c2 100644 --- a/protocol/rpcconsumer/relay_errors.go +++ b/protocol/rpcconsumer/relay_errors.go @@ -55,10 +55,10 @@ func (r *RelayErrors) GetBestErrorMessageForUser() RelayError { for idx, relayError := range r.relayErrors { errorMessage := r.sanitizeError(relayError.err) errorMap[errorMessage] = append(errorMap[errorMessage], idx) - if relayError.ProviderInfo.ProviderQoSExcellenceSummery.IsNil() || relayError.ProviderInfo.ProviderStake.Amount.IsNil() { + if relayError.ProviderInfo.ProviderReputationSummery.IsNil() || relayError.ProviderInfo.ProviderStake.Amount.IsNil() { continue } - currentResult := relayError.ProviderInfo.ProviderQoSExcellenceSummery.MulInt(relayError.ProviderInfo.ProviderStake.Amount) + currentResult := relayError.ProviderInfo.ProviderReputationSummery.MulInt(relayError.ProviderInfo.ProviderStake.Amount) if currentResult.GTE(bestResult) { // 0 or 1 here are valid replacements, so even 0 scores will return the error value bestResult.Set(currentResult) bestIndex = idx diff --git a/protocol/rpcconsumer/relay_errors_test.go b/protocol/rpcconsumer/relay_errors_test.go index 5e48db5975..b2049dfa6f 100644 --- a/protocol/rpcconsumer/relay_errors_test.go +++ b/protocol/rpcconsumer/relay_errors_test.go @@ -23,36 +23,36 @@ func TestRelayError(t *testing.T) { { err: fmt.Errorf("test1"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 10), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("test2"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 20), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 20), }, }, { err: fmt.Errorf("test3"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 30), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 30), }, }, { err: fmt.Errorf("test4"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 40), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 40), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 50), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 50), }, }, }, @@ -66,50 +66,50 @@ func TestRelayError(t *testing.T) { { err: fmt.Errorf("test1"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.MustNewDecFromStr("0.5"), - ProviderStake: sdk.NewInt64Coin("ulava", 10), + ProviderReputationSummery: sdk.MustNewDecFromStr("0.5"), + ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("test1"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.MustNewDecFromStr("0.25"), - ProviderStake: sdk.NewInt64Coin("ulava", 10), + ProviderReputationSummery: sdk.MustNewDecFromStr("0.25"), + ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("test3"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.MustNewDecFromStr("0.6"), - ProviderStake: sdk.NewInt64Coin("ulava", 10), + ProviderReputationSummery: sdk.MustNewDecFromStr("0.6"), + ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("test3"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.MustNewDecFromStr("0.7"), - ProviderStake: sdk.NewInt64Coin("ulava", 10), + ProviderReputationSummery: sdk.MustNewDecFromStr("0.7"), + ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("test4"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.MustNewDecFromStr("0.7"), - ProviderStake: sdk.NewInt64Coin("ulava", 10), + ProviderReputationSummery: sdk.MustNewDecFromStr("0.7"), + ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("test4"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.MustNewDecFromStr("0.7"), - ProviderStake: sdk.NewInt64Coin("ulava", 10), + ProviderReputationSummery: sdk.MustNewDecFromStr("0.7"), + ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.MustNewDecFromStr("0.8"), - ProviderStake: sdk.NewInt64Coin("ulava", 10), + ProviderReputationSummery: sdk.MustNewDecFromStr("0.8"), + ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, }, @@ -123,36 +123,36 @@ func TestRelayError(t *testing.T) { { err: fmt.Errorf("test1"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 1000), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 1000), }, }, { err: fmt.Errorf("test2"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 1000), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 1000), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.ZeroDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 0), + ProviderReputationSummery: sdk.ZeroDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 0), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.ZeroDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 0), + ProviderReputationSummery: sdk.ZeroDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 0), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.ZeroDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 0), + ProviderReputationSummery: sdk.ZeroDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 0), }, }, }, @@ -166,36 +166,36 @@ func TestRelayError(t *testing.T) { { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 10), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 20), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 20), }, }, { err: fmt.Errorf("test3"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 30), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 30), }, }, { err: fmt.Errorf("test4"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 40), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 40), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 10), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, }, @@ -209,36 +209,36 @@ func TestRelayError(t *testing.T) { { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 10), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 20), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 20), }, }, { err: fmt.Errorf("test3"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 30), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 30), }, }, { err: fmt.Errorf("test4"), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 40), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 40), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderQoSExcellenceSummery: sdk.OneDec(), - ProviderStake: sdk.NewInt64Coin("ulava", 10), + ProviderReputationSummery: sdk.OneDec(), + ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, }, diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index d68520a8a8..72bbeda50f 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -314,10 +314,10 @@ func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSi if result.Reply != nil && result.Reply.Data != nil { countMap[string(result.Reply.Data)]++ if !deterministic { - if result.ProviderInfo.ProviderQoSExcellenceSummery.IsNil() || result.ProviderInfo.ProviderStake.Amount.IsNil() { + if result.ProviderInfo.ProviderReputationSummery.IsNil() || result.ProviderInfo.ProviderStake.Amount.IsNil() { continue } - currentResult := result.ProviderInfo.ProviderQoSExcellenceSummery.MulInt(result.ProviderInfo.ProviderStake.Amount) + currentResult := result.ProviderInfo.ProviderReputationSummery.MulInt(result.ProviderInfo.ProviderStake.Amount) if currentResult.GTE(bestQos) { bestQos.Set(currentResult) bestQosResult = result diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 296fcbe0bf..7714c33848 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -735,7 +735,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( go func(providerPublicAddress string, sessionInfo *lavasession.SessionInfo) { // add ticker launch metrics localRelayResult := &common.RelayResult{ - ProviderInfo: common.ProviderInfo{ProviderAddress: providerPublicAddress, ProviderStake: sessionInfo.StakeSize, ProviderQoSExcellenceSummery: sessionInfo.QoSSummeryResult}, + ProviderInfo: common.ProviderInfo{ProviderAddress: providerPublicAddress, ProviderStake: sessionInfo.StakeSize, ProviderReputationSummery: sessionInfo.QoSSummeryResult}, Finalized: false, // setting the single consumer session as the conflict handler. // to be able to validate if we need to report this provider or not. @@ -1053,7 +1053,7 @@ func (rpccs *RPCConsumerServer) relayInner(ctx context.Context, singleConsumerSe utils.LogAttr("provider", relayRequest.RelaySession.Provider), utils.LogAttr("cuSum", relayRequest.RelaySession.CuSum), utils.LogAttr("QosReport", relayRequest.RelaySession.QosReport), - utils.LogAttr("QosReportExcellence", relayRequest.RelaySession.QosExcellenceReport), + utils.LogAttr("ReputationReport", relayRequest.RelaySession.QosExcellenceReport), utils.LogAttr("relayNum", relayRequest.RelaySession.RelayNum), utils.LogAttr("sessionId", relayRequest.RelaySession.SessionId), utils.LogAttr("latency", relayLatency), From 6080e550d815a366d1681e75af093c12d2c04a27 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Sun, 12 Jan 2025 15:09:14 +0200 Subject: [PATCH 03/24] QoS Managet mutations queue --- .../finalization_consensus_test.go | 80 +++---- protocol/lavaprotocol/request_builder.go | 4 +- .../lavasession/consumer_session_manager.go | 8 +- protocol/lavasession/consumer_types.go | 1 + protocol/lavasession/consumer_types_test.go | 33 --- .../lavasession/single_consumer_session.go | 12 +- protocol/qos/common.go | 6 +- protocol/qos/qos_manager.go | 208 +++++++++++------- protocol/qos/qos_mutators.go | 118 ++++++++++ protocol/rpcconsumer/rpcconsumer_server.go | 2 +- 10 files changed, 300 insertions(+), 172 deletions(-) delete mode 100644 protocol/lavasession/consumer_types_test.go create mode 100644 protocol/qos/qos_mutators.go diff --git a/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go b/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go index 3b87a9485f..ef75111e6b 100644 --- a/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go +++ b/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go @@ -282,54 +282,54 @@ func TestQoS(t *testing.T) { currentLatency := time.Millisecond expectedLatency := time.Millisecond latestServicedBlock := expectedBH - singleConsumerSession.QoSManager.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) - require.Equal(t, uint64(1), singleConsumerSession.QoSManager.GetAnsweredRelays()) - require.Equal(t, uint64(1), singleConsumerSession.QoSManager.GetTotalRelays()) - require.Equal(t, int64(1), singleConsumerSession.QoSManager.GetSyncScoreSum()) - require.Equal(t, int64(1), singleConsumerSession.QoSManager.GetTotalSyncScore()) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Availability) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Latency) + singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) + require.Equal(t, uint64(1), singleConsumerSession.QoSManager.GetAnsweredRelays(epoch, singleConsumerSession.SessionId)) + require.Equal(t, uint64(1), singleConsumerSession.QoSManager.GetTotalRelays(epoch, singleConsumerSession.SessionId)) + require.Equal(t, int64(1), singleConsumerSession.QoSManager.GetSyncScoreSum(epoch, singleConsumerSession.SessionId)) + require.Equal(t, int64(1), singleConsumerSession.QoSManager.GetTotalSyncScore(epoch, singleConsumerSession.SessionId)) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Availability) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Sync) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Latency) latestServicedBlock = expectedBH + 1 - singleConsumerSession.QoSManager.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) - require.Equal(t, uint64(2), singleConsumerSession.QoSManager.GetAnsweredRelays()) - require.Equal(t, uint64(2), singleConsumerSession.QoSManager.GetTotalRelays()) - require.Equal(t, int64(2), singleConsumerSession.QoSManager.GetSyncScoreSum()) - require.Equal(t, int64(2), singleConsumerSession.QoSManager.GetTotalSyncScore()) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Availability) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Latency) - - singleConsumerSession.QoSManager.IncTotalRelays() // this is how we add a failure - singleConsumerSession.QoSManager.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) - require.Equal(t, uint64(3), singleConsumerSession.QoSManager.GetAnsweredRelays()) - require.Equal(t, uint64(4), singleConsumerSession.QoSManager.GetTotalRelays()) - require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetSyncScoreSum()) - require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetTotalSyncScore()) - - require.Equal(t, sdk.ZeroDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Availability) // because availability below 95% is 0 - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Latency) + singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) + require.Equal(t, uint64(2), singleConsumerSession.QoSManager.GetAnsweredRelays(epoch, singleConsumerSession.SessionId)) + require.Equal(t, uint64(2), singleConsumerSession.QoSManager.GetTotalRelays(epoch, singleConsumerSession.SessionId)) + require.Equal(t, int64(2), singleConsumerSession.QoSManager.GetSyncScoreSum(epoch, singleConsumerSession.SessionId)) + require.Equal(t, int64(2), singleConsumerSession.QoSManager.GetTotalSyncScore(epoch, singleConsumerSession.SessionId)) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Availability) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Sync) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Latency) + + singleConsumerSession.QoSManager.AddFailedRelay(epoch, singleConsumerSession.SessionId) // this is how we add a failure + singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) + require.Equal(t, uint64(3), singleConsumerSession.QoSManager.GetAnsweredRelays(epoch, singleConsumerSession.SessionId)) + require.Equal(t, uint64(4), singleConsumerSession.QoSManager.GetTotalRelays(epoch, singleConsumerSession.SessionId)) + require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetSyncScoreSum(epoch, singleConsumerSession.SessionId)) + require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetTotalSyncScore(epoch, singleConsumerSession.SessionId)) + + require.Equal(t, sdk.ZeroDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Availability) // because availability below 95% is 0 + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Sync) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Latency) latestServicedBlock = expectedBH - 1 // is one block below threshold - singleConsumerSession.QoSManager.CalculateQoS(currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) - require.Equal(t, uint64(4), singleConsumerSession.QoSManager.GetAnsweredRelays()) - require.Equal(t, uint64(5), singleConsumerSession.QoSManager.GetTotalRelays()) - require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetSyncScoreSum()) - require.Equal(t, int64(4), singleConsumerSession.QoSManager.GetTotalSyncScore()) - - require.Equal(t, sdk.ZeroDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Availability) // because availability below 95% is 0 - require.Equal(t, sdk.MustNewDecFromStr("0.75"), singleConsumerSession.QoSManager.GetLastQoSReport().Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Latency) + singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) + require.Equal(t, uint64(4), singleConsumerSession.QoSManager.GetAnsweredRelays(epoch, singleConsumerSession.SessionId)) + require.Equal(t, uint64(5), singleConsumerSession.QoSManager.GetTotalRelays(epoch, singleConsumerSession.SessionId)) + require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetSyncScoreSum(epoch, singleConsumerSession.SessionId)) + require.Equal(t, int64(4), singleConsumerSession.QoSManager.GetTotalSyncScore(epoch, singleConsumerSession.SessionId)) + + require.Equal(t, sdk.ZeroDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Availability) // because availability below 95% is 0 + require.Equal(t, sdk.MustNewDecFromStr("0.75"), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Sync) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Latency) latestServicedBlock = expectedBH + 1 // add in a loop so availability goes above 95% for i := 5; i < 100; i++ { - singleConsumerSession.QoSManager.CalculateQoS(currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) + singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) } - require.Equal(t, sdk.MustNewDecFromStr("0.8"), singleConsumerSession.QoSManager.GetLastQoSReport().Availability) // because availability below 95% is 0 - require.Equal(t, sdk.MustNewDecFromStr("0.989898989898989898"), singleConsumerSession.QoSManager.GetLastQoSReport().Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport().Latency) + require.Equal(t, sdk.MustNewDecFromStr("0.8"), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Availability) // because availability below 95% is 0 + require.Equal(t, sdk.MustNewDecFromStr("0.989898989898989898"), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Sync) + require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Latency) finalizationInsertionsSpreadBlocks := []finalizationTestInsertion{ finalizationInsertionForProviders(chainID, epoch, 200, 0, 1, true, "", blocksInFinalizationProof, blockDistanceForFinalizedData)[0], diff --git a/protocol/lavaprotocol/request_builder.go b/protocol/lavaprotocol/request_builder.go index c02cdf5964..322e9c5591 100644 --- a/protocol/lavaprotocol/request_builder.go +++ b/protocol/lavaprotocol/request_builder.go @@ -71,8 +71,8 @@ func ConstructRelaySession(lavaChainID string, relayRequestData *pairingtypes.Re return nil } - copiedQOS := copyQoSServiceReport(singleConsumerSession.QoSManager.GetLastQoSReport()) - copiedReputation := copyQoSServiceReport(singleConsumerSession.QoSManager.GetLastReputationQoSReportRaw()) // copy raw report for the node + copiedQOS := copyQoSServiceReport(singleConsumerSession.QoSManager.GetLastQoSReport(uint64(epoch), singleConsumerSession.SessionId)) + copiedReputation := copyQoSServiceReport(singleConsumerSession.QoSManager.GetLastReputationQoSReportRaw(uint64(epoch), singleConsumerSession.SessionId)) // copy raw report for the node return &pairingtypes.RelaySession{ SpecId: chainID, diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index b7545c5897..82e7f4938f 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -935,7 +935,7 @@ func (csm *ConsumerSessionManager) OnSessionFailure(consumerSession *SingleConsu consumerSession.BlockListed = true } - consumerSession.QoSManager.IncTotalRelays() + consumerSession.QoSManager.AddFailedRelay(consumerSession.epoch, consumerSession.SessionId) consumerSession.ConsecutiveErrors = append(consumerSession.ConsecutiveErrors, errorReceived) // copy consecutive errors for report. errorsForConsumerSession := consumerSession.ConsecutiveErrors @@ -1046,7 +1046,7 @@ func (csm *ConsumerSessionManager) OnSessionDone( consumerSession.ConsecutiveErrors = []error{} consumerSession.LatestBlock = latestServicedBlock // update latest serviced block // calculate QoS - consumerSession.QoSManager.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, int64(providersCount)) + consumerSession.QoSManager.CalculateQoS(csm.atomicReadCurrentEpoch(), consumerSession.SessionId, consumerSession.Parent.PublicLavaAddress, currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, int64(providersCount)) go csm.providerOptimizer.AppendRelayData(consumerSession.Parent.PublicLavaAddress, currentLatency, isHangingApi, specComputeUnits, uint64(latestServicedBlock)) csm.updateMetricsManager(consumerSession, currentLatency, !isHangingApi) // apply latency only for non hanging apis return nil @@ -1063,14 +1063,14 @@ func (csm *ConsumerSessionManager) updateMetricsManager(consumerSession *SingleC chainId := info.ChainID var lastQos *pairingtypes.QualityOfServiceReport - lastQoSReport := consumerSession.QoSManager.GetLastQoSReport() + lastQoSReport := consumerSession.QoSManager.GetLastQoSReport(csm.atomicReadCurrentEpoch(), consumerSession.SessionId) if lastQoSReport != nil { qos := *lastQoSReport lastQos = &qos } var lastReputation *pairingtypes.QualityOfServiceReport - lastReputationReport := consumerSession.QoSManager.GetLastReputationQoSReportRaw() + lastReputationReport := consumerSession.QoSManager.GetLastReputationQoSReportRaw(csm.atomicReadCurrentEpoch(), consumerSession.SessionId) if lastReputationReport != nil { qosRep := *lastReputationReport lastReputation = &qosRep diff --git a/protocol/lavasession/consumer_types.go b/protocol/lavasession/consumer_types.go index 0191f3a092..a19c5a8795 100644 --- a/protocol/lavasession/consumer_types.go +++ b/protocol/lavasession/consumer_types.go @@ -429,6 +429,7 @@ func (cswp *ConsumerSessionsWithProvider) GetConsumerSessionInstanceFromEndpoint EndpointConnection: endpointConnection, StaticProvider: cswp.StaticProvider, routerKey: NewRouterKey(nil), + epoch: cswp.PairingEpoch, } consumerSession.TryUseSession() // we must lock the session so other requests wont get it. diff --git a/protocol/lavasession/consumer_types_test.go b/protocol/lavasession/consumer_types_test.go deleted file mode 100644 index 38838592a3..0000000000 --- a/protocol/lavasession/consumer_types_test.go +++ /dev/null @@ -1,33 +0,0 @@ -package lavasession - -import ( - "testing" - - sdk "github.com/cosmos/cosmos-sdk/types" - "github.com/lavanet/lava/v4/protocol/qos" - "github.com/stretchr/testify/require" -) - -func TestCalculateAvailabilityScore(t *testing.T) { - avialabilityAsFloat, err := AvailabilityPercentage.Float64() - require.NoError(t, err) - precision := uint64(10000) - qosManager := qos.NewQoSManager() - qosManager.SetTotalRelays(precision) - qosManager.SetAnsweredRelays(precision - uint64(avialabilityAsFloat*float64(precision))) - downTime, availabilityScore := qosManager.CalculateAvailabilityScore() - downTimeFloat, err := downTime.Float64() - require.NoError(t, err) - require.Equal(t, downTimeFloat, avialabilityAsFloat) - require.Zero(t, availabilityScore.BigInt().Uint64()) - - qosManager.SetTotalRelays(2 * precision) - qosManager.SetAnsweredRelays(2*precision - uint64(avialabilityAsFloat*float64(precision))) - downTime, availabilityScore = qosManager.CalculateAvailabilityScore() - downTimeFloat, err = downTime.Float64() - require.NoError(t, err) - halfDec, err := sdk.NewDecFromStr("0.5") - require.NoError(t, err) - require.Equal(t, downTimeFloat*2, avialabilityAsFloat) - require.Equal(t, halfDec, availabilityScore) -} diff --git a/protocol/lavasession/single_consumer_session.go b/protocol/lavasession/single_consumer_session.go index d8bb085a53..1cf4650587 100644 --- a/protocol/lavasession/single_consumer_session.go +++ b/protocol/lavasession/single_consumer_session.go @@ -27,6 +27,7 @@ type SingleConsumerSession struct { providerUniqueId string StaticProvider bool routerKey RouterKey + epoch uint64 } // returns the expected latency to a threshold. @@ -37,13 +38,16 @@ func (cs *SingleConsumerSession) CalculateExpectedLatency(timeoutGivenToRelay ti // cs should be locked here to use this method, returns the computed qos or zero if last qos is nil or failed to compute. func (cs *SingleConsumerSession) getQosComputedResultOrZero() sdk.Dec { - lastReputationReport := cs.QoSManager.GetLastReputationQoSReportRaw() + lastReputationReport := cs.QoSManager.GetLastReputationQoSReportRaw(cs.epoch, cs.SessionId) if lastReputationReport != nil { computedReputation, errComputing := lastReputationReport.ComputeQoSExcellence() if errComputing == nil { // if we failed to compute the qos will be 0 so this provider wont be picked to return the error in case we get it return computedReputation } - utils.LavaFormatDebug("Failed computing QoS used for error parsing, could happen if we have no sync data or one of the fields is zero", utils.LogAttr("Report", cs.QoSManager.GetLastReputationQoSReportRaw()), utils.LogAttr("error", errComputing)) + utils.LavaFormatDebug("Failed computing QoS used for error parsing, could happen if we have no sync data or one of the fields is zero", + utils.LogAttr("Report", cs.QoSManager.GetLastReputationQoSReportRaw(cs.epoch, cs.SessionId)), + utils.LogAttr("error", errComputing), + ) } return sdk.ZeroDec() } @@ -53,8 +57,8 @@ func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, scs.RelayNum += RelayNumberIncrement // increase relayNum if scs.RelayNum > 1 { // we only set reputation for sessions with more than one successful relays, this guarantees data within the epoch exists - scs.QoSManager.SetLastReputationQoSReportRaw(reputationReport) - scs.QoSManager.SetLastReputationQoSReportRaw(rawReputationReport) + scs.QoSManager.SetLastReputationQoSReportRaw(scs.epoch, scs.SessionId, reputationReport) + scs.QoSManager.SetLastReputationQoSReportRaw(scs.epoch, scs.SessionId, rawReputationReport) } scs.usedProviders = usedProviders scs.routerKey = routerKey diff --git a/protocol/qos/common.go b/protocol/qos/common.go index f4c18c1a64..59005acb99 100644 --- a/protocol/qos/common.go +++ b/protocol/qos/common.go @@ -8,8 +8,6 @@ const ( MinProvidersForSync = 0.6 ) -type DegradeAvailabilityReputation interface { -} +type DegradeAvailabilityReputation interface{} -type SendQoSUpdate interface { -} +type SendQoSUpdate interface{} diff --git a/protocol/qos/qos_manager.go b/protocol/qos/qos_manager.go index d53793e17d..2bc692080e 100644 --- a/protocol/qos/qos_manager.go +++ b/protocol/qos/qos_manager.go @@ -1,17 +1,14 @@ package qos import ( - "math" - "sort" - "strconv" + "sync" "time" sdk "github.com/cosmos/cosmos-sdk/types" - "github.com/lavanet/lava/v4/utils" pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" ) -type QoSManager struct { +type QoSReport struct { lastQoSReport *pairingtypes.QualityOfServiceReport lastReputationQoSReport *pairingtypes.QualityOfServiceReport lastReputationQoSReportRaw *pairingtypes.QualityOfServiceReport @@ -20,118 +17,161 @@ type QoSManager struct { totalSyncScore int64 totalRelays uint64 answeredRelays uint64 + lock sync.RWMutex } -func NewQoSManager() *QoSManager { - return &QoSManager{ - lastQoSReport: &pairingtypes.QualityOfServiceReport{}, - lastReputationQoSReport: &pairingtypes.QualityOfServiceReport{}, - lastReputationQoSReportRaw: &pairingtypes.QualityOfServiceReport{}, - latencyScoreList: []sdk.Dec{}, - syncScoreSum: 0, - totalSyncScore: 0, - totalRelays: 0, - answeredRelays: 0, - } +type QoSManager struct { + qosReports map[uint64]map[int64]*QoSReport // first key is the epoch, second key is the session id + mutatorsQueue chan Mutator + qosReportsLock sync.RWMutex } -func (qosReport *QoSManager) CalculateAvailabilityScore() (downtimePercentageRet, scaledAvailabilityScoreRet sdk.Dec) { - downtimePercentage := sdk.NewDecWithPrec(int64(qosReport.totalRelays-qosReport.answeredRelays), 0).Quo(sdk.NewDecWithPrec(int64(qosReport.totalRelays), 0)) - scaledAvailabilityScore := sdk.MaxDec(sdk.ZeroDec(), AvailabilityPercentage.Sub(downtimePercentage).Quo(AvailabilityPercentage)) - return downtimePercentage, scaledAvailabilityScore +func NewQoSManager() *QoSManager { + qosManager := &QoSManager{} + qosManager.qosReports = make(map[uint64]map[int64]*QoSReport) + qosManager.mutatorsQueue = make(chan Mutator, 1000) + go qosManager.processMutations() + return qosManager } -func (qosReport *QoSManager) GetLastQoSReport() *pairingtypes.QualityOfServiceReport { - return qosReport.lastQoSReport +func (qosManager *QoSManager) processMutations() { + for mutator := range qosManager.mutatorsQueue { + epoch, sessionId := mutator.GetEpochAndSessionId() + qosReport := qosManager.fetchOrSetSessionFromMap(epoch, sessionId) + mutator.Mutate(qosReport) + } } -func (qosReport *QoSManager) GetLastReputationQoSReportRaw() *pairingtypes.QualityOfServiceReport { - return qosReport.lastReputationQoSReportRaw +func (qosManager *QoSManager) fetchOrSetSessionFromMap(epoch uint64, sessionId int64) *QoSReport { + qosManager.qosReportsLock.Lock() + defer qosManager.qosReportsLock.Unlock() + if qosManager.qosReports[epoch] == nil { + qosManager.qosReports[epoch] = make(map[int64]*QoSReport) + } + if qosManager.qosReports[epoch][sessionId] == nil { + qosManager.qosReports[epoch][sessionId] = &QoSReport{ + lastQoSReport: &pairingtypes.QualityOfServiceReport{}, + lastReputationQoSReport: &pairingtypes.QualityOfServiceReport{}, + lastReputationQoSReportRaw: &pairingtypes.QualityOfServiceReport{}, + latencyScoreList: []sdk.Dec{}, + syncScoreSum: 0, + totalSyncScore: 0, + totalRelays: 0, + answeredRelays: 0, + } + } + return qosManager.qosReports[epoch][sessionId] } -func (qosReport *QoSManager) SetLastReputationQoSReportRaw(report *pairingtypes.QualityOfServiceReport) { - qosReport.lastReputationQoSReportRaw = report +func (qosManager *QoSManager) CalculateQoS(epoch uint64, sessionId int64, providerAddress string, latency, expectedLatency time.Duration, blockHeightDiff int64, numOfProviders int, servicersToCount int64) { + go func() { + qosManager.mutatorsQueue <- &QoSUpdateTaskRelaySuccess{ + QoSUpdateTaskBase: QoSUpdateTaskBase{ + epoch: epoch, + sessionId: sessionId, + }, + providerAddress: providerAddress, + latency: latency, + expectedLatency: expectedLatency, + blockHeightDiff: blockHeightDiff, + numOfProviders: numOfProviders, + servicersToCount: servicersToCount, + } + }() } -func (qosReport *QoSManager) IncTotalRelays() { - qosReport.totalRelays++ +func (qosManager *QoSManager) AddFailedRelay(epoch uint64, sessionId int64) { + go func() { + qosManager.mutatorsQueue <- &QoSUpdateTaskRelayFailure{ + QoSUpdateTaskBase: QoSUpdateTaskBase{ + epoch: epoch, + sessionId: sessionId, + }, + } + }() } -func (cs *QoSManager) CalculateQoS(latency, expectedLatency time.Duration, blockHeightDiff int64, numOfProviders int, servicersToCount int64) { - // Add current Session QoS - cs.totalRelays++ // increase total relays - cs.answeredRelays++ // increase answered relays +func (qosManager *QoSManager) SetLastReputationQoSReportRaw(epoch uint64, sessionId int64, report *pairingtypes.QualityOfServiceReport) { + qosManager.mutatorsQueue <- &QoSUpdateTaskSetReputationRaw{ + QoSUpdateTaskBase: QoSUpdateTaskBase{ + epoch: epoch, + sessionId: sessionId, + }, + report: report, + } +} - if cs.lastQoSReport == nil { - cs.lastQoSReport = &pairingtypes.QualityOfServiceReport{} +func (qosManager *QoSManager) getQoSReport(epoch uint64, sessionId int64) *QoSReport { + qosManager.qosReportsLock.RLock() + defer qosManager.qosReportsLock.RUnlock() + if qosManager.qosReports[epoch] == nil || qosManager.qosReports[epoch][sessionId] == nil { + return nil } + return qosManager.qosReports[epoch][sessionId] +} - downtimePercentage, scaledAvailabilityScore := cs.CalculateAvailabilityScore() - cs.lastQoSReport.Availability = scaledAvailabilityScore - if sdk.OneDec().GT(cs.lastQoSReport.Availability) { - utils.LavaFormatDebug("QoS Availability report", utils.Attribute{Key: "Availability", Value: cs.lastQoSReport.Availability}, utils.Attribute{Key: "down percent", Value: downtimePercentage}) +func (qosManager *QoSManager) GetLastQoSReport(epoch uint64, sessionId int64) *pairingtypes.QualityOfServiceReport { + qosReport := qosManager.getQoSReport(epoch, sessionId) + if qosReport == nil { + return nil } - latencyScore := sdk.MinDec(sdk.OneDec(), sdk.NewDecFromInt(sdk.NewInt(int64(expectedLatency))).Quo(sdk.NewDecFromInt(sdk.NewInt(int64(latency))))) + qosReport.lock.RLock() + defer qosReport.lock.RUnlock() + return qosReport.lastQoSReport +} - insertSorted := func(list []sdk.Dec, value sdk.Dec) []sdk.Dec { - index := sort.Search(len(list), func(i int) bool { - return list[i].GTE(value) - }) - if len(list) == index { // nil or empty slice or after last element - return append(list, value) - } - list = append(list[:index+1], list[index:]...) // index < len(a) - list[index] = value - return list - } - cs.latencyScoreList = insertSorted(cs.latencyScoreList, latencyScore) - cs.lastQoSReport.Latency = cs.latencyScoreList[int(float64(len(cs.latencyScoreList))*PercentileToCalculateLatency)] - - // checking if we have enough information to calculate the sync score for the providers, if we haven't talked - // with enough providers we don't have enough information and we will wait to have more information before setting the sync score - shouldCalculateSyncScore := int64(numOfProviders) > int64(math.Ceil(float64(servicersToCount)*MinProvidersForSync)) - if shouldCalculateSyncScore { // - if blockHeightDiff <= 0 { // if the diff is bigger than 0 than the block is too old (blockHeightDiff = expected - allowedLag - blockHeight) and we don't give him the score - cs.syncScoreSum++ - } - cs.totalSyncScore++ - cs.lastQoSReport.Sync = sdk.NewDec(cs.syncScoreSum).QuoInt64(cs.totalSyncScore) - if sdk.OneDec().GT(cs.lastQoSReport.Sync) { - utils.LavaFormatDebug("QoS Sync report", - utils.Attribute{Key: "Sync", Value: cs.lastQoSReport.Sync}, - utils.Attribute{Key: "block diff", Value: blockHeightDiff}, - utils.Attribute{Key: "sync score", Value: strconv.FormatInt(cs.syncScoreSum, 10) + "/" + strconv.FormatInt(cs.totalSyncScore, 10)}, - // utils.Attribute{Key: "session_id", Value: cs.SessionId}, - // utils.Attribute{Key: "provider", Value: cs.Parent.PublicLavaAddress}, - ) - } - } else { - // we prefer to give them a score of 1 when there is no other data, since otherwise we damage their payments - cs.lastQoSReport.Sync = sdk.NewDec(1) +func (qosManager *QoSManager) GetLastReputationQoSReportRaw(epoch uint64, sessionId int64) *pairingtypes.QualityOfServiceReport { + qosReport := qosManager.getQoSReport(epoch, sessionId) + if qosReport == nil { + return nil } + + qosReport.lock.RLock() + defer qosReport.lock.RUnlock() + return qosReport.lastReputationQoSReportRaw } -func (qosReport *QoSManager) GetAnsweredRelays() uint64 { +func (qosManager *QoSManager) GetAnsweredRelays(epoch uint64, sessionId int64) uint64 { + qosReport := qosManager.getQoSReport(epoch, sessionId) + if qosReport == nil { + return 0 + } + + qosReport.lock.RLock() + defer qosReport.lock.RUnlock() return qosReport.answeredRelays } -func (qosReport *QoSManager) SetAnsweredRelays(answeredRelays uint64) { - qosReport.answeredRelays = answeredRelays -} +func (qosManager *QoSManager) GetTotalRelays(epoch uint64, sessionId int64) uint64 { + qosReport := qosManager.getQoSReport(epoch, sessionId) + if qosReport == nil { + return 0 + } -func (qosReport *QoSManager) GetTotalRelays() uint64 { + qosReport.lock.RLock() + defer qosReport.lock.RUnlock() return qosReport.totalRelays } -func (qosReport *QoSManager) SetTotalRelays(totalRelays uint64) { - qosReport.totalRelays = totalRelays -} +func (qosManager *QoSManager) GetSyncScoreSum(epoch uint64, sessionId int64) int64 { + qosReport := qosManager.getQoSReport(epoch, sessionId) + if qosReport == nil { + return 0 + } -func (qosReport *QoSManager) GetSyncScoreSum() int64 { + qosReport.lock.RLock() + defer qosReport.lock.RUnlock() return qosReport.syncScoreSum } -func (qosReport *QoSManager) GetTotalSyncScore() int64 { +func (qosManager *QoSManager) GetTotalSyncScore(epoch uint64, sessionId int64) int64 { + qosReport := qosManager.getQoSReport(epoch, sessionId) + if qosReport == nil { + return 0 + } + + qosReport.lock.RLock() + defer qosReport.lock.RUnlock() return qosReport.totalSyncScore } diff --git a/protocol/qos/qos_mutators.go b/protocol/qos/qos_mutators.go new file mode 100644 index 0000000000..5640584466 --- /dev/null +++ b/protocol/qos/qos_mutators.go @@ -0,0 +1,118 @@ +package qos + +import ( + "math" + "sort" + "strconv" + "time" + + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/lavanet/lava/v4/utils" + pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" +) + +// Base interface for all mutators +type Mutator interface { + Mutate(report *QoSReport) + GetEpochAndSessionId() (epoch uint64, sessionId int64) +} + +type QoSUpdateTaskBase struct { + epoch uint64 + sessionId int64 +} + +func (qoSUpdateTaskBase *QoSUpdateTaskBase) GetEpochAndSessionId() (epoch uint64, sessionId int64) { + return qoSUpdateTaskBase.epoch, qoSUpdateTaskBase.sessionId +} + +// Mutator for relay success +type QoSUpdateTaskRelaySuccess struct { + QoSUpdateTaskBase + latency time.Duration + expectedLatency time.Duration + blockHeightDiff int64 + numOfProviders int + servicersToCount int64 + providerAddress string +} + +func (qoSUpdateTaskRelaySuccess *QoSUpdateTaskRelaySuccess) Mutate(report *QoSReport) { + report.lock.Lock() + defer report.lock.Unlock() + report.totalRelays++ + report.answeredRelays++ + + if report.lastQoSReport == nil { + report.lastQoSReport = &pairingtypes.QualityOfServiceReport{} + } + + downtimePercentage := sdk.NewDecWithPrec(int64(report.totalRelays-report.answeredRelays), 0).Quo(sdk.NewDecWithPrec(int64(report.totalRelays), 0)) + scaledAvailabilityScore := sdk.MaxDec(sdk.ZeroDec(), AvailabilityPercentage.Sub(downtimePercentage).Quo(AvailabilityPercentage)) + report.lastQoSReport.Availability = scaledAvailabilityScore + if sdk.OneDec().GT(report.lastQoSReport.Availability) { + utils.LavaFormatDebug("QoS Availability report", utils.Attribute{Key: "Availability", Value: report.lastQoSReport.Availability}, utils.Attribute{Key: "down percent", Value: downtimePercentage}) + } + + latencyScore := sdk.MinDec(sdk.OneDec(), sdk.NewDecFromInt(sdk.NewInt(int64(qoSUpdateTaskRelaySuccess.expectedLatency))).Quo(sdk.NewDecFromInt(sdk.NewInt(int64(qoSUpdateTaskRelaySuccess.latency))))) + + insertSorted := func(list []sdk.Dec, value sdk.Dec) []sdk.Dec { + index := sort.Search(len(list), func(i int) bool { + return list[i].GTE(value) + }) + if len(list) == index { // nil or empty slice or after last element + return append(list, value) + } + list = append(list[:index+1], list[index:]...) // index < len(a) + list[index] = value + return list + } + report.latencyScoreList = insertSorted(report.latencyScoreList, latencyScore) + report.lastQoSReport.Latency = report.latencyScoreList[int(float64(len(report.latencyScoreList))*PercentileToCalculateLatency)] + + // checking if we have enough information to calculate the sync score for the providers, if we haven't talked + // with enough providers we don't have enough information and we will wait to have more information before setting the sync score + shouldCalculateSyncScore := int64(qoSUpdateTaskRelaySuccess.numOfProviders) > int64(math.Ceil(float64(qoSUpdateTaskRelaySuccess.servicersToCount)*MinProvidersForSync)) + if shouldCalculateSyncScore { // + if qoSUpdateTaskRelaySuccess.blockHeightDiff <= 0 { // if the diff is bigger than 0 than the block is too old (blockHeightDiff = expected - allowedLag - blockHeight) and we don't give him the score + report.syncScoreSum++ + } + report.totalSyncScore++ + report.lastQoSReport.Sync = sdk.NewDec(report.syncScoreSum).QuoInt64(report.totalSyncScore) + if sdk.OneDec().GT(report.lastQoSReport.Sync) { + utils.LavaFormatDebug("QoS Sync report", + utils.Attribute{Key: "Sync", Value: report.lastQoSReport.Sync}, + utils.Attribute{Key: "block diff", Value: qoSUpdateTaskRelaySuccess.blockHeightDiff}, + utils.Attribute{Key: "sync score", Value: strconv.FormatInt(report.syncScoreSum, 10) + "/" + strconv.FormatInt(report.totalSyncScore, 10)}, + utils.Attribute{Key: "session_id", Value: qoSUpdateTaskRelaySuccess.sessionId}, + utils.Attribute{Key: "provider", Value: qoSUpdateTaskRelaySuccess.providerAddress}, + ) + } + } else { + // we prefer to give them a score of 1 when there is no other data, since otherwise we damage their payments + report.lastQoSReport.Sync = sdk.NewDec(1) + } +} + +// Mutator for relay failure +type QoSUpdateTaskRelayFailure struct { + QoSUpdateTaskBase +} + +func (qoSUpdateTaskRelayFailure *QoSUpdateTaskRelayFailure) Mutate(report *QoSReport) { + report.lock.Lock() + defer report.lock.Unlock() + report.totalRelays++ +} + +// Mutator to set usage for a session +type QoSUpdateTaskSetReputationRaw struct { + QoSUpdateTaskBase + report *pairingtypes.QualityOfServiceReport +} + +func (qoSUpdateTaskSetReputationRaw *QoSUpdateTaskSetReputationRaw) Mutate(report *QoSReport) { + report.lock.Lock() + defer report.lock.Unlock() + report.lastReputationQoSReportRaw = qoSUpdateTaskSetReputationRaw.report +} diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 7714c33848..138116cd61 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -863,7 +863,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( ) } - lastQoSReport := singleConsumerSession.QoSManager.GetLastQoSReport() + lastQoSReport := singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId) if rpccs.debugRelays && lastQoSReport != nil && lastQoSReport.Sync.BigInt() != nil && lastQoSReport.Sync.LT(sdk.MustNewDecFromStr("0.9")) { From efb866b036924807323329dfa01be648678368f7 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Sun, 12 Jan 2025 16:14:54 +0200 Subject: [PATCH 04/24] Some fixes to QoS mutators and tests --- .../finalization_consensus_test.go | 61 ++++++++++------ protocol/lavasession/consumer_types.go | 2 + protocol/qos/qos_manager.go | 51 +++++++------ protocol/qos/qos_mutator_base.go | 17 +++++ protocol/qos/qos_mutator_relay_failure.go | 15 ++++ ...tators.go => qos_mutator_relay_success.go} | 71 ++++++------------- .../qos/qos_mutator_relay_success_test.go | 34 +++++++++ .../qos/qos_mutator_set_reputation_raw.go | 20 ++++++ 8 files changed, 180 insertions(+), 91 deletions(-) create mode 100644 protocol/qos/qos_mutator_base.go create mode 100644 protocol/qos/qos_mutator_relay_failure.go rename protocol/qos/{qos_mutators.go => qos_mutator_relay_success.go} (55%) create mode 100644 protocol/qos/qos_mutator_relay_success_test.go create mode 100644 protocol/qos/qos_mutator_set_reputation_raw.go diff --git a/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go b/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go index ef75111e6b..19549e39fa 100644 --- a/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go +++ b/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go @@ -205,6 +205,14 @@ func TestQoS(t *testing.T) { lavasession.AvailabilityPercentage = decToSet rand.InitRandomSeed() chainsToTest := []string{"APT1", "LAV1", "ETH1"} + + waitForDoneChan := func(doneChan <-chan struct{}) { + select { + case <-doneChan: + case <-time.After(5 * time.Hour): + t.Fatal("timeout waiting for qos calculation to finish") + } + } for i := 0; i < 10; i++ { for _, chainID := range chainsToTest { t.Run(chainID, func(t *testing.T) { @@ -282,54 +290,65 @@ func TestQoS(t *testing.T) { currentLatency := time.Millisecond expectedLatency := time.Millisecond latestServicedBlock := expectedBH - singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) + waitForDoneChan(singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1)) require.Equal(t, uint64(1), singleConsumerSession.QoSManager.GetAnsweredRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, uint64(1), singleConsumerSession.QoSManager.GetTotalRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, int64(1), singleConsumerSession.QoSManager.GetSyncScoreSum(epoch, singleConsumerSession.SessionId)) require.Equal(t, int64(1), singleConsumerSession.QoSManager.GetTotalSyncScore(epoch, singleConsumerSession.SessionId)) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Availability) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Latency) + + lastQoSReport := singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId) + require.Equal(t, sdk.OneDec(), lastQoSReport.Availability) + require.Equal(t, sdk.OneDec(), lastQoSReport.Sync) + require.Equal(t, sdk.OneDec(), lastQoSReport.Latency) latestServicedBlock = expectedBH + 1 - singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) + waitForDoneChan(singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1)) require.Equal(t, uint64(2), singleConsumerSession.QoSManager.GetAnsweredRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, uint64(2), singleConsumerSession.QoSManager.GetTotalRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, int64(2), singleConsumerSession.QoSManager.GetSyncScoreSum(epoch, singleConsumerSession.SessionId)) require.Equal(t, int64(2), singleConsumerSession.QoSManager.GetTotalSyncScore(epoch, singleConsumerSession.SessionId)) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Availability) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Latency) - singleConsumerSession.QoSManager.AddFailedRelay(epoch, singleConsumerSession.SessionId) // this is how we add a failure - singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) + lastQoSReport = singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId) + require.Equal(t, sdk.OneDec(), lastQoSReport.Availability) + require.Equal(t, sdk.OneDec(), lastQoSReport.Sync) + require.Equal(t, sdk.OneDec(), lastQoSReport.Latency) + + waitForDoneChan(singleConsumerSession.QoSManager.AddFailedRelay(epoch, singleConsumerSession.SessionId)) // this is how we add a failure + waitForDoneChan(singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1)) require.Equal(t, uint64(3), singleConsumerSession.QoSManager.GetAnsweredRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, uint64(4), singleConsumerSession.QoSManager.GetTotalRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetSyncScoreSum(epoch, singleConsumerSession.SessionId)) require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetTotalSyncScore(epoch, singleConsumerSession.SessionId)) - require.Equal(t, sdk.ZeroDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Availability) // because availability below 95% is 0 - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Latency) + lastQoSReport = singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId) + require.Equal(t, sdk.ZeroDec(), lastQoSReport.Availability) // because availability below 95% is 0 + require.Equal(t, sdk.OneDec(), lastQoSReport.Sync) + require.Equal(t, sdk.OneDec(), lastQoSReport.Latency) latestServicedBlock = expectedBH - 1 // is one block below threshold - singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) + waitForDoneChan(singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1)) require.Equal(t, uint64(4), singleConsumerSession.QoSManager.GetAnsweredRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, uint64(5), singleConsumerSession.QoSManager.GetTotalRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetSyncScoreSum(epoch, singleConsumerSession.SessionId)) require.Equal(t, int64(4), singleConsumerSession.QoSManager.GetTotalSyncScore(epoch, singleConsumerSession.SessionId)) - require.Equal(t, sdk.ZeroDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Availability) // because availability below 95% is 0 - require.Equal(t, sdk.MustNewDecFromStr("0.75"), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Latency) + lastQoSReport = singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId) + require.Equal(t, sdk.ZeroDec(), lastQoSReport.Availability) // because availability below 95% is 0 + require.Equal(t, sdk.MustNewDecFromStr("0.75"), lastQoSReport.Sync) + require.Equal(t, sdk.OneDec(), lastQoSReport.Latency) + latestServicedBlock = expectedBH + 1 // add in a loop so availability goes above 95% + doneChan := make(<-chan struct{}) for i := 5; i < 100; i++ { - singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) + doneChan = singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) } - require.Equal(t, sdk.MustNewDecFromStr("0.8"), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Availability) // because availability below 95% is 0 - require.Equal(t, sdk.MustNewDecFromStr("0.989898989898989898"), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Sync) - require.Equal(t, sdk.OneDec(), singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId).Latency) + waitForDoneChan(doneChan) + + lastQoSReport = singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId) + require.Equal(t, sdk.MustNewDecFromStr("0.8"), lastQoSReport.Availability) // because availability below 95% is 0 + require.Equal(t, sdk.MustNewDecFromStr("0.989898989898989898"), lastQoSReport.Sync) + require.Equal(t, sdk.OneDec(), lastQoSReport.Latency) finalizationInsertionsSpreadBlocks := []finalizationTestInsertion{ finalizationInsertionForProviders(chainID, epoch, 200, 0, 1, true, "", blocksInFinalizationProof, blockDistanceForFinalizedData)[0], diff --git a/protocol/lavasession/consumer_types.go b/protocol/lavasession/consumer_types.go index a19c5a8795..469102021b 100644 --- a/protocol/lavasession/consumer_types.go +++ b/protocol/lavasession/consumer_types.go @@ -9,6 +9,7 @@ import ( sdk "github.com/cosmos/cosmos-sdk/types" "github.com/lavanet/lava/v4/protocol/provideroptimizer" + "github.com/lavanet/lava/v4/protocol/qos" "github.com/lavanet/lava/v4/utils" "github.com/lavanet/lava/v4/utils/rand" pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" @@ -430,6 +431,7 @@ func (cswp *ConsumerSessionsWithProvider) GetConsumerSessionInstanceFromEndpoint StaticProvider: cswp.StaticProvider, routerKey: NewRouterKey(nil), epoch: cswp.PairingEpoch, + QoSManager: qos.NewQoSManager(), } consumerSession.TryUseSession() // we must lock the session so other requests wont get it. diff --git a/protocol/qos/qos_manager.go b/protocol/qos/qos_manager.go index 2bc692080e..0918139e09 100644 --- a/protocol/qos/qos_manager.go +++ b/protocol/qos/qos_manager.go @@ -20,6 +20,8 @@ type QoSReport struct { lock sync.RWMutex } +type DoneChan <-chan struct{} + type QoSManager struct { qosReports map[uint64]map[int64]*QoSReport // first key is the epoch, second key is the session id mutatorsQueue chan Mutator @@ -63,13 +65,21 @@ func (qosManager *QoSManager) fetchOrSetSessionFromMap(epoch uint64, sessionId i return qosManager.qosReports[epoch][sessionId] } -func (qosManager *QoSManager) CalculateQoS(epoch uint64, sessionId int64, providerAddress string, latency, expectedLatency time.Duration, blockHeightDiff int64, numOfProviders int, servicersToCount int64) { +func (qosManager *QoSManager) createQoSMutatorBase(epoch uint64, sessionId int64) (*QoSMutatorBase, chan struct{}) { + doneChan := make(chan struct{}, 1) + qosMutatorBase := &QoSMutatorBase{ + epoch: epoch, + sessionId: sessionId, + doneChan: doneChan, + } + return qosMutatorBase, doneChan +} + +func (qosManager *QoSManager) CalculateQoS(epoch uint64, sessionId int64, providerAddress string, latency, expectedLatency time.Duration, blockHeightDiff int64, numOfProviders int, servicersToCount int64) DoneChan { + qosMutatorBase, doneChan := qosManager.createQoSMutatorBase(epoch, sessionId) go func() { - qosManager.mutatorsQueue <- &QoSUpdateTaskRelaySuccess{ - QoSUpdateTaskBase: QoSUpdateTaskBase{ - epoch: epoch, - sessionId: sessionId, - }, + qosManager.mutatorsQueue <- &QoSMutatorRelaySuccess{ + QoSMutatorBase: *qosMutatorBase, providerAddress: providerAddress, latency: latency, expectedLatency: expectedLatency, @@ -78,27 +88,28 @@ func (qosManager *QoSManager) CalculateQoS(epoch uint64, sessionId int64, provid servicersToCount: servicersToCount, } }() + return doneChan } -func (qosManager *QoSManager) AddFailedRelay(epoch uint64, sessionId int64) { +func (qosManager *QoSManager) AddFailedRelay(epoch uint64, sessionId int64) DoneChan { + qosMutatorBase, doneChan := qosManager.createQoSMutatorBase(epoch, sessionId) go func() { - qosManager.mutatorsQueue <- &QoSUpdateTaskRelayFailure{ - QoSUpdateTaskBase: QoSUpdateTaskBase{ - epoch: epoch, - sessionId: sessionId, - }, + qosManager.mutatorsQueue <- &QoSMutatorRelayFailure{ + QoSMutatorBase: *qosMutatorBase, } }() + return doneChan } -func (qosManager *QoSManager) SetLastReputationQoSReportRaw(epoch uint64, sessionId int64, report *pairingtypes.QualityOfServiceReport) { - qosManager.mutatorsQueue <- &QoSUpdateTaskSetReputationRaw{ - QoSUpdateTaskBase: QoSUpdateTaskBase{ - epoch: epoch, - sessionId: sessionId, - }, - report: report, - } +func (qosManager *QoSManager) SetLastReputationQoSReportRaw(epoch uint64, sessionId int64, report *pairingtypes.QualityOfServiceReport) DoneChan { + qosMutatorBase, doneChan := qosManager.createQoSMutatorBase(epoch, sessionId) + go func() { + qosManager.mutatorsQueue <- &QoSMutatorSetReputationRaw{ + QoSMutatorBase: *qosMutatorBase, + report: report, + } + }() + return doneChan } func (qosManager *QoSManager) getQoSReport(epoch uint64, sessionId int64) *QoSReport { diff --git a/protocol/qos/qos_mutator_base.go b/protocol/qos/qos_mutator_base.go new file mode 100644 index 0000000000..2919feac6f --- /dev/null +++ b/protocol/qos/qos_mutator_base.go @@ -0,0 +1,17 @@ +package qos + +// Base interface for all mutators +type Mutator interface { + Mutate(report *QoSReport) + GetEpochAndSessionId() (epoch uint64, sessionId int64) +} + +type QoSMutatorBase struct { + epoch uint64 + sessionId int64 + doneChan chan<- struct{} +} + +func (qoSMutatorBase *QoSMutatorBase) GetEpochAndSessionId() (epoch uint64, sessionId int64) { + return qoSMutatorBase.epoch, qoSMutatorBase.sessionId +} diff --git a/protocol/qos/qos_mutator_relay_failure.go b/protocol/qos/qos_mutator_relay_failure.go new file mode 100644 index 0000000000..c5f3bcc730 --- /dev/null +++ b/protocol/qos/qos_mutator_relay_failure.go @@ -0,0 +1,15 @@ +package qos + +// Mutator for relay failure +type QoSMutatorRelayFailure struct { + QoSMutatorBase +} + +func (qoSMutatorRelayFailure *QoSMutatorRelayFailure) Mutate(report *QoSReport) { + report.lock.Lock() + defer func() { + report.lock.Unlock() + qoSMutatorRelayFailure.doneChan <- struct{}{} + }() + report.totalRelays++ +} diff --git a/protocol/qos/qos_mutators.go b/protocol/qos/qos_mutator_relay_success.go similarity index 55% rename from protocol/qos/qos_mutators.go rename to protocol/qos/qos_mutator_relay_success.go index 5640584466..dce7c8e627 100644 --- a/protocol/qos/qos_mutators.go +++ b/protocol/qos/qos_mutator_relay_success.go @@ -11,24 +11,9 @@ import ( pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" ) -// Base interface for all mutators -type Mutator interface { - Mutate(report *QoSReport) - GetEpochAndSessionId() (epoch uint64, sessionId int64) -} - -type QoSUpdateTaskBase struct { - epoch uint64 - sessionId int64 -} - -func (qoSUpdateTaskBase *QoSUpdateTaskBase) GetEpochAndSessionId() (epoch uint64, sessionId int64) { - return qoSUpdateTaskBase.epoch, qoSUpdateTaskBase.sessionId -} - // Mutator for relay success -type QoSUpdateTaskRelaySuccess struct { - QoSUpdateTaskBase +type QoSMutatorRelaySuccess struct { + QoSMutatorBase latency time.Duration expectedLatency time.Duration blockHeightDiff int64 @@ -37,9 +22,19 @@ type QoSUpdateTaskRelaySuccess struct { providerAddress string } -func (qoSUpdateTaskRelaySuccess *QoSUpdateTaskRelaySuccess) Mutate(report *QoSReport) { +func (qoSMutatorRelaySuccess *QoSMutatorRelaySuccess) calculateAvailabilityScore(qosReport *QoSReport) (downtimePercentageRet, scaledAvailabilityScoreRet sdk.Dec) { + downtimePercentage := sdk.NewDecWithPrec(int64(qosReport.totalRelays-qosReport.answeredRelays), 0).Quo(sdk.NewDecWithPrec(int64(qosReport.totalRelays), 0)) + scaledAvailabilityScore := sdk.MaxDec(sdk.ZeroDec(), AvailabilityPercentage.Sub(downtimePercentage).Quo(AvailabilityPercentage)) + return downtimePercentage, scaledAvailabilityScore +} + +func (qoSMutatorRelaySuccess *QoSMutatorRelaySuccess) Mutate(report *QoSReport) { report.lock.Lock() - defer report.lock.Unlock() + defer func() { + report.lock.Unlock() + qoSMutatorRelaySuccess.doneChan <- struct{}{} + }() + report.totalRelays++ report.answeredRelays++ @@ -47,14 +42,13 @@ func (qoSUpdateTaskRelaySuccess *QoSUpdateTaskRelaySuccess) Mutate(report *QoSRe report.lastQoSReport = &pairingtypes.QualityOfServiceReport{} } - downtimePercentage := sdk.NewDecWithPrec(int64(report.totalRelays-report.answeredRelays), 0).Quo(sdk.NewDecWithPrec(int64(report.totalRelays), 0)) - scaledAvailabilityScore := sdk.MaxDec(sdk.ZeroDec(), AvailabilityPercentage.Sub(downtimePercentage).Quo(AvailabilityPercentage)) + downtimePercentage, scaledAvailabilityScore := qoSMutatorRelaySuccess.calculateAvailabilityScore(report) report.lastQoSReport.Availability = scaledAvailabilityScore if sdk.OneDec().GT(report.lastQoSReport.Availability) { utils.LavaFormatDebug("QoS Availability report", utils.Attribute{Key: "Availability", Value: report.lastQoSReport.Availability}, utils.Attribute{Key: "down percent", Value: downtimePercentage}) } - latencyScore := sdk.MinDec(sdk.OneDec(), sdk.NewDecFromInt(sdk.NewInt(int64(qoSUpdateTaskRelaySuccess.expectedLatency))).Quo(sdk.NewDecFromInt(sdk.NewInt(int64(qoSUpdateTaskRelaySuccess.latency))))) + latencyScore := sdk.MinDec(sdk.OneDec(), sdk.NewDecFromInt(sdk.NewInt(int64(qoSMutatorRelaySuccess.expectedLatency))).Quo(sdk.NewDecFromInt(sdk.NewInt(int64(qoSMutatorRelaySuccess.latency))))) insertSorted := func(list []sdk.Dec, value sdk.Dec) []sdk.Dec { index := sort.Search(len(list), func(i int) bool { @@ -72,9 +66,9 @@ func (qoSUpdateTaskRelaySuccess *QoSUpdateTaskRelaySuccess) Mutate(report *QoSRe // checking if we have enough information to calculate the sync score for the providers, if we haven't talked // with enough providers we don't have enough information and we will wait to have more information before setting the sync score - shouldCalculateSyncScore := int64(qoSUpdateTaskRelaySuccess.numOfProviders) > int64(math.Ceil(float64(qoSUpdateTaskRelaySuccess.servicersToCount)*MinProvidersForSync)) + shouldCalculateSyncScore := int64(qoSMutatorRelaySuccess.numOfProviders) > int64(math.Ceil(float64(qoSMutatorRelaySuccess.servicersToCount)*MinProvidersForSync)) if shouldCalculateSyncScore { // - if qoSUpdateTaskRelaySuccess.blockHeightDiff <= 0 { // if the diff is bigger than 0 than the block is too old (blockHeightDiff = expected - allowedLag - blockHeight) and we don't give him the score + if qoSMutatorRelaySuccess.blockHeightDiff <= 0 { // if the diff is bigger than 0 than the block is too old (blockHeightDiff = expected - allowedLag - blockHeight) and we don't give him the score report.syncScoreSum++ } report.totalSyncScore++ @@ -82,10 +76,10 @@ func (qoSUpdateTaskRelaySuccess *QoSUpdateTaskRelaySuccess) Mutate(report *QoSRe if sdk.OneDec().GT(report.lastQoSReport.Sync) { utils.LavaFormatDebug("QoS Sync report", utils.Attribute{Key: "Sync", Value: report.lastQoSReport.Sync}, - utils.Attribute{Key: "block diff", Value: qoSUpdateTaskRelaySuccess.blockHeightDiff}, + utils.Attribute{Key: "block diff", Value: qoSMutatorRelaySuccess.blockHeightDiff}, utils.Attribute{Key: "sync score", Value: strconv.FormatInt(report.syncScoreSum, 10) + "/" + strconv.FormatInt(report.totalSyncScore, 10)}, - utils.Attribute{Key: "session_id", Value: qoSUpdateTaskRelaySuccess.sessionId}, - utils.Attribute{Key: "provider", Value: qoSUpdateTaskRelaySuccess.providerAddress}, + utils.Attribute{Key: "session_id", Value: qoSMutatorRelaySuccess.sessionId}, + utils.Attribute{Key: "provider", Value: qoSMutatorRelaySuccess.providerAddress}, ) } } else { @@ -93,26 +87,3 @@ func (qoSUpdateTaskRelaySuccess *QoSUpdateTaskRelaySuccess) Mutate(report *QoSRe report.lastQoSReport.Sync = sdk.NewDec(1) } } - -// Mutator for relay failure -type QoSUpdateTaskRelayFailure struct { - QoSUpdateTaskBase -} - -func (qoSUpdateTaskRelayFailure *QoSUpdateTaskRelayFailure) Mutate(report *QoSReport) { - report.lock.Lock() - defer report.lock.Unlock() - report.totalRelays++ -} - -// Mutator to set usage for a session -type QoSUpdateTaskSetReputationRaw struct { - QoSUpdateTaskBase - report *pairingtypes.QualityOfServiceReport -} - -func (qoSUpdateTaskSetReputationRaw *QoSUpdateTaskSetReputationRaw) Mutate(report *QoSReport) { - report.lock.Lock() - defer report.lock.Unlock() - report.lastReputationQoSReportRaw = qoSUpdateTaskSetReputationRaw.report -} diff --git a/protocol/qos/qos_mutator_relay_success_test.go b/protocol/qos/qos_mutator_relay_success_test.go new file mode 100644 index 0000000000..309a501e37 --- /dev/null +++ b/protocol/qos/qos_mutator_relay_success_test.go @@ -0,0 +1,34 @@ +package qos + +import ( + "testing" + + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/stretchr/testify/require" +) + +func TestCalculateAvailabilityScore(t *testing.T) { + avialabilityAsFloat, err := AvailabilityPercentage.Float64() + require.NoError(t, err) + precision := uint64(10000) + + qosReport := QoSReport{} + qosReport.totalRelays = precision + qosReport.answeredRelays = precision - uint64(avialabilityAsFloat*float64(precision)) + qoSMutatorRelaySuccess := QoSMutatorRelaySuccess{} + downTime, availabilityScore := qoSMutatorRelaySuccess.calculateAvailabilityScore(&qosReport) + downTimeFloat, err := downTime.Float64() + require.NoError(t, err) + require.Equal(t, downTimeFloat, avialabilityAsFloat) + require.Zero(t, availabilityScore.BigInt().Uint64()) + + qosReport.totalRelays = 2 * precision + qosReport.answeredRelays = 2*precision - uint64(avialabilityAsFloat*float64(precision)) + downTime, availabilityScore = qoSMutatorRelaySuccess.calculateAvailabilityScore(&qosReport) + downTimeFloat, err = downTime.Float64() + require.NoError(t, err) + halfDec, err := sdk.NewDecFromStr("0.5") + require.NoError(t, err) + require.Equal(t, downTimeFloat*2, avialabilityAsFloat) + require.Equal(t, halfDec, availabilityScore) +} diff --git a/protocol/qos/qos_mutator_set_reputation_raw.go b/protocol/qos/qos_mutator_set_reputation_raw.go new file mode 100644 index 0000000000..52dc9bcfea --- /dev/null +++ b/protocol/qos/qos_mutator_set_reputation_raw.go @@ -0,0 +1,20 @@ +package qos + +import ( + pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" +) + +// Mutator to set usage for a session +type QoSMutatorSetReputationRaw struct { + QoSMutatorBase + report *pairingtypes.QualityOfServiceReport +} + +func (qoSMutatorSetReputationRaw *QoSMutatorSetReputationRaw) Mutate(report *QoSReport) { + report.lock.Lock() + defer func() { + report.lock.Unlock() + qoSMutatorSetReputationRaw.doneChan <- struct{}{} + }() + report.lastReputationQoSReportRaw = qoSMutatorSetReputationRaw.report +} From 429d10cab52a04a6cfa697ef9b963ce1ce69ff66 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Sun, 12 Jan 2025 17:27:55 +0200 Subject: [PATCH 05/24] Fix some locks --- protocol/qos/qos_manager.go | 20 +++++++++++-------- protocol/qos/qos_mutator_relay_failure.go | 2 -- protocol/qos/qos_mutator_relay_success.go | 2 -- .../qos/qos_mutator_set_reputation_raw.go | 2 -- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/protocol/qos/qos_manager.go b/protocol/qos/qos_manager.go index 0918139e09..74e2e04ac4 100644 --- a/protocol/qos/qos_manager.go +++ b/protocol/qos/qos_manager.go @@ -23,9 +23,9 @@ type QoSReport struct { type DoneChan <-chan struct{} type QoSManager struct { - qosReports map[uint64]map[int64]*QoSReport // first key is the epoch, second key is the session id - mutatorsQueue chan Mutator - qosReportsLock sync.RWMutex + qosReports map[uint64]map[int64]*QoSReport // first key is the epoch, second key is the session id + mutatorsQueue chan Mutator + lock sync.RWMutex } func NewQoSManager() *QoSManager { @@ -40,13 +40,17 @@ func (qosManager *QoSManager) processMutations() { for mutator := range qosManager.mutatorsQueue { epoch, sessionId := mutator.GetEpochAndSessionId() qosReport := qosManager.fetchOrSetSessionFromMap(epoch, sessionId) - mutator.Mutate(qosReport) + func() { + qosReport.lock.Lock() + defer qosReport.lock.Unlock() + mutator.Mutate(qosReport) + }() } } func (qosManager *QoSManager) fetchOrSetSessionFromMap(epoch uint64, sessionId int64) *QoSReport { - qosManager.qosReportsLock.Lock() - defer qosManager.qosReportsLock.Unlock() + qosManager.lock.Lock() + defer qosManager.lock.Unlock() if qosManager.qosReports[epoch] == nil { qosManager.qosReports[epoch] = make(map[int64]*QoSReport) } @@ -113,8 +117,8 @@ func (qosManager *QoSManager) SetLastReputationQoSReportRaw(epoch uint64, sessio } func (qosManager *QoSManager) getQoSReport(epoch uint64, sessionId int64) *QoSReport { - qosManager.qosReportsLock.RLock() - defer qosManager.qosReportsLock.RUnlock() + qosManager.lock.RLock() + defer qosManager.lock.RUnlock() if qosManager.qosReports[epoch] == nil || qosManager.qosReports[epoch][sessionId] == nil { return nil } diff --git a/protocol/qos/qos_mutator_relay_failure.go b/protocol/qos/qos_mutator_relay_failure.go index c5f3bcc730..2635933ef0 100644 --- a/protocol/qos/qos_mutator_relay_failure.go +++ b/protocol/qos/qos_mutator_relay_failure.go @@ -6,9 +6,7 @@ type QoSMutatorRelayFailure struct { } func (qoSMutatorRelayFailure *QoSMutatorRelayFailure) Mutate(report *QoSReport) { - report.lock.Lock() defer func() { - report.lock.Unlock() qoSMutatorRelayFailure.doneChan <- struct{}{} }() report.totalRelays++ diff --git a/protocol/qos/qos_mutator_relay_success.go b/protocol/qos/qos_mutator_relay_success.go index dce7c8e627..e24f177894 100644 --- a/protocol/qos/qos_mutator_relay_success.go +++ b/protocol/qos/qos_mutator_relay_success.go @@ -29,9 +29,7 @@ func (qoSMutatorRelaySuccess *QoSMutatorRelaySuccess) calculateAvailabilityScore } func (qoSMutatorRelaySuccess *QoSMutatorRelaySuccess) Mutate(report *QoSReport) { - report.lock.Lock() defer func() { - report.lock.Unlock() qoSMutatorRelaySuccess.doneChan <- struct{}{} }() diff --git a/protocol/qos/qos_mutator_set_reputation_raw.go b/protocol/qos/qos_mutator_set_reputation_raw.go index 52dc9bcfea..e7147f1485 100644 --- a/protocol/qos/qos_mutator_set_reputation_raw.go +++ b/protocol/qos/qos_mutator_set_reputation_raw.go @@ -11,9 +11,7 @@ type QoSMutatorSetReputationRaw struct { } func (qoSMutatorSetReputationRaw *QoSMutatorSetReputationRaw) Mutate(report *QoSReport) { - report.lock.Lock() defer func() { - report.lock.Unlock() qoSMutatorSetReputationRaw.doneChan <- struct{}{} }() report.lastReputationQoSReportRaw = qoSMutatorSetReputationRaw.report From dbe6faecb30d493a95dc2a7713b99e9970df9536 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Sun, 12 Jan 2025 17:28:11 +0200 Subject: [PATCH 06/24] Revert debug line from test --- .../finalizationconsensus/finalization_consensus_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go b/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go index 19549e39fa..41f44255c4 100644 --- a/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go +++ b/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go @@ -209,10 +209,11 @@ func TestQoS(t *testing.T) { waitForDoneChan := func(doneChan <-chan struct{}) { select { case <-doneChan: - case <-time.After(5 * time.Hour): + case <-time.After(5 * time.Second): t.Fatal("timeout waiting for qos calculation to finish") } } + for i := 0; i < 10; i++ { for _, chainID := range chainsToTest { t.Run(chainID, func(t *testing.T) { From b24e944bc85bc6c5bb234f9fd9fa20548f898d52 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Sun, 12 Jan 2025 17:28:23 +0200 Subject: [PATCH 07/24] Add test for QoSManager --- protocol/qos/qos_manager_test.go | 112 +++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 protocol/qos/qos_manager_test.go diff --git a/protocol/qos/qos_manager_test.go b/protocol/qos/qos_manager_test.go new file mode 100644 index 0000000000..f07041b698 --- /dev/null +++ b/protocol/qos/qos_manager_test.go @@ -0,0 +1,112 @@ +package qos + +import ( + "testing" + "time" + + sdk "github.com/cosmos/cosmos-sdk/types" + pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" + "github.com/stretchr/testify/require" +) + +func TestCalculateQoS(t *testing.T) { + qosManager := NewQoSManager() + epoch := uint64(1) + sessionID := int64(1) + providerAddr := "provider1" + + // Test successful relay + doneChan := qosManager.CalculateQoS( + epoch, + sessionID, + providerAddr, + 100*time.Millisecond, + 200*time.Millisecond, + 1, + 3, + 2, + ) + + <-doneChan // Wait for processing + + report := qosManager.GetLastQoSReport(epoch, sessionID) + require.NotNil(t, report) + + totalRelays := qosManager.GetTotalRelays(epoch, sessionID) + require.Equal(t, uint64(1), totalRelays) + + answeredRelays := qosManager.GetAnsweredRelays(epoch, sessionID) + require.Equal(t, uint64(1), answeredRelays) +} + +func TestAddFailedRelay(t *testing.T) { + qosManager := NewQoSManager() + epoch := uint64(1) + sessionID := int64(1) + + doneChan := qosManager.AddFailedRelay(epoch, sessionID) + <-doneChan // Wait for processing + + totalRelays := qosManager.GetTotalRelays(epoch, sessionID) + require.Equal(t, uint64(1), totalRelays) + + answeredRelays := qosManager.GetAnsweredRelays(epoch, sessionID) + require.Equal(t, uint64(0), answeredRelays) +} + +func TestSetLastReputationQoSReportRaw(t *testing.T) { + qosManager := NewQoSManager() + epoch := uint64(1) + sessionID := int64(1) + + testReport := &pairingtypes.QualityOfServiceReport{ + Latency: sdk.NewDec(95), + Availability: sdk.NewDec(100), + } + + doneChan := qosManager.SetLastReputationQoSReportRaw(epoch, sessionID, testReport) + <-doneChan // Wait for processing + + report := qosManager.GetLastReputationQoSReportRaw(epoch, sessionID) + require.NotNil(t, report) + require.Equal(t, testReport.Latency, report.Latency) + require.Equal(t, testReport.Availability, report.Availability) +} + +func TestConcurrentAccess(t *testing.T) { + qosManager := NewQoSManager() + epoch := uint64(1) + sessionID := int64(1) + providerAddr := "provider1" + + // Run multiple operations concurrently + go func() { + for i := 0; i < 100; i++ { + doneChan := qosManager.CalculateQoS( + epoch, + sessionID, + providerAddr, + 100*time.Millisecond, + 200*time.Millisecond, + 1, + 3, + 2, + ) + <-doneChan + } + }() + + go func() { + for i := 0; i < 50; i++ { + doneChan := qosManager.AddFailedRelay(epoch, sessionID) + <-doneChan + } + }() + + // Give time for goroutines to complete + time.Sleep(100 * time.Millisecond) + + // Verify the total number of relays + totalRelays := qosManager.GetTotalRelays(epoch, sessionID) + require.Equal(t, uint64(150), totalRelays) // 100 successful + 50 failed +} From d63add86acf95c016959f327d1c59a273ca8d9af Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Sun, 12 Jan 2025 17:28:42 +0200 Subject: [PATCH 08/24] Fix a race in the QoSManager --- protocol/qos/qos_manager.go | 42 ++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/protocol/qos/qos_manager.go b/protocol/qos/qos_manager.go index 74e2e04ac4..9b22f34e1a 100644 --- a/protocol/qos/qos_manager.go +++ b/protocol/qos/qos_manager.go @@ -31,7 +31,7 @@ type QoSManager struct { func NewQoSManager() *QoSManager { qosManager := &QoSManager{} qosManager.qosReports = make(map[uint64]map[int64]*QoSReport) - qosManager.mutatorsQueue = make(chan Mutator, 1000) + qosManager.mutatorsQueue = make(chan Mutator, 10000000) // Buffer of 10 Million mutators go qosManager.processMutations() return qosManager } @@ -70,7 +70,7 @@ func (qosManager *QoSManager) fetchOrSetSessionFromMap(epoch uint64, sessionId i } func (qosManager *QoSManager) createQoSMutatorBase(epoch uint64, sessionId int64) (*QoSMutatorBase, chan struct{}) { - doneChan := make(chan struct{}, 1) + doneChan := make(chan struct{}, 1) // Must be buffered to avoid freezing the queue qosMutatorBase := &QoSMutatorBase{ epoch: epoch, sessionId: sessionId, @@ -81,38 +81,32 @@ func (qosManager *QoSManager) createQoSMutatorBase(epoch uint64, sessionId int64 func (qosManager *QoSManager) CalculateQoS(epoch uint64, sessionId int64, providerAddress string, latency, expectedLatency time.Duration, blockHeightDiff int64, numOfProviders int, servicersToCount int64) DoneChan { qosMutatorBase, doneChan := qosManager.createQoSMutatorBase(epoch, sessionId) - go func() { - qosManager.mutatorsQueue <- &QoSMutatorRelaySuccess{ - QoSMutatorBase: *qosMutatorBase, - providerAddress: providerAddress, - latency: latency, - expectedLatency: expectedLatency, - blockHeightDiff: blockHeightDiff, - numOfProviders: numOfProviders, - servicersToCount: servicersToCount, - } - }() + qosManager.mutatorsQueue <- &QoSMutatorRelaySuccess{ + QoSMutatorBase: *qosMutatorBase, + providerAddress: providerAddress, + latency: latency, + expectedLatency: expectedLatency, + blockHeightDiff: blockHeightDiff, + numOfProviders: numOfProviders, + servicersToCount: servicersToCount, + } return doneChan } func (qosManager *QoSManager) AddFailedRelay(epoch uint64, sessionId int64) DoneChan { qosMutatorBase, doneChan := qosManager.createQoSMutatorBase(epoch, sessionId) - go func() { - qosManager.mutatorsQueue <- &QoSMutatorRelayFailure{ - QoSMutatorBase: *qosMutatorBase, - } - }() + qosManager.mutatorsQueue <- &QoSMutatorRelayFailure{ + QoSMutatorBase: *qosMutatorBase, + } return doneChan } func (qosManager *QoSManager) SetLastReputationQoSReportRaw(epoch uint64, sessionId int64, report *pairingtypes.QualityOfServiceReport) DoneChan { qosMutatorBase, doneChan := qosManager.createQoSMutatorBase(epoch, sessionId) - go func() { - qosManager.mutatorsQueue <- &QoSMutatorSetReputationRaw{ - QoSMutatorBase: *qosMutatorBase, - report: report, - } - }() + qosManager.mutatorsQueue <- &QoSMutatorSetReputationRaw{ + QoSMutatorBase: *qosMutatorBase, + report: report, + } return doneChan } From 90484789ccf2699a25b36ae0e5328796fffab3e1 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Mon, 13 Jan 2025 17:30:39 +0200 Subject: [PATCH 09/24] Fix QoS test --- .../finalizationconsensus/finalization_consensus_test.go | 3 ++- protocol/lavasession/common.go | 2 -- protocol/qos/common.go | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go b/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go index 41f44255c4..0dc7eea0ea 100644 --- a/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go +++ b/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go @@ -16,6 +16,7 @@ import ( sdk "github.com/cosmos/cosmos-sdk/types" "github.com/lavanet/lava/v4/protocol/chainlib" "github.com/lavanet/lava/v4/protocol/lavasession" + "github.com/lavanet/lava/v4/protocol/qos" pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" spectypes "github.com/lavanet/lava/v4/x/spec/types" "github.com/stretchr/testify/require" @@ -202,7 +203,7 @@ func TestConsensusHashesInsertion(t *testing.T) { func TestQoS(t *testing.T) { decToSet, _ := sdk.NewDecFromStr("0.05") // test values fit 0.05 Availability requirements - lavasession.AvailabilityPercentage = decToSet + qos.AvailabilityPercentage = decToSet rand.InitRandomSeed() chainsToTest := []string{"APT1", "LAV1", "ETH1"} diff --git a/protocol/lavasession/common.go b/protocol/lavasession/common.go index 8a1358dc13..ca7ebece6e 100644 --- a/protocol/lavasession/common.go +++ b/protocol/lavasession/common.go @@ -15,7 +15,6 @@ import ( sdkerrors "cosmossdk.io/errors" "golang.org/x/exp/slices" - sdk "github.com/cosmos/cosmos-sdk/types" "github.com/gogo/status" "github.com/lavanet/lava/v4/protocol/chainlib/chainproxy" "github.com/lavanet/lava/v4/utils" @@ -48,7 +47,6 @@ const ( unixPrefix = "unix:" ) -var AvailabilityPercentage sdk.Dec = sdk.NewDecWithPrec(1, 1) // TODO move to params pairing const ( OptimizerPerturbation = 0.10 LatencyThresholdStatic = 1 * time.Second diff --git a/protocol/qos/common.go b/protocol/qos/common.go index 59005acb99..5cd14854ef 100644 --- a/protocol/qos/common.go +++ b/protocol/qos/common.go @@ -3,6 +3,7 @@ package qos import sdk "github.com/cosmos/cosmos-sdk/types" var AvailabilityPercentage sdk.Dec = sdk.NewDecWithPrec(1, 1) // TODO move to params pairing + const ( PercentileToCalculateLatency = 0.9 MinProvidersForSync = 0.6 From 34aaf8e50e2015c16a690b5cf821bbc48b371a5e Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Mon, 13 Jan 2025 17:31:43 +0200 Subject: [PATCH 10/24] Log clean --- protocol/qos/qos_mutator_relay_success.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/protocol/qos/qos_mutator_relay_success.go b/protocol/qos/qos_mutator_relay_success.go index e24f177894..66e1cbb388 100644 --- a/protocol/qos/qos_mutator_relay_success.go +++ b/protocol/qos/qos_mutator_relay_success.go @@ -43,7 +43,12 @@ func (qoSMutatorRelaySuccess *QoSMutatorRelaySuccess) Mutate(report *QoSReport) downtimePercentage, scaledAvailabilityScore := qoSMutatorRelaySuccess.calculateAvailabilityScore(report) report.lastQoSReport.Availability = scaledAvailabilityScore if sdk.OneDec().GT(report.lastQoSReport.Availability) { - utils.LavaFormatDebug("QoS Availability report", utils.Attribute{Key: "Availability", Value: report.lastQoSReport.Availability}, utils.Attribute{Key: "down percent", Value: downtimePercentage}) + utils.LavaFormatDebug("QoS Availability report", + utils.LogAttr("availability", report.lastQoSReport.Availability), + utils.LogAttr("down_percent", downtimePercentage), + utils.LogAttr("session_id", qoSMutatorRelaySuccess.sessionId), + utils.LogAttr("provider", qoSMutatorRelaySuccess.providerAddress), + ) } latencyScore := sdk.MinDec(sdk.OneDec(), sdk.NewDecFromInt(sdk.NewInt(int64(qoSMutatorRelaySuccess.expectedLatency))).Quo(sdk.NewDecFromInt(sdk.NewInt(int64(qoSMutatorRelaySuccess.latency))))) @@ -73,11 +78,11 @@ func (qoSMutatorRelaySuccess *QoSMutatorRelaySuccess) Mutate(report *QoSReport) report.lastQoSReport.Sync = sdk.NewDec(report.syncScoreSum).QuoInt64(report.totalSyncScore) if sdk.OneDec().GT(report.lastQoSReport.Sync) { utils.LavaFormatDebug("QoS Sync report", - utils.Attribute{Key: "Sync", Value: report.lastQoSReport.Sync}, - utils.Attribute{Key: "block diff", Value: qoSMutatorRelaySuccess.blockHeightDiff}, - utils.Attribute{Key: "sync score", Value: strconv.FormatInt(report.syncScoreSum, 10) + "/" + strconv.FormatInt(report.totalSyncScore, 10)}, - utils.Attribute{Key: "session_id", Value: qoSMutatorRelaySuccess.sessionId}, - utils.Attribute{Key: "provider", Value: qoSMutatorRelaySuccess.providerAddress}, + utils.LogAttr("sync", report.lastQoSReport.Sync), + utils.LogAttr("block_diff", qoSMutatorRelaySuccess.blockHeightDiff), + utils.LogAttr("sync_score", strconv.FormatInt(report.syncScoreSum, 10)+"/"+strconv.FormatInt(report.totalSyncScore, 10)), + utils.LogAttr("session_id", qoSMutatorRelaySuccess.sessionId), + utils.LogAttr("provider", qoSMutatorRelaySuccess.providerAddress), ) } } else { From 670200dd88a37f0843bec9009a4b977d0f574f78 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Mon, 13 Jan 2025 17:54:48 +0200 Subject: [PATCH 11/24] Fix a small bug --- protocol/qos/qos_manager.go | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/protocol/qos/qos_manager.go b/protocol/qos/qos_manager.go index 9b22f34e1a..6b74eef6c3 100644 --- a/protocol/qos/qos_manager.go +++ b/protocol/qos/qos_manager.go @@ -55,16 +55,7 @@ func (qosManager *QoSManager) fetchOrSetSessionFromMap(epoch uint64, sessionId i qosManager.qosReports[epoch] = make(map[int64]*QoSReport) } if qosManager.qosReports[epoch][sessionId] == nil { - qosManager.qosReports[epoch][sessionId] = &QoSReport{ - lastQoSReport: &pairingtypes.QualityOfServiceReport{}, - lastReputationQoSReport: &pairingtypes.QualityOfServiceReport{}, - lastReputationQoSReportRaw: &pairingtypes.QualityOfServiceReport{}, - latencyScoreList: []sdk.Dec{}, - syncScoreSum: 0, - totalSyncScore: 0, - totalRelays: 0, - answeredRelays: 0, - } + qosManager.qosReports[epoch][sessionId] = &QoSReport{} } return qosManager.qosReports[epoch][sessionId] } From a142c3ae2706c259c09dd9f0b6a4826d506b8c29 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Mon, 13 Jan 2025 18:12:09 +0200 Subject: [PATCH 12/24] Add some more tests --- protocol/qos/qos_manager_test.go | 282 ++++++++++++++++++++-- protocol/qos/qos_mutator_relay_success.go | 4 + 2 files changed, 268 insertions(+), 18 deletions(-) diff --git a/protocol/qos/qos_manager_test.go b/protocol/qos/qos_manager_test.go index f07041b698..bb9a9c61e0 100644 --- a/protocol/qos/qos_manager_test.go +++ b/protocol/qos/qos_manager_test.go @@ -1,6 +1,8 @@ package qos import ( + "math" + "sync" "testing" "time" @@ -73,19 +75,16 @@ func TestSetLastReputationQoSReportRaw(t *testing.T) { require.Equal(t, testReport.Availability, report.Availability) } -func TestConcurrentAccess(t *testing.T) { +func TestMultipleEpochsAndSessions(t *testing.T) { qosManager := NewQoSManager() - epoch := uint64(1) - sessionID := int64(1) - providerAddr := "provider1" - // Run multiple operations concurrently - go func() { - for i := 0; i < 100; i++ { + // Test multiple epochs and sessions simultaneously + for epoch := uint64(1); epoch <= 3; epoch++ { + for sessionID := int64(1); sessionID <= 3; sessionID++ { doneChan := qosManager.CalculateQoS( epoch, sessionID, - providerAddr, + "provider1", 100*time.Millisecond, 200*time.Millisecond, 1, @@ -94,19 +93,266 @@ func TestConcurrentAccess(t *testing.T) { ) <-doneChan } - }() + } + + // Verify each epoch/session combination + for epoch := uint64(1); epoch <= 3; epoch++ { + for sessionID := int64(1); sessionID <= 3; sessionID++ { + require.Equal(t, uint64(1), qosManager.GetTotalRelays(epoch, sessionID)) + require.NotNil(t, qosManager.GetLastQoSReport(epoch, sessionID)) + } + } +} - go func() { - for i := 0; i < 50; i++ { - doneChan := qosManager.AddFailedRelay(epoch, sessionID) +func TestEdgeCaseLatencies(t *testing.T) { + qosManager := NewQoSManager() + epoch := uint64(1) + sessionID := int64(1) + + testCases := []struct { + name string + latency time.Duration + expectedLatency time.Duration + }{ + {"Zero Latency", 0, 100 * time.Millisecond}, + {"Extremely High Latency", 24 * time.Hour, 100 * time.Millisecond}, + {"Negative Expected Latency", 100 * time.Millisecond, -100 * time.Millisecond}, + {"Equal Latencies", 100 * time.Millisecond, 100 * time.Millisecond}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + doneChan := qosManager.CalculateQoS( + epoch, + sessionID, + "provider1", + tc.latency, + tc.expectedLatency, + 1, + 3, + 2, + ) <-doneChan + require.NotNil(t, qosManager.GetLastQoSReport(epoch, sessionID)) + }) + } +} + +func TestNilReportHandling(t *testing.T) { + qosManager := NewQoSManager() + epoch := uint64(1) + sessionID := int64(1) + + // Test setting nil report + doneChan := qosManager.SetLastReputationQoSReportRaw(epoch, sessionID, nil) + <-doneChan + + // Verify nil handling + report := qosManager.GetLastReputationQoSReportRaw(epoch, sessionID) + require.Nil(t, report) + + // Test non-existent epoch/session + require.Nil(t, qosManager.GetLastQoSReport(999, 999)) + require.Equal(t, uint64(0), qosManager.GetTotalRelays(999, 999)) + require.Equal(t, uint64(0), qosManager.GetAnsweredRelays(999, 999)) +} + +func TestHighConcurrencyScenario(t *testing.T) { + qosManager := NewQoSManager() + numGoroutines := 10 + operationsPerGoroutine := 1000 + + var wg sync.WaitGroup + wg.Add(numGoroutines * 3) // 3 different operation types + + // Launch multiple goroutines for CalculateQoS + for i := 0; i < numGoroutines; i++ { + go func(routineID int) { + defer wg.Done() + for j := 0; j < operationsPerGoroutine; j++ { + doneChan := qosManager.CalculateQoS( + uint64(routineID), + int64(j), + "provider1", + 100*time.Millisecond, + 200*time.Millisecond, + 1, + 3, + 2, + ) + <-doneChan + } + }(i) + } + + // Launch multiple goroutines for AddFailedRelay + for i := 0; i < numGoroutines; i++ { + go func(routineID int) { + defer wg.Done() + for j := 0; j < operationsPerGoroutine; j++ { + doneChan := qosManager.AddFailedRelay(uint64(routineID), int64(j)) + <-doneChan + } + }(i) + } + + // Launch multiple goroutines for SetLastReputationQoSReportRaw + for i := 0; i < numGoroutines; i++ { + go func(routineID int) { + defer wg.Done() + for j := 0; j < operationsPerGoroutine; j++ { + report := &pairingtypes.QualityOfServiceReport{ + Latency: sdk.NewDec(95), + Availability: sdk.NewDec(100), + } + doneChan := qosManager.SetLastReputationQoSReportRaw(uint64(routineID), int64(j), report) + <-doneChan + } + }(i) + } + + wg.Wait() + + // Verify some results + for i := 0; i < numGoroutines; i++ { + for j := 0; j < operationsPerGoroutine; j++ { + totalRelays := qosManager.GetTotalRelays(uint64(i), int64(j)) + require.Equal(t, uint64(2), totalRelays) // 1 successful + 1 failed relay + require.NotNil(t, qosManager.GetLastReputationQoSReportRaw(uint64(i), int64(j))) } - }() + } +} - // Give time for goroutines to complete - time.Sleep(100 * time.Millisecond) +func TestQoSParameterBoundaries(t *testing.T) { + qosManager := NewQoSManager() + epoch := uint64(1) + sessionID := int64(1) - // Verify the total number of relays - totalRelays := qosManager.GetTotalRelays(epoch, sessionID) - require.Equal(t, uint64(150), totalRelays) // 100 successful + 50 failed + testCases := []struct { + name string + latency time.Duration + expectedLatency time.Duration + blockHeightDiff int64 + numOfProviders int + servicersToCount int64 + }{ + {"Max Values", time.Duration(math.MaxInt64), time.Duration(math.MaxInt64), math.MaxInt, math.MaxInt, math.MaxInt}, + {"Min Values", 1, 1, 1, 1, 1}, + {"Zero Values", 0, 0, 0, 0, 0}, + {"Inverted Weights", 100 * time.Millisecond, 100 * time.Millisecond, 10, 5, 7}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + doneChan := qosManager.CalculateQoS( + epoch, + sessionID, + "provider1", + tc.latency, + tc.expectedLatency, + tc.blockHeightDiff, + tc.numOfProviders, + tc.servicersToCount, + ) + <-doneChan + // Verify that the manager doesn't panic and returns a report + report := qosManager.GetLastQoSReport(epoch, sessionID) + require.NotNil(t, report) + }) + } +} + +func TestSequentialOperations(t *testing.T) { + qosManager := NewQoSManager() + epoch := uint64(1) + sessionID := int64(1) + + t.Run("Sequential QoS Calculations", func(t *testing.T) { + // First calculation + doneChan := qosManager.CalculateQoS( + epoch, + sessionID, + "provider1", + 100*time.Millisecond, + 200*time.Millisecond, + 1, 3, 2, + ) + <-doneChan + firstReport := qosManager.GetLastQoSReport(epoch, sessionID) + + // Second calculation with different values + doneChan = qosManager.CalculateQoS( + epoch, + sessionID, + "provider1", + 300*time.Millisecond, + 200*time.Millisecond, + 1, 3, 2, + ) + <-doneChan + secondReport := qosManager.GetLastQoSReport(epoch, sessionID) + + require.NotEqual(t, firstReport, secondReport, "Reports should be different") + require.Equal(t, uint64(2), qosManager.GetTotalRelays(epoch, sessionID)) + }) + + t.Run("Mixed Operations Sequence", func(t *testing.T) { + // Reset with new epoch + epoch++ + + // Sequence: Calculate -> Fail -> Calculate + doneChan := qosManager.CalculateQoS( + epoch, + sessionID, + "provider1", + 100*time.Millisecond, + 200*time.Millisecond, + 1, 3, 2, + ) + <-doneChan + + doneChan = qosManager.AddFailedRelay(epoch, sessionID) + <-doneChan + + doneChan = qosManager.CalculateQoS( + epoch, + sessionID, + "provider1", + 100*time.Millisecond, + 200*time.Millisecond, + 1, 3, 2, + ) + <-doneChan + + require.Equal(t, uint64(3), qosManager.GetTotalRelays(epoch, sessionID)) + require.Equal(t, uint64(2), qosManager.GetAnsweredRelays(epoch, sessionID)) + }) +} + +func TestMemoryManagement(t *testing.T) { + qosManager := NewQoSManager() + + // Create data for multiple epochs + for epoch := uint64(1); epoch <= 100; epoch++ { + doneChan := qosManager.CalculateQoS( + epoch, + 1, + "provider1", + 100*time.Millisecond, + 200*time.Millisecond, + 1, 3, 2, + ) + <-doneChan + } + + // Verify old data is not taking up memory (if cleanup is implemented) + // Note: This test might need adjustment based on actual cleanup implementation + t.Run("Memory Cleanup", func(t *testing.T) { + // Add implementation-specific verification here + // For example, verify that very old epochs are cleaned up + veryOldEpoch := uint64(1) + report := qosManager.GetLastQoSReport(veryOldEpoch, 1) + require.Nil(t, report, "Old epoch data should be cleaned up") + t.Log("Memory cleanup behavior should be verified based on implementation") + }) } diff --git a/protocol/qos/qos_mutator_relay_success.go b/protocol/qos/qos_mutator_relay_success.go index 66e1cbb388..9b4452d164 100644 --- a/protocol/qos/qos_mutator_relay_success.go +++ b/protocol/qos/qos_mutator_relay_success.go @@ -51,6 +51,10 @@ func (qoSMutatorRelaySuccess *QoSMutatorRelaySuccess) Mutate(report *QoSReport) ) } + if qoSMutatorRelaySuccess.latency == 0 { + qoSMutatorRelaySuccess.latency = 1 * time.Microsecond + } + latencyScore := sdk.MinDec(sdk.OneDec(), sdk.NewDecFromInt(sdk.NewInt(int64(qoSMutatorRelaySuccess.expectedLatency))).Quo(sdk.NewDecFromInt(sdk.NewInt(int64(qoSMutatorRelaySuccess.latency))))) insertSorted := func(list []sdk.Dec, value sdk.Dec) []sdk.Dec { From a6c2edb6bcf5ce661d9005964dab8ea5e002cc4c Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Mon, 13 Jan 2025 18:42:09 +0200 Subject: [PATCH 13/24] Fix TestReportsClientFlows --- .../metrics/consumer_reports_client_test.go | 15 +++- utils/channeled_wait_group.go | 22 +++++ utils/channeled_wait_group_test.go | 90 +++++++++++++++++++ 3 files changed, 126 insertions(+), 1 deletion(-) create mode 100644 utils/channeled_wait_group.go create mode 100644 utils/channeled_wait_group_test.go diff --git a/protocol/metrics/consumer_reports_client_test.go b/protocol/metrics/consumer_reports_client_test.go index dfbc630290..570e81ac20 100644 --- a/protocol/metrics/consumer_reports_client_test.go +++ b/protocol/metrics/consumer_reports_client_test.go @@ -8,12 +8,15 @@ import ( "testing" "time" + "github.com/lavanet/lava/v4/utils" pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" "github.com/stretchr/testify/require" ) func TestReportsClientFlows(t *testing.T) { t.Run("one-shot", func(t *testing.T) { + serverWaitGroup := utils.NewChanneledWaitGroup() + serverWaitGroup.Add(3) // 2 reports + 1 conflict messages := []map[string]interface{}{} reqMap := []map[string]interface{}{} serverHandle := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -26,6 +29,9 @@ func TestReportsClientFlows(t *testing.T) { reqMap = []map[string]interface{}{} w.WriteHeader(http.StatusOK) fmt.Fprint(w, `{"jsonrpc":"2.0","id":1,"result":"0x10a7a08"}`) + for range messages { + serverWaitGroup.Done() + } }) mockServer := httptest.NewServer(serverHandle) @@ -45,7 +51,14 @@ func TestReportsClientFlows(t *testing.T) { SigBlocks: []byte{}, Metadata: []pairingtypes.Metadata{}, }, &pairingtypes.RelayRequest{}, &pairingtypes.RelayReply{})) - time.Sleep(110 * time.Millisecond) + + select { + case <-serverWaitGroup.Wait(): + // all done + case <-time.After(2 * time.Second): + t.Fatal("Timeout reached before reports were received") + } + require.Len(t, messages, 3) reports := 0 conflicts := 0 diff --git a/utils/channeled_wait_group.go b/utils/channeled_wait_group.go new file mode 100644 index 0000000000..a480fb2c8a --- /dev/null +++ b/utils/channeled_wait_group.go @@ -0,0 +1,22 @@ +package utils + +import "sync" + +type ChanneledWaitGroup struct { + sync.WaitGroup + doneChan chan struct{} +} + +func NewChanneledWaitGroup() *ChanneledWaitGroup { + return &ChanneledWaitGroup{ + doneChan: make(chan struct{}, 1), + } +} + +func (wg *ChanneledWaitGroup) Wait() <-chan struct{} { + go func() { + wg.WaitGroup.Wait() + wg.doneChan <- struct{}{} + }() + return wg.doneChan +} diff --git a/utils/channeled_wait_group_test.go b/utils/channeled_wait_group_test.go new file mode 100644 index 0000000000..75a430bb03 --- /dev/null +++ b/utils/channeled_wait_group_test.go @@ -0,0 +1,90 @@ +package utils + +import ( + "testing" + "time" +) + +func TestChanneledWaitGroup(t *testing.T) { + t.Run("basic functionality", func(t *testing.T) { + wg := NewChanneledWaitGroup() + wg.Add(2) + + go func() { + time.Sleep(50 * time.Millisecond) + wg.Done() + }() + + go func() { + time.Sleep(100 * time.Millisecond) + wg.Done() + }() + + select { + case <-wg.Wait(): + // Success + case <-time.After(200 * time.Millisecond): + t.Fatal("timeout waiting for goroutines") + } + }) + + t.Run("zero count should complete immediately", func(t *testing.T) { + wg := NewChanneledWaitGroup() + + select { + case <-wg.Wait(): + // Success + case <-time.After(100 * time.Millisecond): + t.Fatal("should complete immediately with zero count") + } + }) + + t.Run("multiple waits should all receive completion", func(t *testing.T) { + wg := NewChanneledWaitGroup() + wg.Add(1) + + // Start three goroutines waiting + for i := 0; i < 3; i++ { + go func() { + select { + case <-wg.Wait(): + // Success + case <-time.After(200 * time.Millisecond): + t.Error("timeout waiting for completion") + } + }() + } + + time.Sleep(50 * time.Millisecond) // Give waiters time to start + wg.Done() + time.Sleep(100 * time.Millisecond) // Give waiters time to complete + }) + + t.Run("reuse after completion", func(t *testing.T) { + wg := NewChanneledWaitGroup() + wg.Add(1) + wg.Done() + + // First wait should complete + select { + case <-wg.Wait(): + // Success + case <-time.After(100 * time.Millisecond): + t.Fatal("first wait should complete") + } + + // Reset and use again + wg.Add(1) + go func() { + time.Sleep(50 * time.Millisecond) + wg.Done() + }() + + select { + case <-wg.Wait(): + // Success + case <-time.After(100 * time.Millisecond): + t.Fatal("second wait should complete") + } + }) +} From 4c0e7182b77617ab5a4a941a7e55a84797f0b250 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Tue, 14 Jan 2025 11:39:05 +0200 Subject: [PATCH 14/24] Remove redundant test --- protocol/qos/qos_manager_test.go | 38 +++----------------------------- 1 file changed, 3 insertions(+), 35 deletions(-) diff --git a/protocol/qos/qos_manager_test.go b/protocol/qos/qos_manager_test.go index bb9a9c61e0..d5acc7b28f 100644 --- a/protocol/qos/qos_manager_test.go +++ b/protocol/qos/qos_manager_test.go @@ -263,42 +263,10 @@ func TestQoSParameterBoundaries(t *testing.T) { } func TestSequentialOperations(t *testing.T) { - qosManager := NewQoSManager() - epoch := uint64(1) - sessionID := int64(1) - - t.Run("Sequential QoS Calculations", func(t *testing.T) { - // First calculation - doneChan := qosManager.CalculateQoS( - epoch, - sessionID, - "provider1", - 100*time.Millisecond, - 200*time.Millisecond, - 1, 3, 2, - ) - <-doneChan - firstReport := qosManager.GetLastQoSReport(epoch, sessionID) - - // Second calculation with different values - doneChan = qosManager.CalculateQoS( - epoch, - sessionID, - "provider1", - 300*time.Millisecond, - 200*time.Millisecond, - 1, 3, 2, - ) - <-doneChan - secondReport := qosManager.GetLastQoSReport(epoch, sessionID) - - require.NotEqual(t, firstReport, secondReport, "Reports should be different") - require.Equal(t, uint64(2), qosManager.GetTotalRelays(epoch, sessionID)) - }) - t.Run("Mixed Operations Sequence", func(t *testing.T) { - // Reset with new epoch - epoch++ + qosManager := NewQoSManager() + epoch := uint64(1) + sessionID := int64(1) // Sequence: Calculate -> Fail -> Calculate doneChan := qosManager.CalculateQoS( From ecc945552f8b8456941444da12595151929887c1 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Tue, 14 Jan 2025 12:29:27 +0200 Subject: [PATCH 15/24] Save test for later --- protocol/qos/qos_manager_test.go | 55 ++++++++++++++++---------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/protocol/qos/qos_manager_test.go b/protocol/qos/qos_manager_test.go index d5acc7b28f..9cf46dffef 100644 --- a/protocol/qos/qos_manager_test.go +++ b/protocol/qos/qos_manager_test.go @@ -297,30 +297,31 @@ func TestSequentialOperations(t *testing.T) { }) } -func TestMemoryManagement(t *testing.T) { - qosManager := NewQoSManager() - - // Create data for multiple epochs - for epoch := uint64(1); epoch <= 100; epoch++ { - doneChan := qosManager.CalculateQoS( - epoch, - 1, - "provider1", - 100*time.Millisecond, - 200*time.Millisecond, - 1, 3, 2, - ) - <-doneChan - } - - // Verify old data is not taking up memory (if cleanup is implemented) - // Note: This test might need adjustment based on actual cleanup implementation - t.Run("Memory Cleanup", func(t *testing.T) { - // Add implementation-specific verification here - // For example, verify that very old epochs are cleaned up - veryOldEpoch := uint64(1) - report := qosManager.GetLastQoSReport(veryOldEpoch, 1) - require.Nil(t, report, "Old epoch data should be cleaned up") - t.Log("Memory cleanup behavior should be verified based on implementation") - }) -} +// TODO: Enable this test when we register the QoSManager to epoch updater +// func TestMemoryManagement(t *testing.T) { +// qosManager := NewQoSManager() + +// // Create data for multiple epochs +// for epoch := uint64(1); epoch <= 100; epoch++ { +// doneChan := qosManager.CalculateQoS( +// epoch, +// 1, +// "provider1", +// 100*time.Millisecond, +// 200*time.Millisecond, +// 1, 3, 2, +// ) +// <-doneChan +// } + +// // Verify old data is not taking up memory (if cleanup is implemented) +// // Note: This test might need adjustment based on actual cleanup implementation +// t.Run("Memory Cleanup", func(t *testing.T) { +// // Add implementation-specific verification here +// // For example, verify that very old epochs are cleaned up +// veryOldEpoch := uint64(1) +// report := qosManager.GetLastQoSReport(veryOldEpoch, 1) +// require.Nil(t, report, "Old epoch data should be cleaned up") +// t.Log("Memory cleanup behavior should be verified based on implementation") +// }) +// } From 1e85a9fb57f27a57d8052682f10ebdebcf02e1f5 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Tue, 14 Jan 2025 13:57:09 +0200 Subject: [PATCH 16/24] Small fix --- protocol/lavasession/single_consumer_session.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protocol/lavasession/single_consumer_session.go b/protocol/lavasession/single_consumer_session.go index 1cf4650587..25c9a1663c 100644 --- a/protocol/lavasession/single_consumer_session.go +++ b/protocol/lavasession/single_consumer_session.go @@ -45,7 +45,7 @@ func (cs *SingleConsumerSession) getQosComputedResultOrZero() sdk.Dec { return computedReputation } utils.LavaFormatDebug("Failed computing QoS used for error parsing, could happen if we have no sync data or one of the fields is zero", - utils.LogAttr("Report", cs.QoSManager.GetLastReputationQoSReportRaw(cs.epoch, cs.SessionId)), + utils.LogAttr("Report", lastReputationReport), utils.LogAttr("error", errComputing), ) } From fc67e266351b911caa14eec4a4e4f15ea85b5952 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Tue, 14 Jan 2025 13:59:42 +0200 Subject: [PATCH 17/24] Fix another bug --- .../lavasession/single_consumer_session.go | 2 +- protocol/qos/qos_manager.go | 9 +++++++++ protocol/qos/qos_mutator_set_reputation.go | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 protocol/qos/qos_mutator_set_reputation.go diff --git a/protocol/lavasession/single_consumer_session.go b/protocol/lavasession/single_consumer_session.go index 25c9a1663c..7ff919c7c2 100644 --- a/protocol/lavasession/single_consumer_session.go +++ b/protocol/lavasession/single_consumer_session.go @@ -57,7 +57,7 @@ func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, scs.RelayNum += RelayNumberIncrement // increase relayNum if scs.RelayNum > 1 { // we only set reputation for sessions with more than one successful relays, this guarantees data within the epoch exists - scs.QoSManager.SetLastReputationQoSReportRaw(scs.epoch, scs.SessionId, reputationReport) + scs.QoSManager.SetLastReputationQoSReport(scs.epoch, scs.SessionId, reputationReport) scs.QoSManager.SetLastReputationQoSReportRaw(scs.epoch, scs.SessionId, rawReputationReport) } scs.usedProviders = usedProviders diff --git a/protocol/qos/qos_manager.go b/protocol/qos/qos_manager.go index 6b74eef6c3..159006e28b 100644 --- a/protocol/qos/qos_manager.go +++ b/protocol/qos/qos_manager.go @@ -101,6 +101,15 @@ func (qosManager *QoSManager) SetLastReputationQoSReportRaw(epoch uint64, sessio return doneChan } +func (qosManager *QoSManager) SetLastReputationQoSReport(epoch uint64, sessionId int64, report *pairingtypes.QualityOfServiceReport) DoneChan { + qosMutatorBase, doneChan := qosManager.createQoSMutatorBase(epoch, sessionId) + qosManager.mutatorsQueue <- &QoSMutatorSetReputation{ + QoSMutatorBase: *qosMutatorBase, + report: report, + } + return doneChan +} + func (qosManager *QoSManager) getQoSReport(epoch uint64, sessionId int64) *QoSReport { qosManager.lock.RLock() defer qosManager.lock.RUnlock() diff --git a/protocol/qos/qos_mutator_set_reputation.go b/protocol/qos/qos_mutator_set_reputation.go new file mode 100644 index 0000000000..04e59bdcf5 --- /dev/null +++ b/protocol/qos/qos_mutator_set_reputation.go @@ -0,0 +1,18 @@ +package qos + +import ( + pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" +) + +// Mutator to set usage for a session +type QoSMutatorSetReputation struct { + QoSMutatorBase + report *pairingtypes.QualityOfServiceReport +} + +func (qoSMutatorSetReputation *QoSMutatorSetReputation) Mutate(report *QoSReport) { + defer func() { + qoSMutatorSetReputation.doneChan <- struct{}{} + }() + report.lastReputationQoSReport = qoSMutatorSetReputation.report +} From b60acc7cd6b612fbcf75d43fecfae47d0d01c638 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Mon, 20 Jan 2025 14:29:51 +0200 Subject: [PATCH 18/24] Post merge fix --- protocol/lavaprotocol/request_builder.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protocol/lavaprotocol/request_builder.go b/protocol/lavaprotocol/request_builder.go index a7fd0246dc..b431298568 100644 --- a/protocol/lavaprotocol/request_builder.go +++ b/protocol/lavaprotocol/request_builder.go @@ -75,7 +75,7 @@ func ConstructRelaySession(lavaChainID string, relayRequestData *pairingtypes.Re copiedReputation := copyQoSServiceReport(singleConsumerSession.QoSManager.GetLastReputationQoSReportRaw(uint64(epoch), singleConsumerSession.SessionId)) // copy raw report for the node // validate and fix QoS excellence report before sending it to the node - copiedExcellenceQOS.ValidateAndFixQoSExcellence() + copiedReputation.ValidateAndFixQoSExcellence() return &pairingtypes.RelaySession{ SpecId: chainID, From bc97cc53aa22a55113ed5faa1f9161243bf27045 Mon Sep 17 00:00:00 2001 From: Elad Gildnur Date: Wed, 22 Jan 2025 14:21:47 +0200 Subject: [PATCH 19/24] Remove obsolete QoS Raw --- protocol/lavaprotocol/request_builder.go | 2 +- .../lavasession/consumer_session_manager.go | 2 +- .../lavasession/single_consumer_session.go | 2 +- protocol/qos/qos_manager.go | 30 +++++++------------ protocol/qos/qos_manager_test.go | 16 +++++----- .../qos/qos_mutator_set_reputation_raw.go | 18 ----------- 6 files changed, 21 insertions(+), 49 deletions(-) delete mode 100644 protocol/qos/qos_mutator_set_reputation_raw.go diff --git a/protocol/lavaprotocol/request_builder.go b/protocol/lavaprotocol/request_builder.go index 322e9c5591..dff9090259 100644 --- a/protocol/lavaprotocol/request_builder.go +++ b/protocol/lavaprotocol/request_builder.go @@ -72,7 +72,7 @@ func ConstructRelaySession(lavaChainID string, relayRequestData *pairingtypes.Re } copiedQOS := copyQoSServiceReport(singleConsumerSession.QoSManager.GetLastQoSReport(uint64(epoch), singleConsumerSession.SessionId)) - copiedReputation := copyQoSServiceReport(singleConsumerSession.QoSManager.GetLastReputationQoSReportRaw(uint64(epoch), singleConsumerSession.SessionId)) // copy raw report for the node + copiedReputation := copyQoSServiceReport(singleConsumerSession.QoSManager.GetLastReputationQoSReport(uint64(epoch), singleConsumerSession.SessionId)) // copy reputation report for the node return &pairingtypes.RelaySession{ SpecId: chainID, diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index 44c4c616e2..d15d38d670 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -1075,7 +1075,7 @@ func (csm *ConsumerSessionManager) updateMetricsManager(consumerSession *SingleC } var lastReputation *pairingtypes.QualityOfServiceReport - lastReputationReport := consumerSession.QoSManager.GetLastReputationQoSReportRaw(csm.atomicReadCurrentEpoch(), consumerSession.SessionId) + lastReputationReport := consumerSession.QoSManager.GetLastReputationQoSReport(csm.atomicReadCurrentEpoch(), consumerSession.SessionId) if lastReputationReport != nil { qosRep := *lastReputationReport lastReputation = &qosRep diff --git a/protocol/lavasession/single_consumer_session.go b/protocol/lavasession/single_consumer_session.go index 646aa1ab44..1d71889a5f 100644 --- a/protocol/lavasession/single_consumer_session.go +++ b/protocol/lavasession/single_consumer_session.go @@ -38,7 +38,7 @@ func (cs *SingleConsumerSession) CalculateExpectedLatency(timeoutGivenToRelay ti // cs should be locked here to use this method, returns the computed qos or zero if last qos is nil or failed to compute. func (cs *SingleConsumerSession) getQosComputedResultOrZero() sdk.Dec { - lastReputationReport := cs.QoSManager.GetLastReputationQoSReportRaw(cs.epoch, cs.SessionId) + lastReputationReport := cs.QoSManager.GetLastReputationQoSReport(cs.epoch, cs.SessionId) if lastReputationReport != nil { computedReputation, errComputing := lastReputationReport.ComputeQoSExcellence() if errComputing == nil { // if we failed to compute the qos will be 0 so this provider wont be picked to return the error in case we get it diff --git a/protocol/qos/qos_manager.go b/protocol/qos/qos_manager.go index 159006e28b..2640b04166 100644 --- a/protocol/qos/qos_manager.go +++ b/protocol/qos/qos_manager.go @@ -9,15 +9,14 @@ import ( ) type QoSReport struct { - lastQoSReport *pairingtypes.QualityOfServiceReport - lastReputationQoSReport *pairingtypes.QualityOfServiceReport - lastReputationQoSReportRaw *pairingtypes.QualityOfServiceReport - latencyScoreList []sdk.Dec - syncScoreSum int64 - totalSyncScore int64 - totalRelays uint64 - answeredRelays uint64 - lock sync.RWMutex + lastQoSReport *pairingtypes.QualityOfServiceReport + lastReputationQoSReport *pairingtypes.QualityOfServiceReport + latencyScoreList []sdk.Dec + syncScoreSum int64 + totalSyncScore int64 + totalRelays uint64 + answeredRelays uint64 + lock sync.RWMutex } type DoneChan <-chan struct{} @@ -92,15 +91,6 @@ func (qosManager *QoSManager) AddFailedRelay(epoch uint64, sessionId int64) Done return doneChan } -func (qosManager *QoSManager) SetLastReputationQoSReportRaw(epoch uint64, sessionId int64, report *pairingtypes.QualityOfServiceReport) DoneChan { - qosMutatorBase, doneChan := qosManager.createQoSMutatorBase(epoch, sessionId) - qosManager.mutatorsQueue <- &QoSMutatorSetReputationRaw{ - QoSMutatorBase: *qosMutatorBase, - report: report, - } - return doneChan -} - func (qosManager *QoSManager) SetLastReputationQoSReport(epoch uint64, sessionId int64, report *pairingtypes.QualityOfServiceReport) DoneChan { qosMutatorBase, doneChan := qosManager.createQoSMutatorBase(epoch, sessionId) qosManager.mutatorsQueue <- &QoSMutatorSetReputation{ @@ -130,7 +120,7 @@ func (qosManager *QoSManager) GetLastQoSReport(epoch uint64, sessionId int64) *p return qosReport.lastQoSReport } -func (qosManager *QoSManager) GetLastReputationQoSReportRaw(epoch uint64, sessionId int64) *pairingtypes.QualityOfServiceReport { +func (qosManager *QoSManager) GetLastReputationQoSReport(epoch uint64, sessionId int64) *pairingtypes.QualityOfServiceReport { qosReport := qosManager.getQoSReport(epoch, sessionId) if qosReport == nil { return nil @@ -138,7 +128,7 @@ func (qosManager *QoSManager) GetLastReputationQoSReportRaw(epoch uint64, sessio qosReport.lock.RLock() defer qosReport.lock.RUnlock() - return qosReport.lastReputationQoSReportRaw + return qosReport.lastReputationQoSReport } func (qosManager *QoSManager) GetAnsweredRelays(epoch uint64, sessionId int64) uint64 { diff --git a/protocol/qos/qos_manager_test.go b/protocol/qos/qos_manager_test.go index 9cf46dffef..61f9d7ac5f 100644 --- a/protocol/qos/qos_manager_test.go +++ b/protocol/qos/qos_manager_test.go @@ -56,7 +56,7 @@ func TestAddFailedRelay(t *testing.T) { require.Equal(t, uint64(0), answeredRelays) } -func TestSetLastReputationQoSReportRaw(t *testing.T) { +func TestSetLastReputationQoSReport(t *testing.T) { qosManager := NewQoSManager() epoch := uint64(1) sessionID := int64(1) @@ -66,10 +66,10 @@ func TestSetLastReputationQoSReportRaw(t *testing.T) { Availability: sdk.NewDec(100), } - doneChan := qosManager.SetLastReputationQoSReportRaw(epoch, sessionID, testReport) + doneChan := qosManager.SetLastReputationQoSReport(epoch, sessionID, testReport) <-doneChan // Wait for processing - report := qosManager.GetLastReputationQoSReportRaw(epoch, sessionID) + report := qosManager.GetLastReputationQoSReport(epoch, sessionID) require.NotNil(t, report) require.Equal(t, testReport.Latency, report.Latency) require.Equal(t, testReport.Availability, report.Availability) @@ -144,11 +144,11 @@ func TestNilReportHandling(t *testing.T) { sessionID := int64(1) // Test setting nil report - doneChan := qosManager.SetLastReputationQoSReportRaw(epoch, sessionID, nil) + doneChan := qosManager.SetLastReputationQoSReport(epoch, sessionID, nil) <-doneChan // Verify nil handling - report := qosManager.GetLastReputationQoSReportRaw(epoch, sessionID) + report := qosManager.GetLastReputationQoSReport(epoch, sessionID) require.Nil(t, report) // Test non-existent epoch/session @@ -196,7 +196,7 @@ func TestHighConcurrencyScenario(t *testing.T) { }(i) } - // Launch multiple goroutines for SetLastReputationQoSReportRaw + // Launch multiple goroutines for SetLastReputationQoSReport for i := 0; i < numGoroutines; i++ { go func(routineID int) { defer wg.Done() @@ -205,7 +205,7 @@ func TestHighConcurrencyScenario(t *testing.T) { Latency: sdk.NewDec(95), Availability: sdk.NewDec(100), } - doneChan := qosManager.SetLastReputationQoSReportRaw(uint64(routineID), int64(j), report) + doneChan := qosManager.SetLastReputationQoSReport(uint64(routineID), int64(j), report) <-doneChan } }(i) @@ -218,7 +218,7 @@ func TestHighConcurrencyScenario(t *testing.T) { for j := 0; j < operationsPerGoroutine; j++ { totalRelays := qosManager.GetTotalRelays(uint64(i), int64(j)) require.Equal(t, uint64(2), totalRelays) // 1 successful + 1 failed relay - require.NotNil(t, qosManager.GetLastReputationQoSReportRaw(uint64(i), int64(j))) + require.NotNil(t, qosManager.GetLastReputationQoSReport(uint64(i), int64(j))) } } } diff --git a/protocol/qos/qos_mutator_set_reputation_raw.go b/protocol/qos/qos_mutator_set_reputation_raw.go deleted file mode 100644 index e7147f1485..0000000000 --- a/protocol/qos/qos_mutator_set_reputation_raw.go +++ /dev/null @@ -1,18 +0,0 @@ -package qos - -import ( - pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" -) - -// Mutator to set usage for a session -type QoSMutatorSetReputationRaw struct { - QoSMutatorBase - report *pairingtypes.QualityOfServiceReport -} - -func (qoSMutatorSetReputationRaw *QoSMutatorSetReputationRaw) Mutate(report *QoSReport) { - defer func() { - qoSMutatorSetReputationRaw.doneChan <- struct{}{} - }() - report.lastReputationQoSReportRaw = qoSMutatorSetReputationRaw.report -} From 3f6a5cb8586bc81fa3a6699bcfca3b0a23a4f4b3 Mon Sep 17 00:00:00 2001 From: Ran Mishael Date: Tue, 28 Jan 2025 15:05:44 +0100 Subject: [PATCH 20/24] fixing channel risky handling. --- .../finalization_consensus_test.go | 22 +--- protocol/qos/qos_manager.go | 106 +++++------------- protocol/qos/qos_manager_test.go | 47 +++----- protocol/qos/qos_mutator_base.go | 5 +- protocol/qos/qos_mutator_relay_failure.go | 5 +- protocol/qos/qos_mutator_relay_success.go | 6 +- protocol/qos/qos_mutator_set_reputation.go | 5 +- protocol/qos/qos_report.go | 61 ++++++++++ 8 files changed, 114 insertions(+), 143 deletions(-) create mode 100644 protocol/qos/qos_report.go diff --git a/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go b/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go index 0dc7eea0ea..4f1bd7b552 100644 --- a/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go +++ b/protocol/lavaprotocol/finalizationconsensus/finalization_consensus_test.go @@ -207,14 +207,6 @@ func TestQoS(t *testing.T) { rand.InitRandomSeed() chainsToTest := []string{"APT1", "LAV1", "ETH1"} - waitForDoneChan := func(doneChan <-chan struct{}) { - select { - case <-doneChan: - case <-time.After(5 * time.Second): - t.Fatal("timeout waiting for qos calculation to finish") - } - } - for i := 0; i < 10; i++ { for _, chainID := range chainsToTest { t.Run(chainID, func(t *testing.T) { @@ -292,7 +284,7 @@ func TestQoS(t *testing.T) { currentLatency := time.Millisecond expectedLatency := time.Millisecond latestServicedBlock := expectedBH - waitForDoneChan(singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1)) + singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) require.Equal(t, uint64(1), singleConsumerSession.QoSManager.GetAnsweredRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, uint64(1), singleConsumerSession.QoSManager.GetTotalRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, int64(1), singleConsumerSession.QoSManager.GetSyncScoreSum(epoch, singleConsumerSession.SessionId)) @@ -304,7 +296,7 @@ func TestQoS(t *testing.T) { require.Equal(t, sdk.OneDec(), lastQoSReport.Latency) latestServicedBlock = expectedBH + 1 - waitForDoneChan(singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1)) + singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) require.Equal(t, uint64(2), singleConsumerSession.QoSManager.GetAnsweredRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, uint64(2), singleConsumerSession.QoSManager.GetTotalRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, int64(2), singleConsumerSession.QoSManager.GetSyncScoreSum(epoch, singleConsumerSession.SessionId)) @@ -315,8 +307,8 @@ func TestQoS(t *testing.T) { require.Equal(t, sdk.OneDec(), lastQoSReport.Sync) require.Equal(t, sdk.OneDec(), lastQoSReport.Latency) - waitForDoneChan(singleConsumerSession.QoSManager.AddFailedRelay(epoch, singleConsumerSession.SessionId)) // this is how we add a failure - waitForDoneChan(singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1)) + singleConsumerSession.QoSManager.AddFailedRelay(epoch, singleConsumerSession.SessionId) // this is how we add a failure + singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, 1) require.Equal(t, uint64(3), singleConsumerSession.QoSManager.GetAnsweredRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, uint64(4), singleConsumerSession.QoSManager.GetTotalRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetSyncScoreSum(epoch, singleConsumerSession.SessionId)) @@ -328,7 +320,7 @@ func TestQoS(t *testing.T) { require.Equal(t, sdk.OneDec(), lastQoSReport.Latency) latestServicedBlock = expectedBH - 1 // is one block below threshold - waitForDoneChan(singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1)) + singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) require.Equal(t, uint64(4), singleConsumerSession.QoSManager.GetAnsweredRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, uint64(5), singleConsumerSession.QoSManager.GetTotalRelays(epoch, singleConsumerSession.SessionId)) require.Equal(t, int64(3), singleConsumerSession.QoSManager.GetSyncScoreSum(epoch, singleConsumerSession.SessionId)) @@ -341,11 +333,9 @@ func TestQoS(t *testing.T) { latestServicedBlock = expectedBH + 1 // add in a loop so availability goes above 95% - doneChan := make(<-chan struct{}) for i := 5; i < 100; i++ { - doneChan = singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) + singleConsumerSession.QoSManager.CalculateQoS(epoch, singleConsumerSession.SessionId, "", currentLatency, expectedLatency*2, expectedBH-latestServicedBlock, numOfProviders, 1) } - waitForDoneChan(doneChan) lastQoSReport = singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId) require.Equal(t, sdk.MustNewDecFromStr("0.8"), lastQoSReport.Availability) // because availability below 95% is 0 diff --git a/protocol/qos/qos_manager.go b/protocol/qos/qos_manager.go index 2640b04166..89b5a39f44 100644 --- a/protocol/qos/qos_manager.go +++ b/protocol/qos/qos_manager.go @@ -4,49 +4,20 @@ import ( "sync" "time" - sdk "github.com/cosmos/cosmos-sdk/types" pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" ) -type QoSReport struct { - lastQoSReport *pairingtypes.QualityOfServiceReport - lastReputationQoSReport *pairingtypes.QualityOfServiceReport - latencyScoreList []sdk.Dec - syncScoreSum int64 - totalSyncScore int64 - totalRelays uint64 - answeredRelays uint64 - lock sync.RWMutex -} - -type DoneChan <-chan struct{} - type QoSManager struct { - qosReports map[uint64]map[int64]*QoSReport // first key is the epoch, second key is the session id - mutatorsQueue chan Mutator - lock sync.RWMutex + qosReports map[uint64]map[int64]*QoSReport // first key is the epoch, second key is the session id + lock sync.RWMutex } func NewQoSManager() *QoSManager { qosManager := &QoSManager{} qosManager.qosReports = make(map[uint64]map[int64]*QoSReport) - qosManager.mutatorsQueue = make(chan Mutator, 10000000) // Buffer of 10 Million mutators - go qosManager.processMutations() return qosManager } -func (qosManager *QoSManager) processMutations() { - for mutator := range qosManager.mutatorsQueue { - epoch, sessionId := mutator.GetEpochAndSessionId() - qosReport := qosManager.fetchOrSetSessionFromMap(epoch, sessionId) - func() { - qosReport.lock.Lock() - defer qosReport.lock.Unlock() - mutator.Mutate(qosReport) - }() - } -} - func (qosManager *QoSManager) fetchOrSetSessionFromMap(epoch uint64, sessionId int64) *QoSReport { qosManager.lock.Lock() defer qosManager.lock.Unlock() @@ -59,51 +30,48 @@ func (qosManager *QoSManager) fetchOrSetSessionFromMap(epoch uint64, sessionId i return qosManager.qosReports[epoch][sessionId] } -func (qosManager *QoSManager) createQoSMutatorBase(epoch uint64, sessionId int64) (*QoSMutatorBase, chan struct{}) { - doneChan := make(chan struct{}, 1) // Must be buffered to avoid freezing the queue +func (qosManager *QoSManager) createQoSMutatorBase(epoch uint64, sessionId int64) *QoSMutatorBase { qosMutatorBase := &QoSMutatorBase{ epoch: epoch, sessionId: sessionId, - doneChan: doneChan, } - return qosMutatorBase, doneChan + return qosMutatorBase +} + +func (qm *QoSManager) mutate(mutator Mutator) { + qosReport := qm.fetchOrSetSessionFromMap(mutator.GetEpochAndSessionId()) + qosReport.mutate(mutator) } -func (qosManager *QoSManager) CalculateQoS(epoch uint64, sessionId int64, providerAddress string, latency, expectedLatency time.Duration, blockHeightDiff int64, numOfProviders int, servicersToCount int64) DoneChan { - qosMutatorBase, doneChan := qosManager.createQoSMutatorBase(epoch, sessionId) - qosManager.mutatorsQueue <- &QoSMutatorRelaySuccess{ - QoSMutatorBase: *qosMutatorBase, +func (qosManager *QoSManager) CalculateQoS(epoch uint64, sessionId int64, providerAddress string, latency, expectedLatency time.Duration, blockHeightDiff int64, numOfProviders int, servicersToCount int64) { + qosManager.mutate(&QoSMutatorRelaySuccess{ + QoSMutatorBase: qosManager.createQoSMutatorBase(epoch, sessionId), providerAddress: providerAddress, latency: latency, expectedLatency: expectedLatency, blockHeightDiff: blockHeightDiff, numOfProviders: numOfProviders, servicersToCount: servicersToCount, - } - return doneChan + }) } -func (qosManager *QoSManager) AddFailedRelay(epoch uint64, sessionId int64) DoneChan { - qosMutatorBase, doneChan := qosManager.createQoSMutatorBase(epoch, sessionId) - qosManager.mutatorsQueue <- &QoSMutatorRelayFailure{ - QoSMutatorBase: *qosMutatorBase, - } - return doneChan +func (qosManager *QoSManager) AddFailedRelay(epoch uint64, sessionId int64) { + qosManager.mutate(&QoSMutatorRelayFailure{ + QoSMutatorBase: qosManager.createQoSMutatorBase(epoch, sessionId), + }) } -func (qosManager *QoSManager) SetLastReputationQoSReport(epoch uint64, sessionId int64, report *pairingtypes.QualityOfServiceReport) DoneChan { - qosMutatorBase, doneChan := qosManager.createQoSMutatorBase(epoch, sessionId) - qosManager.mutatorsQueue <- &QoSMutatorSetReputation{ - QoSMutatorBase: *qosMutatorBase, +func (qosManager *QoSManager) SetLastReputationQoSReport(epoch uint64, sessionId int64, report *pairingtypes.QualityOfServiceReport) { + qosManager.mutate(&QoSMutatorSetReputation{ + QoSMutatorBase: qosManager.createQoSMutatorBase(epoch, sessionId), report: report, - } - return doneChan + }) } func (qosManager *QoSManager) getQoSReport(epoch uint64, sessionId int64) *QoSReport { qosManager.lock.RLock() defer qosManager.lock.RUnlock() - if qosManager.qosReports[epoch] == nil || qosManager.qosReports[epoch][sessionId] == nil { + if qosManager.qosReports[epoch] == nil { return nil } return qosManager.qosReports[epoch][sessionId] @@ -114,10 +82,7 @@ func (qosManager *QoSManager) GetLastQoSReport(epoch uint64, sessionId int64) *p if qosReport == nil { return nil } - - qosReport.lock.RLock() - defer qosReport.lock.RUnlock() - return qosReport.lastQoSReport + return qosReport.getLastQoSReport() } func (qosManager *QoSManager) GetLastReputationQoSReport(epoch uint64, sessionId int64) *pairingtypes.QualityOfServiceReport { @@ -125,10 +90,7 @@ func (qosManager *QoSManager) GetLastReputationQoSReport(epoch uint64, sessionId if qosReport == nil { return nil } - - qosReport.lock.RLock() - defer qosReport.lock.RUnlock() - return qosReport.lastReputationQoSReport + return qosReport.getLastReputationQoSReport() } func (qosManager *QoSManager) GetAnsweredRelays(epoch uint64, sessionId int64) uint64 { @@ -136,10 +98,7 @@ func (qosManager *QoSManager) GetAnsweredRelays(epoch uint64, sessionId int64) u if qosReport == nil { return 0 } - - qosReport.lock.RLock() - defer qosReport.lock.RUnlock() - return qosReport.answeredRelays + return qosReport.getAnsweredRelays() } func (qosManager *QoSManager) GetTotalRelays(epoch uint64, sessionId int64) uint64 { @@ -147,10 +106,7 @@ func (qosManager *QoSManager) GetTotalRelays(epoch uint64, sessionId int64) uint if qosReport == nil { return 0 } - - qosReport.lock.RLock() - defer qosReport.lock.RUnlock() - return qosReport.totalRelays + return qosReport.getTotalRelays() } func (qosManager *QoSManager) GetSyncScoreSum(epoch uint64, sessionId int64) int64 { @@ -158,10 +114,7 @@ func (qosManager *QoSManager) GetSyncScoreSum(epoch uint64, sessionId int64) int if qosReport == nil { return 0 } - - qosReport.lock.RLock() - defer qosReport.lock.RUnlock() - return qosReport.syncScoreSum + return qosReport.getSyncScoreSum() } func (qosManager *QoSManager) GetTotalSyncScore(epoch uint64, sessionId int64) int64 { @@ -169,8 +122,5 @@ func (qosManager *QoSManager) GetTotalSyncScore(epoch uint64, sessionId int64) i if qosReport == nil { return 0 } - - qosReport.lock.RLock() - defer qosReport.lock.RUnlock() - return qosReport.totalSyncScore + return qosReport.getTotalSyncScore() } diff --git a/protocol/qos/qos_manager_test.go b/protocol/qos/qos_manager_test.go index 61f9d7ac5f..62567b9fc6 100644 --- a/protocol/qos/qos_manager_test.go +++ b/protocol/qos/qos_manager_test.go @@ -18,7 +18,7 @@ func TestCalculateQoS(t *testing.T) { providerAddr := "provider1" // Test successful relay - doneChan := qosManager.CalculateQoS( + qosManager.CalculateQoS( epoch, sessionID, providerAddr, @@ -29,8 +29,6 @@ func TestCalculateQoS(t *testing.T) { 2, ) - <-doneChan // Wait for processing - report := qosManager.GetLastQoSReport(epoch, sessionID) require.NotNil(t, report) @@ -46,9 +44,7 @@ func TestAddFailedRelay(t *testing.T) { epoch := uint64(1) sessionID := int64(1) - doneChan := qosManager.AddFailedRelay(epoch, sessionID) - <-doneChan // Wait for processing - + qosManager.AddFailedRelay(epoch, sessionID) totalRelays := qosManager.GetTotalRelays(epoch, sessionID) require.Equal(t, uint64(1), totalRelays) @@ -66,9 +62,7 @@ func TestSetLastReputationQoSReport(t *testing.T) { Availability: sdk.NewDec(100), } - doneChan := qosManager.SetLastReputationQoSReport(epoch, sessionID, testReport) - <-doneChan // Wait for processing - + qosManager.SetLastReputationQoSReport(epoch, sessionID, testReport) report := qosManager.GetLastReputationQoSReport(epoch, sessionID) require.NotNil(t, report) require.Equal(t, testReport.Latency, report.Latency) @@ -81,7 +75,7 @@ func TestMultipleEpochsAndSessions(t *testing.T) { // Test multiple epochs and sessions simultaneously for epoch := uint64(1); epoch <= 3; epoch++ { for sessionID := int64(1); sessionID <= 3; sessionID++ { - doneChan := qosManager.CalculateQoS( + qosManager.CalculateQoS( epoch, sessionID, "provider1", @@ -91,7 +85,6 @@ func TestMultipleEpochsAndSessions(t *testing.T) { 3, 2, ) - <-doneChan } } @@ -122,7 +115,7 @@ func TestEdgeCaseLatencies(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - doneChan := qosManager.CalculateQoS( + qosManager.CalculateQoS( epoch, sessionID, "provider1", @@ -132,7 +125,6 @@ func TestEdgeCaseLatencies(t *testing.T) { 3, 2, ) - <-doneChan require.NotNil(t, qosManager.GetLastQoSReport(epoch, sessionID)) }) } @@ -142,11 +134,8 @@ func TestNilReportHandling(t *testing.T) { qosManager := NewQoSManager() epoch := uint64(1) sessionID := int64(1) - // Test setting nil report - doneChan := qosManager.SetLastReputationQoSReport(epoch, sessionID, nil) - <-doneChan - + qosManager.SetLastReputationQoSReport(epoch, sessionID, nil) // Verify nil handling report := qosManager.GetLastReputationQoSReport(epoch, sessionID) require.Nil(t, report) @@ -170,7 +159,7 @@ func TestHighConcurrencyScenario(t *testing.T) { go func(routineID int) { defer wg.Done() for j := 0; j < operationsPerGoroutine; j++ { - doneChan := qosManager.CalculateQoS( + qosManager.CalculateQoS( uint64(routineID), int64(j), "provider1", @@ -180,7 +169,6 @@ func TestHighConcurrencyScenario(t *testing.T) { 3, 2, ) - <-doneChan } }(i) } @@ -190,8 +178,7 @@ func TestHighConcurrencyScenario(t *testing.T) { go func(routineID int) { defer wg.Done() for j := 0; j < operationsPerGoroutine; j++ { - doneChan := qosManager.AddFailedRelay(uint64(routineID), int64(j)) - <-doneChan + qosManager.AddFailedRelay(uint64(routineID), int64(j)) } }(i) } @@ -205,8 +192,7 @@ func TestHighConcurrencyScenario(t *testing.T) { Latency: sdk.NewDec(95), Availability: sdk.NewDec(100), } - doneChan := qosManager.SetLastReputationQoSReport(uint64(routineID), int64(j), report) - <-doneChan + qosManager.SetLastReputationQoSReport(uint64(routineID), int64(j), report) } }(i) } @@ -244,7 +230,7 @@ func TestQoSParameterBoundaries(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - doneChan := qosManager.CalculateQoS( + qosManager.CalculateQoS( epoch, sessionID, "provider1", @@ -254,7 +240,6 @@ func TestQoSParameterBoundaries(t *testing.T) { tc.numOfProviders, tc.servicersToCount, ) - <-doneChan // Verify that the manager doesn't panic and returns a report report := qosManager.GetLastQoSReport(epoch, sessionID) require.NotNil(t, report) @@ -269,7 +254,7 @@ func TestSequentialOperations(t *testing.T) { sessionID := int64(1) // Sequence: Calculate -> Fail -> Calculate - doneChan := qosManager.CalculateQoS( + qosManager.CalculateQoS( epoch, sessionID, "provider1", @@ -277,12 +262,8 @@ func TestSequentialOperations(t *testing.T) { 200*time.Millisecond, 1, 3, 2, ) - <-doneChan - - doneChan = qosManager.AddFailedRelay(epoch, sessionID) - <-doneChan - - doneChan = qosManager.CalculateQoS( + qosManager.AddFailedRelay(epoch, sessionID) + qosManager.CalculateQoS( epoch, sessionID, "provider1", @@ -290,8 +271,6 @@ func TestSequentialOperations(t *testing.T) { 200*time.Millisecond, 1, 3, 2, ) - <-doneChan - require.Equal(t, uint64(3), qosManager.GetTotalRelays(epoch, sessionID)) require.Equal(t, uint64(2), qosManager.GetAnsweredRelays(epoch, sessionID)) }) diff --git a/protocol/qos/qos_mutator_base.go b/protocol/qos/qos_mutator_base.go index 2919feac6f..894534cad1 100644 --- a/protocol/qos/qos_mutator_base.go +++ b/protocol/qos/qos_mutator_base.go @@ -1,5 +1,7 @@ package qos +import "sync/atomic" + // Base interface for all mutators type Mutator interface { Mutate(report *QoSReport) @@ -9,9 +11,8 @@ type Mutator interface { type QoSMutatorBase struct { epoch uint64 sessionId int64 - doneChan chan<- struct{} } func (qoSMutatorBase *QoSMutatorBase) GetEpochAndSessionId() (epoch uint64, sessionId int64) { - return qoSMutatorBase.epoch, qoSMutatorBase.sessionId + return atomic.LoadUint64(&qoSMutatorBase.epoch), atomic.LoadInt64(&qoSMutatorBase.sessionId) } diff --git a/protocol/qos/qos_mutator_relay_failure.go b/protocol/qos/qos_mutator_relay_failure.go index 2635933ef0..8e05a2b6a9 100644 --- a/protocol/qos/qos_mutator_relay_failure.go +++ b/protocol/qos/qos_mutator_relay_failure.go @@ -2,12 +2,9 @@ package qos // Mutator for relay failure type QoSMutatorRelayFailure struct { - QoSMutatorBase + *QoSMutatorBase } func (qoSMutatorRelayFailure *QoSMutatorRelayFailure) Mutate(report *QoSReport) { - defer func() { - qoSMutatorRelayFailure.doneChan <- struct{}{} - }() report.totalRelays++ } diff --git a/protocol/qos/qos_mutator_relay_success.go b/protocol/qos/qos_mutator_relay_success.go index 9b4452d164..1726a727bb 100644 --- a/protocol/qos/qos_mutator_relay_success.go +++ b/protocol/qos/qos_mutator_relay_success.go @@ -13,7 +13,7 @@ import ( // Mutator for relay success type QoSMutatorRelaySuccess struct { - QoSMutatorBase + *QoSMutatorBase latency time.Duration expectedLatency time.Duration blockHeightDiff int64 @@ -29,10 +29,6 @@ func (qoSMutatorRelaySuccess *QoSMutatorRelaySuccess) calculateAvailabilityScore } func (qoSMutatorRelaySuccess *QoSMutatorRelaySuccess) Mutate(report *QoSReport) { - defer func() { - qoSMutatorRelaySuccess.doneChan <- struct{}{} - }() - report.totalRelays++ report.answeredRelays++ diff --git a/protocol/qos/qos_mutator_set_reputation.go b/protocol/qos/qos_mutator_set_reputation.go index 04e59bdcf5..3961ad81a5 100644 --- a/protocol/qos/qos_mutator_set_reputation.go +++ b/protocol/qos/qos_mutator_set_reputation.go @@ -6,13 +6,10 @@ import ( // Mutator to set usage for a session type QoSMutatorSetReputation struct { - QoSMutatorBase + *QoSMutatorBase report *pairingtypes.QualityOfServiceReport } func (qoSMutatorSetReputation *QoSMutatorSetReputation) Mutate(report *QoSReport) { - defer func() { - qoSMutatorSetReputation.doneChan <- struct{}{} - }() report.lastReputationQoSReport = qoSMutatorSetReputation.report } diff --git a/protocol/qos/qos_report.go b/protocol/qos/qos_report.go new file mode 100644 index 0000000000..33b5fd5364 --- /dev/null +++ b/protocol/qos/qos_report.go @@ -0,0 +1,61 @@ +package qos + +import ( + "sync" + + sdk "github.com/cosmos/cosmos-sdk/types" + pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" +) + +type QoSReport struct { + lastQoSReport *pairingtypes.QualityOfServiceReport + lastReputationQoSReport *pairingtypes.QualityOfServiceReport + latencyScoreList []sdk.Dec + syncScoreSum int64 + totalSyncScore int64 + totalRelays uint64 + answeredRelays uint64 + lock sync.RWMutex +} + +func (qr *QoSReport) mutate(mutator Mutator) { + qr.lock.Lock() + defer qr.lock.Unlock() + mutator.Mutate(qr) +} + +func (qr *QoSReport) getLastQoSReport() *pairingtypes.QualityOfServiceReport { + qr.lock.RLock() + defer qr.lock.RUnlock() + return qr.lastQoSReport +} + +func (qr *QoSReport) getLastReputationQoSReport() *pairingtypes.QualityOfServiceReport { + qr.lock.RLock() + defer qr.lock.RUnlock() + return qr.lastReputationQoSReport +} + +func (qr *QoSReport) getAnsweredRelays() uint64 { + qr.lock.RLock() + defer qr.lock.RUnlock() + return qr.answeredRelays +} + +func (qr *QoSReport) getTotalRelays() uint64 { + qr.lock.RLock() + defer qr.lock.RUnlock() + return qr.totalRelays +} + +func (qr *QoSReport) getSyncScoreSum() int64 { + qr.lock.RLock() + defer qr.lock.RUnlock() + return qr.syncScoreSum +} + +func (qr *QoSReport) getTotalSyncScore() int64 { + qr.lock.RLock() + defer qr.lock.RUnlock() + return qr.totalSyncScore +} From c08f00c7dc3707fc0821950747553818a9b265c3 Mon Sep 17 00:00:00 2001 From: Ran Mishael Date: Tue, 28 Jan 2025 15:09:28 +0100 Subject: [PATCH 21/24] Summery -> Summary --- protocol/common/endpoints.go | 2 +- .../lavasession/consumer_session_manager.go | 4 +- protocol/lavasession/consumer_types.go | 2 +- protocol/rpcconsumer/relay_errors.go | 4 +- protocol/rpcconsumer/relay_errors_test.go | 54 +++++++++---------- protocol/rpcconsumer/relay_processor.go | 4 +- protocol/rpcconsumer/rpcconsumer_server.go | 2 +- 7 files changed, 36 insertions(+), 36 deletions(-) diff --git a/protocol/common/endpoints.go b/protocol/common/endpoints.go index 49386000d0..9075f918df 100644 --- a/protocol/common/endpoints.go +++ b/protocol/common/endpoints.go @@ -252,7 +252,7 @@ type ConflictHandlerInterface interface { type ProviderInfo struct { ProviderAddress string - ProviderReputationSummery sdk.Dec // the number represents the average qos for this provider session + ProviderReputationSummary sdk.Dec // the number represents the average qos for this provider session ProviderStake sdk.Coin } diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index d15d38d670..90264976e8 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -558,9 +558,9 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS ReportedProviders: reportedProviders, } - // adding qos summery for error parsing. + // adding qos summary for error parsing. // consumer session is locked here so its ok to read the qos report. - sessionInfo.QoSSummeryResult = consumerSession.getQosComputedResultOrZero() + sessionInfo.QoSSummaryResult = consumerSession.getQosComputedResultOrZero() sessions[providerAddress] = sessionInfo qosReport, _ := csm.providerOptimizer.GetReputationReportForProvider(providerAddress) diff --git a/protocol/lavasession/consumer_types.go b/protocol/lavasession/consumer_types.go index 21aab39407..b00b9cc35b 100644 --- a/protocol/lavasession/consumer_types.go +++ b/protocol/lavasession/consumer_types.go @@ -63,7 +63,7 @@ type UsedProvidersInf interface { type SessionInfo struct { Session *SingleConsumerSession StakeSize sdk.Coin - QoSSummeryResult sdk.Dec // using ComputeQoS to get the total QOS + QoSSummaryResult sdk.Dec // using ComputeQoS to get the total QOS Epoch uint64 ReportedProviders []*pairingtypes.ReportedProvider } diff --git a/protocol/rpcconsumer/relay_errors.go b/protocol/rpcconsumer/relay_errors.go index cb13d609c2..cfc39740c8 100644 --- a/protocol/rpcconsumer/relay_errors.go +++ b/protocol/rpcconsumer/relay_errors.go @@ -55,10 +55,10 @@ func (r *RelayErrors) GetBestErrorMessageForUser() RelayError { for idx, relayError := range r.relayErrors { errorMessage := r.sanitizeError(relayError.err) errorMap[errorMessage] = append(errorMap[errorMessage], idx) - if relayError.ProviderInfo.ProviderReputationSummery.IsNil() || relayError.ProviderInfo.ProviderStake.Amount.IsNil() { + if relayError.ProviderInfo.ProviderReputationSummary.IsNil() || relayError.ProviderInfo.ProviderStake.Amount.IsNil() { continue } - currentResult := relayError.ProviderInfo.ProviderReputationSummery.MulInt(relayError.ProviderInfo.ProviderStake.Amount) + currentResult := relayError.ProviderInfo.ProviderReputationSummary.MulInt(relayError.ProviderInfo.ProviderStake.Amount) if currentResult.GTE(bestResult) { // 0 or 1 here are valid replacements, so even 0 scores will return the error value bestResult.Set(currentResult) bestIndex = idx diff --git a/protocol/rpcconsumer/relay_errors_test.go b/protocol/rpcconsumer/relay_errors_test.go index b2049dfa6f..7aefce983c 100644 --- a/protocol/rpcconsumer/relay_errors_test.go +++ b/protocol/rpcconsumer/relay_errors_test.go @@ -23,35 +23,35 @@ func TestRelayError(t *testing.T) { { err: fmt.Errorf("test1"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("test2"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 20), }, }, { err: fmt.Errorf("test3"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 30), }, }, { err: fmt.Errorf("test4"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 40), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 50), }, }, @@ -66,49 +66,49 @@ func TestRelayError(t *testing.T) { { err: fmt.Errorf("test1"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.MustNewDecFromStr("0.5"), + ProviderReputationSummary: sdk.MustNewDecFromStr("0.5"), ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("test1"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.MustNewDecFromStr("0.25"), + ProviderReputationSummary: sdk.MustNewDecFromStr("0.25"), ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("test3"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.MustNewDecFromStr("0.6"), + ProviderReputationSummary: sdk.MustNewDecFromStr("0.6"), ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("test3"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.MustNewDecFromStr("0.7"), + ProviderReputationSummary: sdk.MustNewDecFromStr("0.7"), ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("test4"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.MustNewDecFromStr("0.7"), + ProviderReputationSummary: sdk.MustNewDecFromStr("0.7"), ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("test4"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.MustNewDecFromStr("0.7"), + ProviderReputationSummary: sdk.MustNewDecFromStr("0.7"), ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.MustNewDecFromStr("0.8"), + ProviderReputationSummary: sdk.MustNewDecFromStr("0.8"), ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, @@ -123,35 +123,35 @@ func TestRelayError(t *testing.T) { { err: fmt.Errorf("test1"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 1000), }, }, { err: fmt.Errorf("test2"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 1000), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.ZeroDec(), + ProviderReputationSummary: sdk.ZeroDec(), ProviderStake: sdk.NewInt64Coin("ulava", 0), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.ZeroDec(), + ProviderReputationSummary: sdk.ZeroDec(), ProviderStake: sdk.NewInt64Coin("ulava", 0), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.ZeroDec(), + ProviderReputationSummary: sdk.ZeroDec(), ProviderStake: sdk.NewInt64Coin("ulava", 0), }, }, @@ -166,35 +166,35 @@ func TestRelayError(t *testing.T) { { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 20), }, }, { err: fmt.Errorf("test3"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 30), }, }, { err: fmt.Errorf("test4"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 40), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, @@ -209,35 +209,35 @@ func TestRelayError(t *testing.T) { { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 20), }, }, { err: fmt.Errorf("test3"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 30), }, }, { err: fmt.Errorf("test4"), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 40), }, }, { err: fmt.Errorf("%s", expectedValue), ProviderInfo: common.ProviderInfo{ - ProviderReputationSummery: sdk.OneDec(), + ProviderReputationSummary: sdk.OneDec(), ProviderStake: sdk.NewInt64Coin("ulava", 10), }, }, diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 72bbeda50f..705a57e87b 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -314,10 +314,10 @@ func (rp *RelayProcessor) responsesQuorum(results []common.RelayResult, quorumSi if result.Reply != nil && result.Reply.Data != nil { countMap[string(result.Reply.Data)]++ if !deterministic { - if result.ProviderInfo.ProviderReputationSummery.IsNil() || result.ProviderInfo.ProviderStake.Amount.IsNil() { + if result.ProviderInfo.ProviderReputationSummary.IsNil() || result.ProviderInfo.ProviderStake.Amount.IsNil() { continue } - currentResult := result.ProviderInfo.ProviderReputationSummery.MulInt(result.ProviderInfo.ProviderStake.Amount) + currentResult := result.ProviderInfo.ProviderReputationSummary.MulInt(result.ProviderInfo.ProviderStake.Amount) if currentResult.GTE(bestQos) { bestQos.Set(currentResult) bestQosResult = result diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 138116cd61..4c158ddc89 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -735,7 +735,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( go func(providerPublicAddress string, sessionInfo *lavasession.SessionInfo) { // add ticker launch metrics localRelayResult := &common.RelayResult{ - ProviderInfo: common.ProviderInfo{ProviderAddress: providerPublicAddress, ProviderStake: sessionInfo.StakeSize, ProviderReputationSummery: sessionInfo.QoSSummeryResult}, + ProviderInfo: common.ProviderInfo{ProviderAddress: providerPublicAddress, ProviderStake: sessionInfo.StakeSize, ProviderReputationSummary: sessionInfo.QoSSummaryResult}, Finalized: false, // setting the single consumer session as the conflict handler. // to be able to validate if we need to report this provider or not. From e805748c13a0740228a8f344c3a3e6fc74bb7b83 Mon Sep 17 00:00:00 2001 From: Ran Mishael Date: Tue, 28 Jan 2025 15:11:49 +0100 Subject: [PATCH 22/24] renaming to reputation --- .../lavasession/single_consumer_session.go | 2 +- .../provideroptimizer/provider_optimizer.go | 2 +- .../provider_optimizer_test.go | 10 +++--- x/pairing/keeper/msg_server_relay_payment.go | 2 +- x/pairing/types/qos_report.go | 8 ++--- x/pairing/types/qos_report_test.go | 32 +++++++++---------- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/protocol/lavasession/single_consumer_session.go b/protocol/lavasession/single_consumer_session.go index 1d71889a5f..395835c00b 100644 --- a/protocol/lavasession/single_consumer_session.go +++ b/protocol/lavasession/single_consumer_session.go @@ -40,7 +40,7 @@ func (cs *SingleConsumerSession) CalculateExpectedLatency(timeoutGivenToRelay ti func (cs *SingleConsumerSession) getQosComputedResultOrZero() sdk.Dec { lastReputationReport := cs.QoSManager.GetLastReputationQoSReport(cs.epoch, cs.SessionId) if lastReputationReport != nil { - computedReputation, errComputing := lastReputationReport.ComputeQoSExcellence() + computedReputation, errComputing := lastReputationReport.ComputeReputation() if errComputing == nil { // if we failed to compute the qos will be 0 so this provider wont be picked to return the error in case we get it return computedReputation } diff --git a/protocol/provideroptimizer/provider_optimizer.go b/protocol/provideroptimizer/provider_optimizer.go index 7cb390cb51..326c925118 100644 --- a/protocol/provideroptimizer/provider_optimizer.go +++ b/protocol/provideroptimizer/provider_optimizer.go @@ -265,7 +265,7 @@ func (po *ProviderOptimizer) CalculateSelectionTiers(allAddresses []string, igno ) return NewSelectionTier(), Exploration{}, nil } - score, err := qos.ComputeQoSExcellenceFloat64(opts...) + score, err := qos.ComputeReputationFloat64(opts...) if err != nil { utils.LavaFormatWarning("[Optimizer] cannot calculate selection tiers", err, utils.LogAttr("provider", providerAddress), diff --git a/protocol/provideroptimizer/provider_optimizer_test.go b/protocol/provideroptimizer/provider_optimizer_test.go index 62008f7dbe..acc3f58da2 100644 --- a/protocol/provideroptimizer/provider_optimizer_test.go +++ b/protocol/provideroptimizer/provider_optimizer_test.go @@ -344,7 +344,7 @@ func TestProviderOptimizerUpdatingLatency(t *testing.T) { // get current score qos, _ := providerOptimizer.GetReputationReportForProvider(providerAddress) require.NotNil(t, qos) - score, err := qos.ComputeQoSExcellence() + score, err := qos.ComputeReputation() require.NoError(t, err) // add good latency probe @@ -354,7 +354,7 @@ func TestProviderOptimizerUpdatingLatency(t *testing.T) { // check score again and compare to the last score qos, _ = providerOptimizer.GetReputationReportForProvider(providerAddress) require.NotNil(t, qos) - newScore, err := qos.ComputeQoSExcellence() + newScore, err := qos.ComputeReputation() require.NoError(t, err) require.True(t, newScore.LT(score), "newScore: "+newScore.String()+", score: "+score.String()) } @@ -369,7 +369,7 @@ func TestProviderOptimizerUpdatingLatency(t *testing.T) { // get current score qos, _ := providerOptimizer.GetReputationReportForProvider(providerAddress) require.NotNil(t, qos) - score, err := qos.ComputeQoSExcellence() + score, err := qos.ComputeReputation() require.NoError(t, err) // add good latency relay @@ -379,7 +379,7 @@ func TestProviderOptimizerUpdatingLatency(t *testing.T) { // check score again and compare to the last score qos, _ = providerOptimizer.GetReputationReportForProvider(providerAddress) require.NotNil(t, qos) - newScore, err := qos.ComputeQoSExcellence() + newScore, err := qos.ComputeReputation() require.NoError(t, err) require.True(t, newScore.LT(score), "newScore: "+newScore.String()+", score: "+score.String()) } @@ -1028,7 +1028,7 @@ func TestProviderOptimizerLatencySyncScore(t *testing.T) { for _, provider := range providersGen.providersAddresses { qos, _ := providerOptimizer.GetReputationReportForProvider(provider) require.NotNil(t, qos) - score, err := qos.ComputeQoSExcellence() + score, err := qos.ComputeReputation() require.NoError(t, err) scores = append(scores, score) } diff --git a/x/pairing/keeper/msg_server_relay_payment.go b/x/pairing/keeper/msg_server_relay_payment.go index df4fb3bd14..9c2f8a317a 100644 --- a/x/pairing/keeper/msg_server_relay_payment.go +++ b/x/pairing/keeper/msg_server_relay_payment.go @@ -501,7 +501,7 @@ func (k Keeper) aggregateReputationEpochQosScore(ctx sdk.Context, subscription s } syncFactor := k.ReputationLatencyOverSyncFactor(ctx) - score, err := relay.QosExcellenceReport.ComputeQoSExcellence(types.WithSyncFactor(syncFactor)) + score, err := relay.QosExcellenceReport.ComputeReputation(types.WithSyncFactor(syncFactor)) if err != nil { return utils.LavaFormatWarning("RelayPayment: could not compute qos excellence score", err, utils.LogAttr("consumer", subscription), diff --git a/x/pairing/types/qos_report.go b/x/pairing/types/qos_report.go index 01367e71ec..a1c5eb5ea4 100644 --- a/x/pairing/types/qos_report.go +++ b/x/pairing/types/qos_report.go @@ -100,7 +100,7 @@ func WithBlockErrorProbability(probability sdk.Dec) Option { } } -// ComputeQoSExcellence calculates a score from the QoS excellence report by the following formula: +// ComputeReputation calculates a score from the QoS excellence report by the following formula: // If the requested block is the latest block or "not applicable" (called from the node's code): // // score = latency + sync*syncFactor + ((1/availability) - 1) * FailureCost @@ -114,7 +114,7 @@ func WithBlockErrorProbability(probability sdk.Dec) Option { // Important: when using this function from the node's code, do not configure the block error probability // (in default mode, it's unused) // TODO: after the reputation feature is merged, use this method to calculate the QoS excellence score -func (qos *QualityOfServiceReport) ComputeQoSExcellence(opts ...Option) (sdk.Dec, error) { +func (qos *QualityOfServiceReport) ComputeReputation(opts ...Option) (sdk.Dec, error) { if err := qos.Validate(); err != nil { return sdk.ZeroDec(), err } @@ -138,8 +138,8 @@ func (qos *QualityOfServiceReport) ComputeQoSExcellence(opts ...Option) (sdk.Dec return latency.Add(sync).Add(availability), nil } -func (qos *QualityOfServiceReport) ComputeQoSExcellenceFloat64(opts ...Option) (float64, error) { - scoreDec, err := qos.ComputeQoSExcellence(opts...) +func (qos *QualityOfServiceReport) ComputeReputationFloat64(opts ...Option) (float64, error) { + scoreDec, err := qos.ComputeReputation(opts...) if err != nil { return 0, err } diff --git a/x/pairing/types/qos_report_test.go b/x/pairing/types/qos_report_test.go index 6f7eaaa0f1..74a937e45a 100644 --- a/x/pairing/types/qos_report_test.go +++ b/x/pairing/types/qos_report_test.go @@ -110,7 +110,7 @@ func TestQosCompute(t *testing.T) { for _, tt := range template { t.Run(tt.name, func(t *testing.T) { - score, err := qos.ComputeQoSExcellence(tt.opts...) + score, err := qos.ComputeReputation(tt.opts...) require.NoError(t, err) require.True(t, tt.expectedScore.Equal(score)) }) @@ -122,15 +122,15 @@ func TestQosFailureCost(t *testing.T) { qos := types.QualityOfServiceReport{Latency: sdk.OneDec(), Sync: sdk.OneDec(), Availability: sdk.NewDecWithPrec(5, 1)} failureCost, highFailureCost := int64(1), int64(3) - score, err := qos.ComputeQoSExcellence(types.WithFailureCost(failureCost)) + score, err := qos.ComputeReputation(types.WithFailureCost(failureCost)) require.NoError(t, err) - scoreHighFailure, err := qos.ComputeQoSExcellence(types.WithFailureCost(highFailureCost)) + scoreHighFailure, err := qos.ComputeReputation(types.WithFailureCost(highFailureCost)) require.NoError(t, err) require.True(t, scoreHighFailure.GT(score)) - scoreWithProb, err := qos.ComputeQoSExcellence(types.WithFailureCost(failureCost), types.WithBlockErrorProbability(sdk.OneDec())) + scoreWithProb, err := qos.ComputeReputation(types.WithFailureCost(failureCost), types.WithBlockErrorProbability(sdk.OneDec())) require.NoError(t, err) - scoreHighFailureWithProb, err := qos.ComputeQoSExcellence(types.WithFailureCost(highFailureCost), types.WithBlockErrorProbability(sdk.OneDec())) + scoreHighFailureWithProb, err := qos.ComputeReputation(types.WithFailureCost(highFailureCost), types.WithBlockErrorProbability(sdk.OneDec())) require.NoError(t, err) require.True(t, scoreHighFailureWithProb.GT(scoreWithProb)) } @@ -140,9 +140,9 @@ func TestQosSyncFactor(t *testing.T) { qos := types.QualityOfServiceReport{Latency: sdk.OneDec(), Sync: sdk.OneDec(), Availability: sdk.NewDecWithPrec(5, 1)} syncFactor, highSyncFactor := sdk.NewDecWithPrec(5, 1), sdk.NewDecWithPrec(8, 1) - score, err := qos.ComputeQoSExcellence(types.WithSyncFactor(syncFactor)) + score, err := qos.ComputeReputation(types.WithSyncFactor(syncFactor)) require.NoError(t, err) - scoreHighSyncFactor, err := qos.ComputeQoSExcellence(types.WithSyncFactor(highSyncFactor)) + scoreHighSyncFactor, err := qos.ComputeReputation(types.WithSyncFactor(highSyncFactor)) require.NoError(t, err) require.True(t, scoreHighSyncFactor.GT(score)) } @@ -156,18 +156,18 @@ func TestQosStrategyFactor(t *testing.T) { // we get the balancedScore with a balanced strategy and subtract the latency component of the balancedScore // this way, our balancedScore will only be syncFactor*sync (syncFactor = configuredSyncFactor * strategyFactor) - balancedScore, err := qos.ComputeQoSExcellence(types.WithStrategyFactor(types.BalancedStrategyFactor)) + balancedScore, err := qos.ComputeReputation(types.WithStrategyFactor(types.BalancedStrategyFactor)) require.NoError(t, err) balancedScore = balancedScore.Sub(sdk.OneDec()) // calculate score with latency strategy - sync component should be smaller than the component in balancedScore - latencyScore, err := qos.ComputeQoSExcellence(types.WithStrategyFactor(types.LatencyStrategyFactor)) + latencyScore, err := qos.ComputeReputation(types.WithStrategyFactor(types.LatencyStrategyFactor)) require.NoError(t, err) latencyScore = latencyScore.Sub(sdk.OneDec()) require.True(t, balancedScore.GT(latencyScore)) // calculate score with sync freshness strategy - sync component should be bigger than the component in balancedScore - syncScore, err := qos.ComputeQoSExcellence(types.WithStrategyFactor(types.SyncFreshnessStrategyFactor)) + syncScore, err := qos.ComputeReputation(types.WithStrategyFactor(types.SyncFreshnessStrategyFactor)) require.NoError(t, err) syncScore = syncScore.Sub(sdk.OneDec()) require.True(t, balancedScore.LT(syncScore)) @@ -178,9 +178,9 @@ func TestQosBlockErrorProbability(t *testing.T) { qos := types.QualityOfServiceReport{Latency: sdk.OneDec(), Sync: sdk.OneDec(), Availability: sdk.OneDec()} probabililty, highProbabililty := sdk.NewDecWithPrec(5, 1), sdk.NewDecWithPrec(8, 1) - score, err := qos.ComputeQoSExcellence(types.WithBlockErrorProbability(probabililty)) + score, err := qos.ComputeReputation(types.WithBlockErrorProbability(probabililty)) require.NoError(t, err) - scoreHighProbabililty, err := qos.ComputeQoSExcellence(types.WithBlockErrorProbability(highProbabililty)) + scoreHighProbabililty, err := qos.ComputeReputation(types.WithBlockErrorProbability(highProbabililty)) require.NoError(t, err) require.True(t, scoreHighProbabililty.GT(score)) } @@ -207,10 +207,10 @@ func TestQosReport(t *testing.T) { Sync: sdk.MustNewDecFromStr("0.5"), } - qos1Res, errQos1 := qos1.ComputeQoSExcellence() - qos2Res, errQos2 := qos2.ComputeQoSExcellence() - qos3Res, errQos3 := qos3.ComputeQoSExcellence() - qos4Res, errQos4 := qos4.ComputeQoSExcellence() + qos1Res, errQos1 := qos1.ComputeReputation() + qos2Res, errQos2 := qos2.ComputeReputation() + qos3Res, errQos3 := qos3.ComputeReputation() + qos4Res, errQos4 := qos4.ComputeReputation() require.NoError(t, errQos1) require.NoError(t, errQos2) require.NoError(t, errQos3) From e2aa7b50dc99ac45fb8778d11ed1a7984097b154 Mon Sep 17 00:00:00 2001 From: Ran Mishael Date: Tue, 28 Jan 2025 15:12:43 +0100 Subject: [PATCH 23/24] removed unused interfaces --- protocol/qos/common.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/protocol/qos/common.go b/protocol/qos/common.go index 5cd14854ef..231214afd6 100644 --- a/protocol/qos/common.go +++ b/protocol/qos/common.go @@ -8,7 +8,3 @@ const ( PercentileToCalculateLatency = 0.9 MinProvidersForSync = 0.6 ) - -type DegradeAvailabilityReputation interface{} - -type SendQoSUpdate interface{} From 9535f6e3d0ba27db6fc5d0ab6c58b1d49880e1c2 Mon Sep 17 00:00:00 2001 From: Ran Mishael Date: Tue, 28 Jan 2025 15:16:18 +0100 Subject: [PATCH 24/24] fixing comments --- protocol/rpcconsumer/rpcconsumer_server.go | 25 +++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 4c158ddc89..dd8fde3397 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -863,18 +863,19 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( ) } - lastQoSReport := singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId) - if rpccs.debugRelays && lastQoSReport != nil && - lastQoSReport.Sync.BigInt() != nil && - lastQoSReport.Sync.LT(sdk.MustNewDecFromStr("0.9")) { - utils.LavaFormatDebug("identified QoS mismatch", - utils.Attribute{Key: "expectedBH", Value: expectedBH}, - utils.Attribute{Key: "latestServicedBlock", Value: latestBlock}, - utils.Attribute{Key: "session_id", Value: singleConsumerSession.SessionId}, - utils.Attribute{Key: "provider_address", Value: singleConsumerSession.Parent.PublicLavaAddress}, - utils.Attribute{Key: "providersCount", Value: pairingAddressesLen}, - utils.Attribute{Key: "singleConsumerSession.QoSInfo", Value: singleConsumerSession.QoSManager}, - ) + if rpccs.debugRelays { + lastQoSReport := singleConsumerSession.QoSManager.GetLastQoSReport(epoch, singleConsumerSession.SessionId) + if lastQoSReport != nil && lastQoSReport.Sync.BigInt() != nil && + lastQoSReport.Sync.LT(sdk.MustNewDecFromStr("0.9")) { + utils.LavaFormatDebug("identified QoS mismatch", + utils.Attribute{Key: "expectedBH", Value: expectedBH}, + utils.Attribute{Key: "latestServicedBlock", Value: latestBlock}, + utils.Attribute{Key: "session_id", Value: singleConsumerSession.SessionId}, + utils.Attribute{Key: "provider_address", Value: singleConsumerSession.Parent.PublicLavaAddress}, + utils.Attribute{Key: "providersCount", Value: pairingAddressesLen}, + utils.Attribute{Key: "singleConsumerSession.QoSInfo", Value: singleConsumerSession.QoSManager}, + ) + } } errResponse = rpccs.consumerSessionManager.OnSessionDone(singleConsumerSession, latestBlock, chainlib.GetComputeUnits(protocolMessage), relayLatency, singleConsumerSession.CalculateExpectedLatency(expectedRelayTimeoutForQOS), expectedBH, numOfProviders, pairingAddressesLen, protocolMessage.GetApi().Category.HangingApi, extensions) // session done successfully