Skip to content

Commit

Permalink
feat: add chunk application stats (near#12797)
Browse files Browse the repository at this point in the history
This is the first step towards per-chunk metrics
(near#12758).

This PR adds a new struct - `ChunkApplyStats` - which keeps information
about things that happened
during chunk application. For example how many transactions there were,
how many receipts, what were
the outgoing limits, how many receipts were forwarded, buffered, etc,
etc.

For now `ChunkApplyStats` contain mainly data relevant to the bandwidth
scheduler, in the future
more stats can be added to measure other things that we're interested
in. I didn't want to add too
much stuff at once to keep the PR size reasonable.

There was already a struct called `ApplyStats`, but it was used only for
the balance checker. I
replaced it with `BalanceStats` inside `ChunkApplyStats`.

`ChunkApplyStats` are returned in `ApplyChunkResult` and saved to the
database for later use. A new
database column is added to keep the chunk application stats. The column
is included in the standard
garbage collection logic to keep the size of saved data reasonable.

Running `neard view-state chunk-apply-stats` allows node operator to
view chunk application stats
for a given chunk. Example output for a mainnet chunk:
<details>
<summary> Click to expand </summary>

```rust
$ ./neard view-state chunk-apply-stats --block-hash GKzyP7DVNw5ctUcBhRRkABMaC2giNSKK5oHCrRc9hnXH --shard-id 0
...
V0(
    ChunkApplyStatsV0 {
        height: 138121896,
        shard_id: 0,
        is_chunk_missing: false,
        transactions_num: 35,
        incoming_receipts_num: 103,
        receipt_sink: ReceiptSinkStats {
            outgoing_limits: {
                0: OutgoingLimitStats {
                    size: 102400,
                    gas: 18446744073709551615,
                },
                1: OutgoingLimitStats {
                    size: 4718592,
                    gas: 300000000000000000,
                },
                2: OutgoingLimitStats {
                    size: 102400,
                    gas: 300000000000000000,
                },
                3: OutgoingLimitStats {
                    size: 102400,
                    gas: 300000000000000000,
                },
                4: OutgoingLimitStats {
                    size: 102400,
                    gas: 300000000000000000,
                },
                5: OutgoingLimitStats {
                    size: 102400,
                    gas: 300000000000000000,
                },
            },
            forwarded_receipts: {
                0: ReceiptsStats {
                    num: 24,
                    total_size: 6801,
                    total_gas: 515985143008901,
                },
                2: ReceiptsStats {
                    num: 21,
                    total_size: 6962,
                    total_gas: 639171080456467,
                },
                3: ReceiptsStats {
                    num: 58,
                    total_size: 17843,
                    total_gas: 1213382619794847,
                },
                4: ReceiptsStats {
                    num: 20,
                    total_size: 6278,
                    total_gas: 235098003759589,
                },
                5: ReceiptsStats {
                    num: 4,
                    total_size: 2089,
                    total_gas: 245101556851946,
                },
            },
            buffered_receipts: {},
            final_outgoing_buffers: {
                0: ReceiptsStats {
                    num: 0,
                    total_size: 0,
                    total_gas: 0,
                },
                2: ReceiptsStats {
                    num: 0,
                    total_size: 0,
                    total_gas: 0,
                },
                3: ReceiptsStats {
                    num: 0,
                    total_size: 0,
                    total_gas: 0,
                },
                4: ReceiptsStats {
                    num: 0,
                    total_size: 0,
                    total_gas: 0,
                },
                5: ReceiptsStats {
                    num: 0,
                    total_size: 0,
                    total_gas: 0,
                },
            },
            is_outgoing_metadata_ready: {
                0: false,
                2: false,
                3: false,
                4: false,
                5: false,
            },
            all_outgoing_metadatas_ready: false,
        },
        bandwidth_scheduler: BandwidthSchedulerStats {
            params: None,
            prev_bandwidth_requests: {},
            prev_bandwidth_requests_num: 0,
            time_to_run_ms: 0,
            granted_bandwidth: {},
            new_bandwidth_requests: {},
        },
        balance: BalanceStats {
            tx_burnt_amount: 4115983319195000000000,
            slashed_burnt_amount: 0,
            other_burnt_amount: 0,
            gas_deficit_amount: 0,
        },
    },
)
```

</details>

The stats are also available in `ChainStore`, making it easy to read
them from tests.
In the future we could also add an RPC endpoint to make the stats
available in `debug-ui`.

The PR is divided into commits for easier review.
  • Loading branch information
jancionear authored and marcelo-gonzalez committed Feb 11, 2025
1 parent 133331f commit 6780628
Show file tree
Hide file tree
Showing 23 changed files with 510 additions and 61 deletions.
10 changes: 10 additions & 0 deletions chain/chain/src/chain_update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,11 @@ impl<'a> ChainUpdate<'a> {
apply_result.contract_updates,
);
}
self.chain_store_update.save_chunk_apply_stats(
*block_hash,
shard_id,
apply_result.stats,
);
}
ShardUpdateResult::OldChunk(OldChunkResult { shard_uid, apply_result }) => {
// The chunk is missing but some fields may need to be updated
Expand Down Expand Up @@ -175,6 +180,11 @@ impl<'a> ChainUpdate<'a> {
apply_result.contract_updates,
);
}
self.chain_store_update.save_chunk_apply_stats(
*block_hash,
shard_uid.shard_id(),
apply_result.stats,
);
}
};
Ok(())
Expand Down
4 changes: 4 additions & 0 deletions chain/chain/src/garbage_collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,7 @@ impl<'a> ChainStoreUpdate<'a> {
self.gc_outgoing_receipts(&block_hash, shard_id);
self.gc_col(DBCol::IncomingReceipts, &block_shard_id);
self.gc_col(DBCol::StateTransitionData, &block_shard_id);
self.gc_col(DBCol::ChunkApplyStats, &block_shard_id);

// For incoming State Parts it's done in chain.clear_downloaded_parts()
// The following code is mostly for outgoing State Parts.
Expand Down Expand Up @@ -1016,6 +1017,9 @@ impl<'a> ChainStoreUpdate<'a> {
DBCol::StateSyncNewChunks => {
store_update.delete(col, key);
}
DBCol::ChunkApplyStats => {
store_update.delete(col, key);
}
DBCol::DbVersion
| DBCol::BlockMisc
| DBCol::_GCCount
Expand Down
6 changes: 4 additions & 2 deletions chain/chain/src/runtime/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,9 +354,10 @@ impl NightshadeRuntime {

let total_balance_burnt = apply_result
.stats
.balance
.tx_burnt_amount
.checked_add(apply_result.stats.other_burnt_amount)
.and_then(|result| result.checked_add(apply_result.stats.slashed_burnt_amount))
.checked_add(apply_result.stats.balance.other_burnt_amount)
.and_then(|result| result.checked_add(apply_result.stats.balance.slashed_burnt_amount))
.ok_or_else(|| {
Error::Other("Integer overflow during burnt balance summation".to_string())
})?;
Expand Down Expand Up @@ -386,6 +387,7 @@ impl NightshadeRuntime {
bandwidth_requests: apply_result.bandwidth_requests,
bandwidth_scheduler_state_hash: apply_result.bandwidth_scheduler_state_hash,
contract_updates: apply_result.contract_updates,
stats: apply_result.stats,
};

Ok(result)
Expand Down
44 changes: 44 additions & 0 deletions chain/chain/src/store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use near_chain_primitives::error::Error;
use near_epoch_manager::EpochManagerAdapter;
use near_primitives::block::Tip;
use near_primitives::checked_feature;
use near_primitives::chunk_apply_stats::{ChunkApplyStats, ChunkApplyStatsV0};
use near_primitives::errors::InvalidTxError;
use near_primitives::hash::CryptoHash;
use near_primitives::merkle::{MerklePath, PartialMerkleTree};
Expand Down Expand Up @@ -133,6 +134,11 @@ pub trait ChainStoreAccess {
block_hash: &CryptoHash,
shard_uid: &ShardUId,
) -> Result<Arc<ChunkExtra>, Error>;
fn get_chunk_apply_stats(
&self,
block_hash: &CryptoHash,
shard_id: &ShardId,
) -> Result<Option<ChunkApplyStats>, Error>;
/// Get block header.
fn get_block_header(&self, h: &CryptoHash) -> Result<BlockHeader, Error>;
/// Returns hash of the block on the main chain for given height.
Expand Down Expand Up @@ -1068,6 +1074,14 @@ impl ChainStoreAccess for ChainStore {
ChainStoreAdapter::get_chunk_extra(self, block_hash, shard_uid)
}

fn get_chunk_apply_stats(
&self,
block_hash: &CryptoHash,
shard_id: &ShardId,
) -> Result<Option<ChunkApplyStats>, Error> {
ChainStoreAdapter::get_chunk_apply_stats(&self, block_hash, shard_id)
}

/// Get block header.
fn get_block_header(&self, h: &CryptoHash) -> Result<BlockHeader, Error> {
ChainStoreAdapter::get_block_header(self, h)
Expand Down Expand Up @@ -1211,6 +1225,7 @@ pub struct ChainStoreUpdate<'a> {
add_state_sync_infos: Vec<StateSyncInfo>,
remove_state_sync_infos: Vec<CryptoHash>,
challenged_blocks: HashSet<CryptoHash>,
chunk_apply_stats: HashMap<(CryptoHash, ShardId), ChunkApplyStats>,
}

impl<'a> ChainStoreUpdate<'a> {
Expand All @@ -1234,6 +1249,7 @@ impl<'a> ChainStoreUpdate<'a> {
add_state_sync_infos: vec![],
remove_state_sync_infos: vec![],
challenged_blocks: HashSet::default(),
chunk_apply_stats: HashMap::default(),
}
}
}
Expand Down Expand Up @@ -1361,6 +1377,18 @@ impl<'a> ChainStoreAccess for ChainStoreUpdate<'a> {
}
}

fn get_chunk_apply_stats(
&self,
block_hash: &CryptoHash,
shard_id: &ShardId,
) -> Result<Option<ChunkApplyStats>, Error> {
if let Some(stats) = self.chunk_apply_stats.get(&(*block_hash, *shard_id)) {
Ok(Some(stats.clone()))
} else {
self.chain_store.get_chunk_apply_stats(block_hash, shard_id)
}
}

/// Get block header.
fn get_block_header(&self, hash: &CryptoHash) -> Result<BlockHeader, Error> {
if let Some(header) = self.chain_store_cache_update.headers.get(hash).cloned() {
Expand Down Expand Up @@ -1881,6 +1909,15 @@ impl<'a> ChainStoreUpdate<'a> {
self.chain_store_cache_update.processed_block_heights.insert(height);
}

pub fn save_chunk_apply_stats(
&mut self,
block_hash: CryptoHash,
shard_id: ShardId,
stats: ChunkApplyStatsV0,
) {
self.chunk_apply_stats.insert((block_hash, shard_id), ChunkApplyStats::V0(stats));
}

pub fn inc_block_refcount(&mut self, block_hash: &CryptoHash) -> Result<(), Error> {
let refcount = match self.get_block_refcount(block_hash) {
Ok(refcount) => refcount,
Expand Down Expand Up @@ -2409,6 +2446,13 @@ impl<'a> ChainStoreUpdate<'a> {
&(),
)?;
}
for ((block_hash, shard_id), stats) in self.chunk_apply_stats.iter() {
store_update.set_ser(
DBCol::ChunkApplyStats,
&get_block_shard_id(block_hash, *shard_id),
stats,
)?;
}
for other in self.store_updates.drain(..) {
store_update.merge(other);
}
Expand Down
2 changes: 2 additions & 0 deletions chain/chain/src/test_utils/kv_runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use near_primitives::account::{AccessKey, Account};
use near_primitives::apply::ApplyChunkReason;
use near_primitives::bandwidth_scheduler::BandwidthRequests;
use near_primitives::block::Tip;
use near_primitives::chunk_apply_stats::ChunkApplyStatsV0;
use near_primitives::congestion_info::{CongestionInfo, ExtendedCongestionInfo};
use near_primitives::epoch_block_info::BlockInfo;
use near_primitives::epoch_info::EpochInfo;
Expand Down Expand Up @@ -1280,6 +1281,7 @@ impl RuntimeAdapter for KeyValueRuntime {
bandwidth_requests: BandwidthRequests::default_for_protocol_version(PROTOCOL_VERSION),
bandwidth_scheduler_state_hash: CryptoHash::default(),
contract_updates: Default::default(),
stats: ChunkApplyStatsV0::dummy(),
})
}

Expand Down
3 changes: 3 additions & 0 deletions chain/chain/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use near_primitives::bandwidth_scheduler::BlockBandwidthRequests;
pub use near_primitives::block::{Block, BlockHeader, Tip};
use near_primitives::challenge::{ChallengesResult, PartialState};
use near_primitives::checked_feature;
use near_primitives::chunk_apply_stats::ChunkApplyStatsV0;
use near_primitives::congestion_info::BlockCongestionInfo;
use near_primitives::congestion_info::CongestionInfo;
use near_primitives::congestion_info::ExtendedCongestionInfo;
Expand Down Expand Up @@ -114,6 +115,8 @@ pub struct ApplyChunkResult {
pub bandwidth_scheduler_state_hash: CryptoHash,
/// Contracts accessed and deployed while applying the chunk.
pub contract_updates: ContractUpdates,
/// Extra information gathered during chunk application.
pub stats: ChunkApplyStatsV0,
}

impl ApplyChunkResult {
Expand Down
2 changes: 1 addition & 1 deletion core/primitives/src/bandwidth_scheduler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ pub struct LinkAllowance {
}

/// Parameters used in the bandwidth scheduler algorithm.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, BorshSerialize, BorshDeserialize)]
pub struct BandwidthSchedulerParams {
/// This much bandwidth is granted by default.
/// base_bandwidth = (max_shard_bandwidth - max_single_grant) / (num_shards - 1)
Expand Down
Loading

0 comments on commit 6780628

Please sign in to comment.