Skip to content

Commit

Permalink
pageserver: add pageserver_deltas_per_read_global metric (#10570)
Browse files Browse the repository at this point in the history
## Problem

We suspect that Postgres checkpoints will limit the number of page
deltas necessary to reconstruct a page, but don't know for certain.

Touches neondatabase/cloud#23283.

## Summary of changes

Add `pageserver_deltas_per_read_global` metric.

This pairs with `pageserver_layers_per_read_global` from #10573.
  • Loading branch information
erikgrinaker authored Jan 30, 2025
1 parent b247271 commit d3db96c
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 1 deletion.
11 changes: 11 additions & 0 deletions pageserver/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,16 @@ pub(crate) static LAYERS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
.expect("failed to define a metric")
});

pub(crate) static DELTAS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
// We expect this to be low because of Postgres checkpoints. Let's see if that holds.
register_histogram!(
"pageserver_deltas_per_read_global",
"Number of delta pages applied to image page per read",
vec![0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0],
)
.expect("failed to define a metric")
});

pub(crate) static CONCURRENT_INITDBS: Lazy<UIntGauge> = Lazy::new(|| {
register_uint_gauge!(
"pageserver_concurrent_initdb",
Expand Down Expand Up @@ -3919,6 +3929,7 @@ pub fn preinitialize_metrics(conf: &'static PageServerConf) {
// histograms
[
&LAYERS_PER_READ_GLOBAL,
&DELTAS_PER_READ_GLOBAL,
&WAIT_LSN_TIME,
&WAL_REDO_TIME,
&WAL_REDO_RECORDS_HISTOGRAM,
Expand Down
10 changes: 10 additions & 0 deletions pageserver/src/tenant/storage_layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,16 @@ pub(crate) struct ValueReconstructState {
pub(crate) img: Option<(Lsn, Bytes)>,
}

impl ValueReconstructState {
/// Returns the number of page deltas applied to the page image.
pub fn num_deltas(&self) -> usize {
match self.img {
Some(_) => self.records.len(),
None => self.records.len() - 1, // omit will_init record
}
}
}

#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub(crate) enum ValueReconstructSituation {
Complete,
Expand Down
3 changes: 2 additions & 1 deletion pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ use pageserver_api::config::tenant_conf_defaults::DEFAULT_PITR_INTERVAL;

use crate::config::PageServerConf;
use crate::keyspace::{KeyPartitioning, KeySpace};
use crate::metrics::{TimelineMetrics, LAYERS_PER_READ_GLOBAL};
use crate::metrics::{TimelineMetrics, DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL};
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
use crate::tenant::config::TenantConfOpt;
use pageserver_api::reltag::RelTag;
Expand Down Expand Up @@ -1195,6 +1195,7 @@ impl Timeline {
return (key, Err(err));
}
};
DELTAS_PER_READ_GLOBAL.observe(converted.num_deltas() as f64);

// The walredo module expects the records to be descending in terms of Lsn.
// And we submit the IOs in that order, so, there shuold be no need to sort here.
Expand Down

1 comment on commit d3db96c

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

7565 tests run: 7204 passed, 0 failed, 361 skipped (full report)


Flaky tests (7)

Postgres 17

Code coverage* (full report)

  • functions: 33.4% (8514 of 25501 functions)
  • lines: 49.1% (71497 of 145524 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
d3db96c at 2025-01-30T13:18:19.494Z :recycle:

Please sign in to comment.