From 9953f2a8b39ccd25176eb4ed2ea324d05d99f68b Mon Sep 17 00:00:00 2001
From: Stephen Dolan
Date: Wed, 19 Feb 2025 13:47:55 +0000
Subject: [PATCH] Separate the mechanisms and APIs for dependent memory and
 custom blocks

Previously, allocating a block with caml_alloc_custom or
caml_alloc_custom_mem would both register a custom block finaliser and
accelerate the GC. Now, caml_alloc_custom(_mem) has no effect on the GC,
and the functions caml_adjust_gc_speed and caml_adjust_minor_gc_speed
become no-ops.

The GC speed increase for dependent memory now happens only through
caml_alloc_dependent_memory and caml_free_dependent_memory. The function
caml_alloc_custom_dep is available to call both caml_alloc_custom and
caml_alloc_dependent_memory; however, the user must ensure that
caml_free_dependent_memory is called in the finaliser. (Bigarrays have
already been updated to use this API in a previous patch.)

This is to pave the way for a new GC pacing policy. However, this patch
makes no change to pacing: the GC should behave as before on
e.g. bigarray values, which have been ported to the new API.

(Most of the code in this patch was written by @damiendoligez)
---
 otherlibs/runtime_events/runtime_events.ml  |  5 ++
 otherlibs/runtime_events/runtime_events.mli |  4 ++
 runtime/caml/bigarray.h                     |  2 +-
 runtime/caml/custom.h                       |  1 +
 runtime/caml/domain_state.tbl               | 14 +++--
 runtime/caml/gc_stats.h                     |  4 ++
 runtime/caml/minor_gc.h                     | 27 +++++++--
 runtime/caml/runtime_events.h               |  4 +-
 runtime/caml/shared_heap.h                  |  3 +
 runtime/custom.c                            | 66 +++++++--------------
 runtime/domain.c                            |  7 +--
 runtime/gc_stats.c                          | 11 ++++
 runtime/intern.c                            |  2 +-
 runtime/major_gc.c                          | 35 ++++-------
 runtime/memory.c                            | 61 +++++++++----------
 runtime/minor_gc.c                          | 50 ++++++++++++++--
 runtime/shared_heap.c                       |  5 ++
 17 files changed, 178 insertions(+), 123 deletions(-)

diff --git a/otherlibs/runtime_events/runtime_events.ml b/otherlibs/runtime_events/runtime_events.ml
index cbcb2bb13d3..c063422ef87 100644
--- a/otherlibs/runtime_events/runtime_events.ml
+++ b/otherlibs/runtime_events/runtime_events.ml
@@ -33,6 +33,7 @@ type runtime_counter =
 | EV_C_MAJOR_HEAP_POOL_FRAG_WORDS
 | EV_C_MAJOR_HEAP_POOL_LIVE_BLOCKS
 | EV_C_MAJOR_HEAP_LARGE_BLOCKS
+| EV_C_REQUEST_MINOR_REALLOC_DEPENDENT_TABLE

 type runtime_phase =
 | EV_EXPLICIT_GC_SET
@@ -83,6 +84,7 @@ type runtime_phase =
 | EV_COMPACT_EVACUATE
 | EV_COMPACT_FORWARD
 | EV_COMPACT_RELEASE
+| EV_MINOR_DEPENDENT

 type lifecycle =
   EV_RING_START
@@ -121,6 +123,8 @@ let runtime_counter_name counter =
     "major_heap_pool_live_blocks"
   | EV_C_MAJOR_HEAP_LARGE_BLOCKS ->
     "major_heap_large_blocks"
+  | EV_C_REQUEST_MINOR_REALLOC_DEPENDENT_TABLE ->
+    "request_minor_realloc_dependent_table"

 let runtime_phase_name phase =
   match phase with
@@ -172,6 +176,7 @@
   | EV_COMPACT_EVACUATE -> "compaction_evacuate"
   | EV_COMPACT_FORWARD -> "compaction_forward"
   | EV_COMPACT_RELEASE -> "compaction_release"
+  | EV_MINOR_DEPENDENT -> "minor_dependent"

 let lifecycle_name lifecycle =
   match lifecycle with
diff --git a/otherlibs/runtime_events/runtime_events.mli b/otherlibs/runtime_events/runtime_events.mli
index 9347beea510..e546badb8cb 100644
--- a/otherlibs/runtime_events/runtime_events.mli
+++ b/otherlibs/runtime_events/runtime_events.mli
@@ -87,6 +87,9 @@ Live blocks of a Domain's major heap pools.
 (** Live blocks of a Domain's major heap large allocations.
     @since 5.1 *)
+| EV_C_REQUEST_MINOR_REALLOC_DEPENDENT_TABLE
+(** Reallocation of the table of dependent memory from the minor heap
+    @since 5.3 *)

 (** The type for span events emitted by the runtime. *)
 type runtime_phase =
@@ -138,6 +141,7 @@ type runtime_phase =
 | EV_COMPACT_EVACUATE
 | EV_COMPACT_FORWARD
 | EV_COMPACT_RELEASE
+| EV_MINOR_DEPENDENT

 (** Lifecycle events for the ring itself. *)
 type lifecycle =
diff --git a/runtime/caml/bigarray.h b/runtime/caml/bigarray.h
index 60a22309c18..a663550a671 100644
--- a/runtime/caml/bigarray.h
+++ b/runtime/caml/bigarray.h
@@ -81,7 +81,7 @@ enum caml_ba_subarray {
 struct caml_ba_proxy {
   atomic_uintnat refcount;      /* Reference count */
   void * data;                  /* Pointer to base of actual data */
-  uintnat size;                 /* Size of data in bytes (if mapped file) */
+  uintnat size;                 /* Size of data in bytes */
 };

 struct caml_ba_array {
diff --git a/runtime/caml/custom.h b/runtime/caml/custom.h
index fce4cd33cb6..42d23d205a3 100644
--- a/runtime/caml/custom.h
+++ b/runtime/caml/custom.h
@@ -53,6 +53,7 @@ extern "C" {

 CAMLextern uintnat caml_custom_major_ratio;
+CAMLextern uintnat caml_custom_minor_ratio;

 CAMLextern value caml_alloc_custom(const struct custom_operations * ops,
                                    uintnat size, /*size in bytes*/
diff --git a/runtime/caml/domain_state.tbl b/runtime/caml/domain_state.tbl
index 915a9bbb090..7184c84916f 100644
--- a/runtime/caml/domain_state.tbl
+++ b/runtime/caml/domain_state.tbl
@@ -90,6 +90,10 @@ DOMAIN_STATE(uintnat, allocated_words_direct)
 /* Number of words allocated directly to the major heap since
    the latest slice. (subset of allocated_words) */

+DOMAIN_STATE(uintnat, allocated_dependent_bytes)
+/* Number of external bytes whose pointers were promoted or allocated in
+   the major heap since the latest slice. */
+
 DOMAIN_STATE(uintnat, swept_words)

 DOMAIN_STATE(uintnat, major_slice_epoch)
@@ -138,11 +142,8 @@ DOMAIN_STATE(int, unique_id)
 DOMAIN_STATE(value, dls_root)
 /* Domain-local state */

-DOMAIN_STATE(double, extra_heap_resources)
-DOMAIN_STATE(double, extra_heap_resources_minor)
-
-DOMAIN_STATE(uintnat, dependent_size)
-DOMAIN_STATE(uintnat, dependent_allocated)
+/* How much external memory is currently held by the minor and major heap. */
+DOMAIN_STATE(uintnat, minor_dependent_bsz)

 /* How much work needs to be done (by all domains) before we stop this slice. */
 DOMAIN_STATE(intnat, slice_target)
@@ -165,6 +166,9 @@ DOMAIN_STATE(struct caml_intern_state*, intern_state)
 DOMAIN_STATE(uintnat, stat_minor_words)
 DOMAIN_STATE(uintnat, stat_promoted_words)
 DOMAIN_STATE(uintnat, stat_major_words)
+DOMAIN_STATE(uintnat, stat_minor_dependent_bytes)
+DOMAIN_STATE(uintnat, stat_promoted_dependent_bytes)
+DOMAIN_STATE(uintnat, stat_major_dependent_bytes)
 DOMAIN_STATE(intnat, stat_forced_major_collections)
 DOMAIN_STATE(uintnat, stat_blocks_marked)
diff --git a/runtime/caml/gc_stats.h b/runtime/caml/gc_stats.h
index 8aff709b633..b3e33c334aa 100644
--- a/runtime/caml/gc_stats.h
+++ b/runtime/caml/gc_stats.h
@@ -41,6 +41,7 @@ struct heap_stats {
   intnat large_words;
   intnat large_max_words;
   intnat large_blocks;
+  intnat dependent_bytes;
 };

 /* Note: accumulating stats then removing them is not a no-op, as
@@ -56,6 +57,9 @@ struct alloc_stats {
   uint64_t minor_words;
   uint64_t promoted_words;
   uint64_t major_words;
+  uint64_t minor_dependent_bytes;
+  uint64_t promoted_dependent_bytes;
+  uint64_t major_dependent_bytes;
   uint64_t forced_major_collections;
 };
 void caml_accum_alloc_stats(
diff --git a/runtime/caml/minor_gc.h b/runtime/caml/minor_gc.h
index 2d7bf42bab4..ad99ff2d8c5 100644
--- a/runtime/caml/minor_gc.h
+++ b/runtime/caml/minor_gc.h
@@ -57,15 +57,20 @@ struct caml_ephe_ref_table CAML_TABLE_STRUCT(struct caml_ephe_ref_elt);

 struct caml_custom_elt {
   value block;     /* The finalized block in the minor heap. */
-  mlsize_t mem;    /* The parameters for adjusting GC speed. */
-  mlsize_t max;
 };
 struct caml_custom_table CAML_TABLE_STRUCT(struct caml_custom_elt);

+struct caml_dependent_elt {
+  value block;     /* The finalized block in the minor heap. */
+  mlsize_t mem;    /* The size in bytes of the dependent memory. */
+};
+struct caml_dependent_table CAML_TABLE_STRUCT(struct caml_dependent_elt);
+
 struct caml_minor_tables {
   struct caml_ref_table major_ref;
   struct caml_ephe_ref_table ephe_ref;
   struct caml_custom_table custom;
+  struct caml_dependent_table dependent;
 };

 CAMLextern void caml_minor_collection (void);
@@ -85,6 +90,7 @@ void caml_alloc_table (struct caml_ref_table *tbl, asize_t sz, asize_t rsv);
 extern void caml_realloc_ref_table (struct caml_ref_table *);
 extern void caml_realloc_ephe_ref_table (struct caml_ephe_ref_table *);
 extern void caml_realloc_custom_table (struct caml_custom_table *);
+extern void caml_realloc_dependent_table (struct caml_dependent_table *);
 struct caml_minor_tables* caml_alloc_minor_tables(void);
 void caml_free_minor_tables(struct caml_minor_tables*);
 void caml_empty_minor_heap_setup(caml_domain_state* domain, void*);
@@ -117,8 +123,7 @@ Caml_inline void add_to_ephe_ref_table (struct caml_ephe_ref_table *tbl,
   CAMLassert(ephe_ref->offset < Wosize_val(ephe_ref->ephe));
 }

-Caml_inline void add_to_custom_table (struct caml_custom_table *tbl, value v,
-                                      mlsize_t mem, mlsize_t max)
+Caml_inline void add_to_custom_table (struct caml_custom_table *tbl, value v)
 {
   struct caml_custom_elt *elt;
   if (tbl->ptr >= tbl->limit){
@@ -127,8 +132,20 @@ Caml_inline void add_to_custom_table (struct caml_custom_table *tbl, value v,
   }
   elt = tbl->ptr++;
   elt->block = v;
+}
+
+Caml_inline void add_to_dependent_table (struct caml_dependent_table *tbl,
+                                         value v,
+                                         mlsize_t mem)
+{
+  struct caml_dependent_elt *elt;
+  if (tbl->ptr >= tbl->limit){
+    CAMLassert (tbl->ptr == tbl->limit);
+    caml_realloc_dependent_table (tbl);
+  }
+  elt = tbl->ptr++;
+  elt->block = v;
   elt->mem = mem;
-  elt->max = max;
 }

 #endif /* CAML_INTERNALS */
diff --git a/runtime/caml/runtime_events.h b/runtime/caml/runtime_events.h
index 93996a0330f..b6e36f06c0c 100644
--- a/runtime/caml/runtime_events.h
+++ b/runtime/caml/runtime_events.h
@@ -118,7 +118,8 @@ typedef enum {
   EV_COMPACT,
   EV_COMPACT_EVACUATE,
   EV_COMPACT_FORWARD,
-  EV_COMPACT_RELEASE
+  EV_COMPACT_RELEASE,
+  EV_MINOR_DEPENDENT,
 } ev_runtime_phase;

 typedef enum {
@@ -139,6 +140,7 @@ typedef enum {
   EV_C_MAJOR_HEAP_POOL_FRAG_WORDS,
   EV_C_MAJOR_HEAP_POOL_LIVE_BLOCKS,
   EV_C_MAJOR_HEAP_LARGE_BLOCKS,
+  EV_C_REQUEST_MINOR_REALLOC_DEPENDENT_TABLE,
 } ev_runtime_counter;

 typedef enum {
diff --git a/runtime/caml/shared_heap.h b/runtime/caml/shared_heap.h
index f7268b05522..7eb8576fdfd 100644
--- a/runtime/caml/shared_heap.h
+++ b/runtime/caml/shared_heap.h
@@ -39,6 +39,9 @@ value* caml_shared_try_alloc(struct caml_heap_state*,
 /* If we were to grow the shared heap, how much would we grow it? */
 uintnat caml_shared_heap_grow_bsize(void);

+/* Update the dependent_bytes field of the heap stats. */
+void caml_add_dependent_bytes (struct caml_heap_state *local, intnat n);
+
 /* Copy the domain-local heap stats into a heap stats sample. */
 void caml_collect_heap_stats_sample(
   struct caml_heap_state* local,
diff --git a/runtime/custom.c b/runtime/custom.c
index 25113a24072..55c52fb148a 100644
--- a/runtime/custom.c
+++ b/runtime/custom.c
@@ -51,22 +51,8 @@ mlsize_t caml_custom_get_max_major (void)
     * caml_custom_major_ratio;
 }

-/* [mem] is an amount of out-of-heap resources, in the same units as
-   [max_major] and [max_minor]. When the cumulated amount of such
-   resources reaches [max_minor] (for resources held by the minor
-   heap) we do a minor collection; when it reaches [max_major] (for
-   resources held by the major heap), we guarantee that a major cycle
-   is done.
-
-   If [max_major] is 0, then [mem] is a number of bytes and the actual
-   limit is [caml_custom_get_max_major ()] computed at the
-   time when the custom block is promoted to the major heap.
-*/
 static value alloc_custom_gen (const struct custom_operations * ops,
                                uintnat bsz,
-                               mlsize_t mem,
-                               mlsize_t max_major,
-                               mlsize_t max_minor,
                                int minor_ok,
                                int local)
 {
@@ -83,21 +69,13 @@ static value alloc_custom_gen (const struct custom_operations * ops,
   else if (wosize <= Max_young_wosize && minor_ok) {
     result = caml_alloc_small(wosize, Custom_tag);
     Custom_ops_val(result) = ops;
-    if (ops->finalize != NULL || mem != 0) {
+    if (ops->finalize != NULL) {
       /* Record the extra resources in case the block gets promoted. */
-      add_to_custom_table (&Caml_state->minor_tables->custom, result,
-                           mem, max_major);
-      /* Keep track of extra resources held by custom block in
-         minor heap. */
-      if (mem != 0) {
-        caml_adjust_minor_gc_speed (mem, max_minor);
-      }
+      add_to_custom_table (&Caml_state->minor_tables->custom, result);
     }
   } else {
     result = caml_alloc_shr(wosize, Custom_tag);
     Custom_ops_val(result) = ops;
-    caml_adjust_gc_speed(mem, max_major);
-    result = caml_check_urgent_gc(result);
   }
   CAMLreturn(result);
 }
@@ -108,23 +86,12 @@ Caml_inline mlsize_t get_max_minor (void)
     Bsize_wsize (Caml_state->minor_heap_wsz) / 100 * caml_custom_minor_ratio;
 }

-static value caml_alloc_custom0(const struct custom_operations * ops,
-                                uintnat bsz,
-                                mlsize_t mem,
-                                mlsize_t max,
-                                int local)
-{
-  mlsize_t max_major = max;
-  mlsize_t max_minor = max == 0 ? get_max_minor() : max;
-  return alloc_custom_gen (ops, bsz, mem, max_major, max_minor, 1, local);
-}
-
 CAMLexport value caml_alloc_custom(const struct custom_operations * ops,
                                    uintnat bsz,
                                    mlsize_t mem,
                                    mlsize_t max)
 {
-  return caml_alloc_custom0(ops, bsz, mem, max, 0);
+  return alloc_custom_gen(ops, bsz, /* minor_ok: */ 1, /* local: */ 0);
 }

 CAMLexport value caml_alloc_custom_local(const struct custom_operations * ops,
@@ -136,7 +103,7 @@ CAMLexport value caml_alloc_custom_local(const struct custom_operations * ops,
     caml_invalid_argument(
       "caml_alloc_custom_local: finalizers not supported");

-  return caml_alloc_custom0(ops, bsz, mem, max, 1);
+  return alloc_custom_gen(ops, bsz, /* minor_ok: */ 1, /* local: */ 1);
 }

 CAMLexport value caml_alloc_custom_mem(const struct custom_operations * ops,
@@ -150,19 +117,30 @@ CAMLexport value caml_alloc_custom_mem(const struct custom_operations * ops,
   } else {
     max_minor_single = max_minor * caml_custom_minor_max_bsz / 100;
   }
-  value v = alloc_custom_gen (ops, bsz, mem, 0,
-                              max_minor, (mem < max_minor_single), 0);
+
+  value v = alloc_custom_gen (ops, bsz,
+                              /* minor_ok: */ (mem <= max_minor_single),
+                              /* local: */ 0);
   size_t mem_words = (mem + sizeof(value) - 1) / sizeof(value);
   caml_memprof_sample_block(v, mem_words, mem_words,
                             CAML_MEMPROF_SRC_CUSTOM);
   return v;
 }

-CAMLexport value caml_alloc_custom_dep(const struct custom_operations * ops,
-                                       uintnat size, mlsize_t mem)
+/* For each block allocated with [caml_alloc_custom_dep],
+   the finalizer must call [caml_free_dependent_memory].
+   [bsz] is the size in bytes of the payload inside the heap-allocated
+   block, and [mem] is the size in bytes of the external memory
+   held by this block.
+*/
+CAMLexport value caml_alloc_custom_dep (const struct custom_operations * ops,
+                                        uintnat bsz,
+                                        mlsize_t mem)
 {
-  /* For now, alias caml_alloc_custom_mem, but this implementation
-     is to be replaced */
-  return caml_alloc_custom_mem(ops, size, mem);
+  CAMLparam0();
+  CAMLlocal1(result);
+  result = caml_alloc_custom_mem(ops, bsz, mem);
+  caml_alloc_dependent_memory (result, mem);
+  CAMLreturn(result);
 }

 struct custom_operations_list {
diff --git a/runtime/domain.c b/runtime/domain.c
index e2380caae27..445e9154550 100644
--- a/runtime/domain.c
+++ b/runtime/domain.c
@@ -658,11 +658,8 @@ static void domain_create(uintnat initial_minor_heap_wsize,

   CAMLassert(!interruptor_has_pending(s));

-  domain_state->extra_heap_resources = 0.0;
-  domain_state->extra_heap_resources_minor = 0.0;
-
-  domain_state->dependent_size = 0;
-  domain_state->dependent_allocated = 0;
+  domain_state->allocated_dependent_bytes = 0;
+  domain_state->minor_dependent_bsz = 0;

   domain_state->major_work_done_between_slices = 0;

diff --git a/runtime/gc_stats.c b/runtime/gc_stats.c
index 5a09bda4664..eb1d05e6b54 100644
--- a/runtime/gc_stats.c
+++ b/runtime/gc_stats.c
@@ -38,6 +38,7 @@ void caml_accum_heap_stats(struct heap_stats* acc, const struct heap_stats* h)
   acc->large_max_words = intnat_max(acc->large_max_words, acc->large_words);
   acc->large_max_words = intnat_max(acc->large_max_words, h->large_max_words);
   acc->large_blocks += h->large_blocks;
+  acc->dependent_bytes += h->dependent_bytes;
 }

 void caml_remove_heap_stats(struct heap_stats* acc, const struct heap_stats* h)
@@ -48,6 +49,7 @@
   acc->pool_frag_words -= h->pool_frag_words;
   acc->large_words -= h->large_words;
   acc->large_blocks -= h->large_blocks;
+  acc->dependent_bytes -= h->dependent_bytes;
 }

 void caml_accum_alloc_stats(
@@ -57,6 +59,9 @@
   acc->minor_words += s->minor_words;
   acc->promoted_words += s->promoted_words;
   acc->major_words += s->major_words;
+  acc->minor_dependent_bytes += s->minor_dependent_bytes;
+  acc->promoted_dependent_bytes += s->promoted_dependent_bytes;
+  acc->major_dependent_bytes += s->major_dependent_bytes;
   acc->forced_major_collections += s->forced_major_collections;
 }

@@ -67,6 +72,9 @@
   sample->minor_words = local->stat_minor_words;
   sample->promoted_words = local->stat_promoted_words;
   sample->major_words = local->stat_major_words;
+  sample->minor_dependent_bytes = local->stat_minor_dependent_bytes;
+  sample->promoted_dependent_bytes = local->stat_promoted_dependent_bytes;
+  sample->major_dependent_bytes = local->stat_major_dependent_bytes;
   sample->forced_major_collections = local->stat_forced_major_collections;
 }

@@ -75,6 +83,9 @@
   local->stat_minor_words = 0;
   local->stat_promoted_words = 0;
   local->stat_major_words = 0;
+  local->stat_minor_dependent_bytes = 0;
+  local->stat_promoted_dependent_bytes = 0;
+  local->stat_major_dependent_bytes = 0;
   local->stat_forced_major_collections = 0;
 }

diff --git a/runtime/intern.c b/runtime/intern.c
index 4db6560e065..81a2b1c367b 100644
--- a/runtime/intern.c
+++ b/runtime/intern.c
@@ -715,7 +715,7 @@ static void intern_rec(struct caml_intern_state* s,
       s->intern_obj_table[s->obj_counter++] = v;
       if (ops->finalize != NULL && Is_young(v)) {
         /* Remember that the block has a finalizer. */
-        add_to_custom_table (&d->minor_tables->custom, v, 0, 1);
+        add_to_custom_table (&d->minor_tables->custom, v);
       }
       break;
     }
diff --git a/runtime/major_gc.c b/runtime/major_gc.c
index e528b4120ec..bac7de8e4ae 100644
--- a/runtime/major_gc.c
+++ b/runtime/major_gc.c
@@ -39,6 +39,7 @@
 #include "caml/shared_heap.h"
 #include "caml/startup_aux.h"
 #include "caml/weak.h"
+#include "caml/custom.h"

 /* NB the MARK_STACK_INIT_SIZE must be larger than the number of objects
    that can be in a pool, see POOL_WSIZE */
@@ -664,21 +665,22 @@ static void update_major_slice_work(intnat howmuch,
                                     int may_access_gc_phase)
 {
   double heap_words;
-  intnat alloc_work, dependent_work, extra_work, new_work;
-  intnat my_alloc_count, my_alloc_direct_count, my_dependent_count;
+  intnat alloc_work, extra_work, new_work;
+  intnat my_alloc_count, my_alloc_direct_count;
   double my_extra_count;
   caml_domain_state *dom_st = Caml_state;
   uintnat heap_size, heap_sweep_words, total_cycle_work;

   my_alloc_count = dom_st->allocated_words;
   my_alloc_direct_count = dom_st->allocated_words_direct;
-  my_dependent_count = dom_st->dependent_allocated;
-  my_extra_count = dom_st->extra_heap_resources;
+  my_extra_count =
+    (double)dom_st->allocated_dependent_bytes /
+    (double)caml_custom_get_max_major ();
+  if (my_extra_count > 1.0) my_extra_count = 1.0;
   dom_st->stat_major_words += dom_st->allocated_words;
   dom_st->allocated_words = 0;
   dom_st->allocated_words_direct = 0;
-  dom_st->dependent_allocated = 0;
-  dom_st->extra_heap_resources = 0.0;
+  dom_st->allocated_dependent_bytes = 0;

   /* Free memory at the start of the GC cycle (garbage + free list) (assumed):
                  FM = heap_words * caml_percent_free
@@ -730,16 +732,6 @@
     alloc_work = 0;
   }

-  if (dom_st->dependent_size > 0) {
-    double dependent_ratio =
-      total_cycle_work
-      * (100 + caml_percent_free)
-      / dom_st-> dependent_size / caml_percent_free;
-    dependent_work = (intnat) (my_dependent_count * dependent_ratio);
-  }else{
-    dependent_work = 0;
-  }
-
   extra_work = (intnat) (my_extra_count * (double) total_cycle_work);

   caml_gc_message (0x40, "heap_words = %"
@@ -754,12 +746,6 @@
   caml_gc_message (0x40, "alloc work-to-do = %"
                          ARCH_INTNAT_PRINTF_FORMAT "d\n",
                          alloc_work);
-  caml_gc_message (0x40, "dependent_words = %"
-                         ARCH_INTNAT_PRINTF_FORMAT "u\n",
-                         my_dependent_count);
-  caml_gc_message (0x40, "dependent work-to-do = %"
-                         ARCH_INTNAT_PRINTF_FORMAT "d\n",
-                         dependent_work);
   caml_gc_message (0x40, "extra_heap_resources = %"
                          ARCH_INTNAT_PRINTF_FORMAT "uu\n",
                          (uintnat) (my_extra_count * 1000000));
@@ -767,7 +753,7 @@
                          ARCH_INTNAT_PRINTF_FORMAT "d\n",
                          extra_work);

-  intnat offheap_work = max2 (dependent_work, extra_work);
+  intnat offheap_work = extra_work;
   intnat clamp = alloc_work * caml_custom_work_max_multiplier;
   if (offheap_work > clamp) {
     caml_gc_message(0x40, "Work clamped to %"
@@ -794,7 +780,6 @@
                  " %"ARCH_INTNAT_PRINTF_FORMAT "u heap_words, "
                  " %"ARCH_INTNAT_PRINTF_FORMAT "u allocated, "
                  " %"ARCH_INTNAT_PRINTF_FORMAT "d alloc_work, "
-                 " %"ARCH_INTNAT_PRINTF_FORMAT "d dependent_work, "
                  " %"ARCH_INTNAT_PRINTF_FORMAT "d extra_work, "
                  " %"ARCH_INTNAT_PRINTF_FORMAT "u work counter %s, "
                  " %"ARCH_INTNAT_PRINTF_FORMAT "u alloc counter, "
                  , caml_gc_phase_char(may_access_gc_phase),
                  (uintnat)heap_words, my_alloc_count,
-                 alloc_work, dependent_work, extra_work,
+                 alloc_work, extra_work,
                  atomic_load (&work_counter),
                  atomic_load (&work_counter) > atomic_load (&alloc_counter) ?
                  "[ahead]" : "[behind]",
diff --git a/runtime/memory.c b/runtime/memory.c
index 5f78b4d4f9c..473b47cf7f8 100644
--- a/runtime/memory.c
+++ b/runtime/memory.c
@@ -238,50 +238,47 @@ CAMLexport CAMLweakdef void caml_modify (volatile value *fp, value val)
  */
 CAMLexport void caml_alloc_dependent_memory (value v, mlsize_t nbytes)
 {
-  /* No-op for now */
+  if (nbytes == 0) return;
+  CAMLassert (Is_block (v));
+  if (Is_young (v)){
+    Caml_state->stat_minor_dependent_bytes += nbytes;
+    add_to_dependent_table (&Caml_state->minor_tables->dependent, v, nbytes);
+    Caml_state->minor_dependent_bsz += nbytes;
+    if (Caml_state->minor_dependent_bsz >
+        Bsize_wsize (Caml_state->minor_heap_wsz)
+        / 100 * caml_custom_minor_ratio){
+      caml_request_minor_gc ();
+    }
+  }else{
+    caml_add_dependent_bytes (Caml_state->shared_heap, nbytes);
+    Caml_state->allocated_dependent_bytes += nbytes;
+    /* FIXME sdolan: what's the right condition here? */
+    if (Caml_state->allocated_dependent_bytes
+        >= caml_custom_get_max_major() / 5){
+      CAML_EV_COUNTER (EV_C_REQUEST_MAJOR_ALLOC_SHR, 1);
+      caml_request_major_slice(1);
+    }
+  }
 }

 CAMLexport void caml_free_dependent_memory (value v, mlsize_t nbytes)
 {
-  /* No-op for now */
+  CAMLassert (Is_block (v));
+  if (Is_young (v)){
+    Caml_state->minor_dependent_bsz -= nbytes;
+  }else{
+    caml_add_dependent_bytes (Caml_state->shared_heap, -nbytes);
+  }
 }

-/* Use this function to tell the major GC to speed up when you use
-   finalized blocks to automatically deallocate resources (other
-   than memory). The GC will do at least one cycle every [max]
-   allocated resources; [res] is the number of resources allocated
-   this time.
-   Note that only [res/max] is relevant. The units (and kind of
-   resource) can change between calls to [caml_adjust_gc_speed].
-
-   If [max] = 0, then we use a number proportional to the major heap
-   size and [caml_custom_major_ratio]. In this case, [mem] should
-   be a number of bytes and the trade-off between GC work and space
-   overhead is under the control of the user through
-   [caml_custom_major_ratio].
-*/
 CAMLexport void caml_adjust_gc_speed (mlsize_t res, mlsize_t max)
 {
-  if (max == 0) max = caml_custom_get_max_major ();
-  if (res > max) res = max;
-  Caml_state->extra_heap_resources += (double) res / (double) max;
-  if (Caml_state->extra_heap_resources > 0.2){
-    CAML_EV_COUNTER (EV_C_REQUEST_MAJOR_ADJUST_GC_SPEED, 1);
-    caml_request_major_slice (1);
-  }
+  /* No-op, present only for compatibility */
 }

-/* This function is analogous to [caml_adjust_gc_speed]. When the
-   accumulated sum of [res/max] values reaches 1, a minor GC is
-   triggered.
-*/
 CAMLexport void caml_adjust_minor_gc_speed (mlsize_t res, mlsize_t max)
 {
-  if (max == 0) max = 1;
-  Caml_state->extra_heap_resources_minor += (double) res / (double) max;
-  if (Caml_state->extra_heap_resources_minor > 1.0) {
-    caml_request_minor_gc ();
-  }
+  /* No-op, present only for compatibility */
 }

 /* You must use [caml_initialize] to store the initial value in a field of a
diff --git a/runtime/minor_gc.c b/runtime/minor_gc.c
index 33177aa8206..7deff7aa642 100644
--- a/runtime/minor_gc.c
+++ b/runtime/minor_gc.c
@@ -114,6 +114,7 @@ static void reset_minor_tables(struct caml_minor_tables* r)
   reset_table((struct generic_table *)&r->major_ref);
   reset_table((struct generic_table *)&r->ephe_ref);
   reset_table((struct generic_table *)&r->custom);
+  reset_table((struct generic_table *)&r->dependent);
 }

 void caml_free_minor_tables(struct caml_minor_tables* r)
@@ -519,8 +520,11 @@ void caml_empty_minor_heap_domain_clear(caml_domain_state* domain)
   clear_table ((struct generic_table *)&minor_tables->custom,
                sizeof(struct caml_custom_elt),
                "custom");
+  clear_table ((struct generic_table *)&minor_tables->dependent,
+               sizeof(struct caml_dependent_elt),
+               "dependent");

-  domain->extra_heap_resources_minor = 0.0;
+  domain->minor_dependent_bsz = 0;
 }

 /* Try to do a major slice, returns nonzero if there was any work available,
@@ -769,9 +773,7 @@ static void custom_finalize_minor (caml_domain_state * domain)
        elt < domain->minor_tables->custom.ptr; elt++) {
     value *v = &elt->block;
     if (Is_block(*v) && Is_young(*v)) {
-      if (get_header_val(*v) == 0) { /* value copied to major heap */
-        caml_adjust_gc_speed(elt->mem, elt->max);
-      } else {
+      if (get_header_val(*v) != 0) { /* value not copied to major heap */
         void (*final_fun)(value) = Custom_ops_val(*v)->finalize;
         if (final_fun != NULL) final_fun(*v);
       }
@@ -779,6 +781,31 @@ static void custom_finalize_minor (caml_domain_state * domain)
   }
 }

+static void dependent_finalize_minor (caml_domain_state *domain)
+{
+  struct caml_dependent_elt *elt;
+  for (elt = domain->minor_tables->dependent.base;
+       elt < domain->minor_tables->dependent.ptr; elt++) {
+    value *v = &elt->block;
+    CAMLassert (Is_block (*v));
+    if (Is_young(*v)) {
+      if (get_header_val(*v) == 0) { /* value copied to major heap */
+#ifdef DEBUG
+        domain->minor_dependent_bsz -= elt->mem;
+        /* see assertion below */
+#endif
+        /* inlined version of [caml_alloc_dependent_memory] */
+        domain->allocated_dependent_bytes += elt->mem;
+        domain->stat_promoted_dependent_bytes += elt->mem;
+        caml_add_dependent_bytes (domain->shared_heap, elt->mem);
+      }
+    }
+  }
+  /* At this point, everything must be finalized or promoted. */
+  CAMLassert (domain->minor_dependent_bsz == 0);
+  domain->minor_dependent_bsz = 0;
+}
+
 /* Increment the counter non-atomically, when it is already known that this
    thread is alone in trying to increment it. */
 static void nonatomic_increment_counter(atomic_uintnat* counter) {
@@ -881,6 +908,11 @@ caml_stw_empty_minor_heap_no_major_slice(caml_domain_state* domain,
   custom_finalize_minor(domain);
   CAML_EV_END(EV_MINOR_FINALIZED);

+  CAML_EV_BEGIN(EV_MINOR_DEPENDENT);
+  caml_gc_log("accounting for minor blocks with dependent memory");
+  dependent_finalize_minor(domain);
+  CAML_EV_END(EV_MINOR_DEPENDENT);
+
   CAML_EV_BEGIN(EV_MINOR_FINALIZERS_ADMIN);
   caml_gc_log("running finalizer data structure book-keeping");
   caml_final_update_last_minor(domain);
@@ -1111,3 +1143,13 @@ void caml_realloc_custom_table (struct caml_custom_table *tbl)
     "Growing custom_table to %" ARCH_INTNAT_PRINTF_FORMAT "dk bytes\n",
     "custom_table overflow");
 }
+
+void caml_realloc_dependent_table (struct caml_dependent_table *tbl)
+{
+  realloc_generic_table
+    ((struct generic_table *) tbl, sizeof (struct caml_dependent_elt),
+     EV_C_REQUEST_MINOR_REALLOC_DEPENDENT_TABLE,
+     "dependent_table threshold crossed\n",
+     "Growing dependent_table to %" ARCH_INTNAT_PRINTF_FORMAT "dk bytes\n",
+     "dependent_table overflow");
+}
diff --git a/runtime/shared_heap.c b/runtime/shared_heap.c
index b973556de06..97b1efcdd22 100644
--- a/runtime/shared_heap.c
+++ b/runtime/shared_heap.c
@@ -760,6 +760,11 @@ static void adopt_all_pool_stats_with_lock(struct caml_heap_state *adopter) {
   memset(&pool_freelist.stats, 0, sizeof(pool_freelist.stats));
 }

+void caml_add_dependent_bytes (struct caml_heap_state *local, intnat n)
+{
+  local->stats.dependent_bytes += n;
+}
+
 void caml_collect_heap_stats_sample(
   struct caml_heap_state* local,
   struct heap_stats* sample)
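
Usage sketch (not part of the patch): under the new API, a C binding that
wraps an external buffer in a custom block would pair caml_alloc_custom_dep
with a finaliser that calls caml_free_dependent_memory. The type ext_buffer
and the names Ext_buffer_val, ext_buffer_ops and alloc_ext_buffer below are
hypothetical; only caml_alloc_custom_dep, caml_free_dependent_memory and the
custom_operations defaults are real runtime API.

    #include <stdlib.h>
    #include <caml/custom.h>
    #include <caml/fail.h>
    #include <caml/memory.h>
    #include <caml/mlvalues.h>

    /* Hypothetical external buffer wrapped by a custom block. */
    struct ext_buffer { char *data; size_t len; };

    #define Ext_buffer_val(v) ((struct ext_buffer *) Data_custom_val(v))

    static void ext_buffer_finalize (value v)
    {
      struct ext_buffer *b = Ext_buffer_val(v);
      /* Required by the new API: un-account the external bytes that
         caml_alloc_custom_dep registered, then free the resource. */
      caml_free_dependent_memory (v, b->len);
      free (b->data);
    }

    static struct custom_operations ext_buffer_ops = {
      "example.ext_buffer",
      ext_buffer_finalize,
      custom_compare_default,
      custom_hash_default,
      custom_serialize_default,
      custom_deserialize_default,
      custom_compare_ext_default,
      custom_fixed_length_default
    };

    value alloc_ext_buffer (size_t len)
    {
      char *data = malloc (len);
      if (data == NULL) caml_raise_out_of_memory ();
      /* Allocates the custom block and calls caml_alloc_dependent_memory
         on it, so the GC paces itself against the [len] external bytes
         until the finaliser releases them. */
      value v = caml_alloc_custom_dep (&ext_buffer_ops,
                                       sizeof (struct ext_buffer), len);
      Ext_buffer_val(v)->data = data;
      Ext_buffer_val(v)->len = len;
      return v;
    }

Note the division of labour this illustrates: caml_alloc_custom_dep performs
the dependent-memory accounting at allocation time, but nothing in the
runtime un-registers those bytes automatically, which is why the finaliser
above must call caml_free_dependent_memory itself.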