Skip to content

Commit

Permalink
8325553: Parallel: Use per-marker cache for marking stats during Full GC
Browse files Browse the repository at this point in the history
  • Loading branch information
zhengyu.gu committed Jul 11, 2024
1 parent f9acc53 commit 722586a
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 48 deletions.
2 changes: 2 additions & 0 deletions src/hotspot/share/gc/parallel/psCompactionManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ ParCompactionManager::ParCompactionManager() {
_region_stack.initialize();

reset_bitmap_query_cache();

_marking_stats_cache = NULL;
}

void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
Expand Down
34 changes: 33 additions & 1 deletion src/hotspot/share/gc/parallel/psCompactionManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,35 @@ class ParCompactionManager : public CHeapObj<mtGC> {
// Do not implement an equivalent stack_pop. Deal with the
// marking stack and overflow stack directly.

public:
// Collects per-region live-word counts in a worker-local cache in order to
// reduce contention between threads. Each slot accumulates the live words of
// one region; the accumulated counts are added to the global summary data
// when a slot is evicted.
class MarkingStatsCache : public CHeapObj<mtGC> {
// Number of slots. Must be a power of two so that entry_mask can be used to
// map a region id to a slot.
constexpr static size_t num_entries = 1024;
static_assert(is_power_of_2(num_entries), "inv");
static_assert(num_entries > 0, "inv");

// A region id is mapped to its slot with (region_id & entry_mask).
constexpr static size_t entry_mask = num_entries - 1;

// One cache slot: the region it currently tracks and the live words
// accumulated for that region. A live_words of zero is treated as
// "nothing to flush".
struct CacheEntry {
size_t region_id;
size_t live_words;
};

// Slot storage; zero-initialized.
CacheEntry entries[num_entries] = {};

// Adds live_words to the slot for region_id. If the slot tracks a different
// region and holds a non-zero count, that count is evicted first.
inline void push(size_t region_id, size_t live_words);

public:
// Records an object of live_words words. The portion in the object's first
// region goes through the cache; portions in any following regions are
// written directly to the region data.
inline void push(oop obj, size_t live_words);

// Adds the live words accumulated in slot `index` to the global summary data.
inline void evict(size_t index);

// Evicts every slot with a non-zero count and resets that count to zero.
inline void evict_all();
};

MarkingStatsCache* _marking_stats_cache;

public:
static const size_t InvalidShadow = ~0;
static size_t pop_shadow_region_mt_safe(PSParallelCompact::RegionData* region_ptr);
static void push_shadow_region_mt_safe(size_t shadow_region);
Expand Down Expand Up @@ -195,6 +223,10 @@ class ParCompactionManager : public CHeapObj<mtGC> {
virtual void do_void();
};

inline void create_marking_stats_cache();

inline void flush_and_destroy_marking_stats_cache();

// Called after marking.
static void verify_all_marking_stack_empty() NOT_DEBUG_RETURN;

Expand Down
71 changes: 71 additions & 0 deletions src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ inline void ParCompactionManager::mark_and_push(T* p) {
assert(ParallelScavengeHeap::heap()->is_in(obj), "should be in heap");

if (mark_bitmap()->is_unmarked(obj) && PSParallelCompact::mark_obj(obj)) {
assert(_marking_stats_cache != nullptr, "inv");
_marking_stats_cache->push(obj, obj->size());
push(obj);
}
}
Expand Down Expand Up @@ -176,4 +178,73 @@ inline void ParCompactionManager::follow_contents(oop obj) {
}
}

// Accumulates live_words for region_id in the slot selected by
// (region_id & entry_mask). When the slot currently tracks a different region,
// any non-zero count it holds is evicted before the slot is taken over.
inline void ParCompactionManager::MarkingStatsCache::push(size_t region_id, size_t live_words) {
  const size_t slot_index = region_id & entry_mask;
  CacheEntry& slot = entries[slot_index];

  if (slot.region_id != region_id) {
    // Miss: flush the previous occupant (if it has anything to flush) and
    // let the new region take over the slot.
    if (slot.live_words != 0) {
      evict(slot_index);
    }
    slot.region_id = region_id;
    slot.live_words = live_words;
  } else {
    // Hit: keep accumulating for the same region.
    slot.live_words += live_words;
  }
}

// Records an object that starts at obj and spans live_words heap words.
// If the object lies within a single region its size is pushed through the
// cache. Otherwise only the part in the first region goes through the cache;
// every following region the object extends into gets its partial-object size
// and address written directly to the region data.
inline void ParCompactionManager::MarkingStatsCache::push(oop obj, size_t live_words) {
  ParallelCompactData& summary = PSParallelCompact::summary_data();
  const size_t words_per_region = ParallelCompactData::RegionSize;

  HeapWord* obj_start = cast_from_oop<HeapWord*>(obj);
  HeapWord* obj_last_word = obj_start + live_words - 1;
  const size_t first_region = summary.addr_to_region_idx(obj_start);
  const size_t last_region = summary.addr_to_region_idx(obj_last_word);

  if (first_region != last_region) {
    // Part of the object that lives in its first region; goes through the cache.
    push(first_region, words_per_region - summary.region_offset(obj_start));

    // Regions fully covered by the object; bypass cache.
    for (size_t mid = first_region + 1; mid < last_region; ++mid) {
      summary.region(mid)->set_partial_obj_size(words_per_region);
      summary.region(mid)->set_partial_obj_addr(obj_start);
    }

    // Region holding the tail of the object; bypass cache.
    const size_t tail_offset = summary.region_offset(obj_last_word);
    summary.region(last_region)->set_partial_obj_size(tail_offset + 1);
    summary.region(last_region)->set_partial_obj_addr(obj_start);
  } else {
    // The whole object sits inside one region.
    push(first_region, live_words);
  }
}

// Flushes slot `index` to the global summary data: the live words accumulated
// in the slot are added to the region the slot tracks, and the slot's count is
// then reset to zero.
// Resetting the count here keeps this public method safe to call more than
// once for the same slot (or before evict_all()) without adding the same words
// to the region twice.
inline void ParCompactionManager::MarkingStatsCache::evict(size_t index) {
  CacheEntry& entry = entries[index];
  ParallelCompactData& data = PSParallelCompact::summary_data();
  // flush to global data
  data.region(entry.region_id)->add_live_obj(entry.live_words);
  // The words are now accounted for globally; nothing is left to flush.
  entry.live_words = 0;
}

inline void ParCompactionManager::MarkingStatsCache::evict_all() {
for (size_t i = 0; i < num_entries; ++i) {
if (entries[i].live_words != 0) {
evict(i);
entries[i].live_words = 0;
}
}
}

inline void ParCompactionManager::create_marking_stats_cache() {
assert(_marking_stats_cache == nullptr, "precondition");
_marking_stats_cache = new MarkingStatsCache();
}

inline void ParCompactionManager::flush_and_destroy_marking_stats_cache() {
_marking_stats_cache->evict_all();
delete _marking_stats_cache;
_marking_stats_cache = nullptr;
}

#endif // SHARE_GC_PARALLEL_PSCOMPACTIONMANAGER_INLINE_HPP
70 changes: 27 additions & 43 deletions src/hotspot/share/gc/parallel/psParallelCompact.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1792,7 +1792,7 @@ bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
bool marked_for_unloading = false;

marking_start.update();
marking_phase(vmthread_cm, maximum_heap_compaction, &_gc_tracer);
marking_phase(&_gc_tracer);

bool max_on_system_gc = UseMaximumCompactionOnSystemGC
&& GCCause::is_user_requested_gc(gc_cause);
Expand Down Expand Up @@ -1966,36 +1966,6 @@ class PCAddThreadRootsMarkingTaskClosure : public ThreadClosure {
}
};

// Marks from the root category selected by root_type using the compaction
// manager of worker worker_id, then drains that manager's marking stacks.
//   root_type - which root category to process; only class_loader_data does
//               any marking here, code_cache is a no-op, and sentinel is
//               reported as a fatal error.
//   worker_id - index used to look up the worker's ParCompactionManager.
static void mark_from_roots_work(ParallelRootType::Value root_type, uint worker_id) {
assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");

ParCompactionManager* cm =
ParCompactionManager::gc_thread_compaction_manager(worker_id);
PCMarkAndPushClosure mark_and_push_closure(cm);

switch (root_type) {
case ParallelRootType::class_loader_data:
{
// Apply the mark-and-push closure to the always-strong class loader data.
CLDToOopClosure cld_closure(&mark_and_push_closure, ClassLoaderData::_claim_strong);
ClassLoaderDataGraph::always_strong_cld_do(&cld_closure);
}
break;

case ParallelRootType::code_cache:
// Do not treat nmethods as strong roots for mark/sweep, since we can unload them.
//ScavengableNMethods::scavengable_nmethods_do(CodeBlobToOopClosure(&mark_and_push_closure));
break;

case ParallelRootType::sentinel:
DEBUG_ONLY(default:) // DEBUG_ONLY hack will create compile error on release builds (-Wswitch) and runtime check on debug builds
fatal("Bad enumeration value: %u", root_type);
break;
}

// Do the real work
cm->follow_marking_stacks();
}

void steal_marking_work(TaskTerminator& terminator, uint worker_id) {
assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");

Expand All @@ -2019,32 +1989,35 @@ void steal_marking_work(TaskTerminator& terminator, uint worker_id) {
class MarkFromRootsTask : public AbstractGangTask {
StrongRootsScope _strong_roots_scope; // needed for Threads::possibly_parallel_threads_do
OopStorageSetStrongParState<false /* concurrent */, false /* is_const */> _oop_storage_set_par_state;
SequentialSubTasksDone _subtasks;
TaskTerminator _terminator;
uint _active_workers;

public:
MarkFromRootsTask(uint active_workers) :
AbstractGangTask("MarkFromRootsTask"),
_strong_roots_scope(active_workers),
_subtasks(ParallelRootType::sentinel),
_terminator(active_workers, ParCompactionManager::oop_task_queues()),
_active_workers(active_workers) {
}
_active_workers(active_workers) {}

virtual void work(uint worker_id) {
for (uint task = 0; _subtasks.try_claim_task(task); /*empty*/ ) {
mark_from_roots_work(static_cast<ParallelRootType::Value>(task), worker_id);
ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id);
cm->create_marking_stats_cache();
PCMarkAndPushClosure mark_and_push_closure(cm);

{
CLDToOopClosure cld_closure(&mark_and_push_closure, ClassLoaderData::_claim_strong);
ClassLoaderDataGraph::always_strong_cld_do(&cld_closure);

// Do the real work
cm->follow_marking_stacks();
}

PCAddThreadRootsMarkingTaskClosure closure(worker_id);
Threads::possibly_parallel_threads_do(true /*parallel */, &closure);

// Mark from OopStorages
{
ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id);
PCMarkAndPushClosure closure(cm);
_oop_storage_set_par_state.oops_do(&closure);
_oop_storage_set_par_state.oops_do(&mark_and_push_closure);
// Do the real work
cm->follow_marking_stacks();
}
Expand Down Expand Up @@ -2077,9 +2050,14 @@ class ParallelCompactRefProcProxyTask : public RefProcProxyTask {
}
};

void PSParallelCompact::marking_phase(ParCompactionManager* cm,
bool maximum_heap_compaction,
ParallelOldTracer *gc_tracer) {
static void flush_marking_stats_cache(const uint num_workers) {
for (uint i = 0; i < num_workers; ++i) {
ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(i);
cm->flush_and_destroy_marking_stats_cache();
}
}

void PSParallelCompact::marking_phase(ParallelOldTracer *gc_tracer) {
// Recursively traverse all live objects and mark them
GCTraceTime(Info, gc, phases) tm("Marking Phase", &_gc_timer);

Expand Down Expand Up @@ -2110,6 +2088,12 @@ void PSParallelCompact::marking_phase(ParCompactionManager* cm,
pt.print_all_references();
}

{
GCTraceTime(Debug, gc, phases) tm("Flush Marking Stats", &_gc_timer);

flush_marking_stats_cache(active_gc_threads);
}

// This is the point where the entire marking should have completed.
ParCompactionManager::verify_all_marking_stack_empty();

Expand Down
4 changes: 1 addition & 3 deletions src/hotspot/share/gc/parallel/psParallelCompact.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1044,9 +1044,7 @@ class PSParallelCompact : AllStatic {
static void post_compact();

// Mark live objects
static void marking_phase(ParCompactionManager* cm,
bool maximum_heap_compaction,
ParallelOldTracer *gc_tracer);
static void marking_phase(ParallelOldTracer *gc_tracer);

// Compute the dense prefix for the designated space. This is an experimental
// implementation currently not used in production.
Expand Down
1 change: 0 additions & 1 deletion src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ inline void PSParallelCompact::check_new_location(HeapWord* old_addr, HeapWord*
inline bool PSParallelCompact::mark_obj(oop obj) {
const int obj_size = obj->size();
if (mark_bitmap()->mark_obj(obj, obj_size)) {
_summary_data.add_obj(obj, obj_size);
return true;
} else {
return false;
Expand Down

0 comments on commit 722586a

Please sign in to comment.