diff -r 9c3642cc96c2 src/share/vm/gc/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc/g1/g1CollectedHeap.cpp Thu Jan 28 10:18:45 2016 +0100 +++ b/src/share/vm/gc/g1/g1CollectedHeap.cpp Thu Jan 28 17:38:52 2016 +0100 @@ -4728,7 +4728,8 @@ void G1CollectedHeap::free_region(HeapRegion* hr, FreeRegionList* free_list, - bool par, + bool keep_remset, + bool keep_hot_card_cache, bool locked) { assert(!hr->is_free(), "the region should not be free"); assert(!hr->is_empty(), "the region should not be empty"); @@ -4743,20 +4744,20 @@ // Clear the card counts for this region. // Note: we only need to do this if the region is not young // (since we don't refine cards in young regions). - if (!hr->is_young()) { + if (!keep_hot_card_cache && !hr->is_young()) { _cg1r->hot_card_cache()->reset_card_counts(hr); } - hr->hr_clear(par, true /* clear_space */, locked /* locked */); + hr->hr_clear(keep_remset, true /* clear_space */, locked /* locked */); free_list->add_ordered(hr); } void G1CollectedHeap::free_humongous_region(HeapRegion* hr, FreeRegionList* free_list, - bool par) { + bool keep_remset) { assert(hr->is_humongous(), "this is only for humongous regions"); assert(free_list != NULL, "pre-condition"); hr->clear_humongous(); - free_region(hr, free_list, par); + free_region(hr, free_list, keep_remset); } void G1CollectedHeap::remove_from_old_sets(const uint old_regions_removed, @@ -4851,7 +4852,238 @@ g1_policy()->phase_times()->record_clear_ct_time(elapsed * 1000.0); } +class G1FreeCollectionSetTask : public AbstractGangTask { + private: + volatile size_t _young_work_claim_idx; + char _young_work_claim_idx_padding[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)]; + + volatile size_t _old_work_claim_idx; + char _old_work_claim_idx_padding[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)]; + volatile size_t _rs_lengths; + char _rs_lengths_padding[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)]; + + jbyte volatile _serial_work_claim; + char _serial_work_claim_padding[DEFAULT_CACHE_LINE_SIZE - 
sizeof(jbyte)]; + + G1CollectedHeap* _g1h; + + struct work_info_t { + uint _region_idx; + bool _evac_failed; + + work_info_t(HeapRegion* hr) { + _region_idx = hr->hrm_index(); + _evac_failed = hr->evacuation_failed(); + } + }; + + work_info_t* _young_work; + size_t _young_work_length; + work_info_t* _old_work; + size_t _old_work_length; + + uint _regions_freed; + size_t _used_after; + + FreeRegionList _local_old_set; + + FreeRegionList _local_free_list; + size_t _pre_used; + EvacuationInfo* _evacuation_info; + + const size_t* _surviving_young_words; + + HeapRegion* _collection_set_head; + + double do_serial_work() { + double start_time = os::elapsedTime(); + + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // Since the collection set is a superset of the young list, + // all we need to do to clear the young list is clear its + // head and length, and unlink any young regions in the code below + g1h->_young_list->clear(); + HeapRegion* cur = _collection_set_head; + int age_bound = -1; + + while (cur != NULL) { + + HeapRegion* next = cur->next_in_collection_set(); + assert(cur->in_collection_set(), "bad CS"); + cur->set_next_in_collection_set(NULL); + g1h->clear_in_cset(cur); + + if (cur->is_young()) { + int index = cur->young_index_in_cset(); + assert(index != -1, "invariant"); + assert((uint) index < g1h->g1_policy()->young_cset_region_length(), "invariant"); + size_t words_survived = _surviving_young_words[index]; + cur->record_surv_words_in_group(words_survived); + + // At this point we have 'popped' cur from the collection set + // (linked via next_in_collection_set()) but it is still in the + // young list (linked via next_young_region()). Clear the + // _next_young_region field. 
+ cur->set_next_young_region(NULL); + } else { + int index = cur->young_index_in_cset(); + assert(index == -1, "invariant"); + } + + assert( (cur->is_young() && cur->young_index_in_cset() > -1) || + (!cur->is_young() && cur->young_index_in_cset() == -1), + "invariant" ); + + if (!cur->evacuation_failed()) { + MemRegion used_mr = cur->used_region(); + + // And the region is empty. + assert(!used_mr.is_empty(), "Should not have empty regions in a CS."); + _pre_used += cur->used(); _regions_freed++; // count freed regions for EvacuationInfo + + cur->rem_set()->reset_for_par_iteration(); + g1h->free_region(cur, + &_local_free_list, + true, /* keep_remset */ + true, /* keep_hot_card_cache */ + true /* locked */); + } else { + cur->uninstall_surv_rate_group(); + if (cur->is_young()) { + cur->set_young_index_in_cset(-1); + } + cur->set_old(); + cur->set_evacuation_failed(false); + // The region is now considered to be old. + _local_old_set.add_ordered(cur); + _used_after += cur->used(); + } + cur = next; + } + return (os::elapsedTime() - start_time); + } + + void do_parallel_work_for_region(work_info_t t, bool is_young, uint worker_id) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + HeapRegion* cur = g1h->region_at(t._region_idx); + Atomic::add(cur->rem_set()->occupied_locked(), &_rs_lengths); + if (!t._evac_failed) { + cur->rem_set()->clear_locked(); + } + if (!is_young) { + g1h->concurrent_g1_refine()->hot_card_cache()->reset_card_counts(cur); + } + } + + double claim_work(volatile size_t* claim_idx, work_info_t* work, size_t max_length, bool young, uint worker_id) { + const size_t claim_size = 1; + + double start_time = os::elapsedTime(); + for (;;) { + size_t start_idx = MAX2(Atomic::add(claim_size, claim_idx) - claim_size, (size_t)0); + if (start_idx > max_length) { + break; + } + size_t end_idx = MIN2(start_idx + claim_size, max_length); + // Now that we have determined the work unit, do the work. 
+ for (size_t i = start_idx; i < end_idx; i++) { + do_parallel_work_for_region(work[i], young, worker_id); + } + } + return (os::elapsedTime() - start_time); + } + + void do_parallel_work(uint worker_id) { + G1GCPhaseTimes* timers = G1CollectedHeap::heap()->g1_policy()->phase_times(); + + double young_time = claim_work(&_young_work_claim_idx, _young_work, _young_work_length, true, worker_id); + timers->record_time_secs(G1GCPhaseTimes::YoungFreeCSet, worker_id, young_time); + + double old_time = claim_work(&_old_work_claim_idx, _old_work, _old_work_length, false, worker_id); + timers->record_time_secs(G1GCPhaseTimes::NonYoungFreeCSet, worker_id, old_time); + } + + void setup_for_parallel_processing() { + G1CollectorPolicy* policy = _g1h->g1_policy(); + + _young_work_length = policy->young_cset_region_length(); + _young_work = (work_info_t*)NEW_C_HEAP_ARRAY(work_info_t, _young_work_length, mtGC); + + _old_work_length = policy->old_cset_region_length(); + _old_work = (work_info_t*)NEW_C_HEAP_ARRAY(work_info_t, _old_work_length, mtGC); + + size_t young_work_idx = 0; + size_t old_work_idx = 0; + HeapRegion* collection_set_cur = _collection_set_head; + while (collection_set_cur != NULL) { + work_info_t next(collection_set_cur); + if (collection_set_cur->is_young()) { + _young_work[young_work_idx++] = next; + assert(young_work_idx <= _young_work_length, + "Tried to add more work than there is space in young region work index"); + } else { + _old_work[old_work_idx++] = next; + assert(old_work_idx <= _old_work_length, + "Tried to add more work than there is space in old region work index"); + } + collection_set_cur = collection_set_cur->next_in_collection_set(); + } + } + + public: + + G1FreeCollectionSetTask(G1CollectedHeap* g1h, HeapRegion* collection_set_head, + EvacuationInfo* evacuation_info, const size_t* surviving_young_words) : + AbstractGangTask("G1 free collection set"), _g1h(g1h), + _young_work_claim_idx(0), _old_work_claim_idx(0), + _young_work_length(0), 
_old_work_length(0), + _young_work(NULL), _old_work(NULL), _regions_freed(0), _used_after(0), + _rs_lengths(0), _collection_set_head(collection_set_head), + _local_free_list("Local List for CSet Freeing"), _pre_used(0), + _serial_work_claim(0), _surviving_young_words(surviving_young_words), + _evacuation_info(evacuation_info), _local_old_set("Local Cleanup Old Proxy Set", false, false) { + setup_for_parallel_processing(); + } + + ~G1FreeCollectionSetTask() { + if (_young_work != NULL) { + FREE_C_HEAP_ARRAY(work_info_t, _young_work); + } + if (_old_work != NULL) { + FREE_C_HEAP_ARRAY(work_info_t, _old_work); + } + } + + void work(uint worker_id) { + if ((_serial_work_claim == 0) && (Atomic::cmpxchg(1, &_serial_work_claim, 0) == 0)) { + G1GCPhaseTimes* timers = G1CollectedHeap::heap()->g1_policy()->phase_times(); + double serial_time = do_serial_work(); + timers->record_serial_free_cset_time_ms(serial_time); + } + do_parallel_work(worker_id); + } + + // Performs work after all potential parallel processing has been performed. + void complete_work() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + while (!_local_old_set.is_empty()) { + HeapRegion* hr = _local_old_set.remove_region(true); + g1h->_old_set.add(hr); + } + g1h->prepend_to_freelist(&_local_free_list); + g1h->decrement_summary_bytes(_pre_used); + + _evacuation_info->increment_collectionset_used_after(_used_after); + _evacuation_info->set_regions_freed(_regions_freed); + + g1h->g1_policy()->cset_regions_freed(); + g1h->g1_policy()->record_max_rs_lengths(_rs_lengths); + } +}; + void G1CollectedHeap::free_collection_set(HeapRegion* cs_head, EvacuationInfo& evacuation_info, const size_t* surviving_young_words) { +/* size_t pre_used = 0; FreeRegionList local_free_list("Local List for CSet Freeing"); @@ -4928,7 +5160,9 @@ // And the region is empty. 
assert(!used_mr.is_empty(), "Should not have empty regions in a CS."); pre_used += cur->used(); - free_region(cur, &local_free_list, false /* par */, true /* locked */); + free_region(cur, &local_free_list, false // par + , true // locked + ); } else { cur->uninstall_surv_rate_group(); if (cur->is_young()) { @@ -4973,6 +5207,26 @@ decrement_summary_bytes(pre_used); policy->phase_times()->record_young_free_cset_time_ms(young_time_ms); policy->phase_times()->record_non_young_free_cset_time_ms(non_young_time_ms); +*/ + double free_cset_start_time = os::elapsedTime(); + + G1FreeCollectionSetTask t(this, cs_head, &evacuation_info, surviving_young_words); + + uint old_worker_num = workers()->active_workers(); + if (!FLAG_IS_DEFAULT(G1ParallelCSetThreads)) { + workers()->set_active_workers(MIN2(workers()->total_workers(), G1ParallelCSetThreads)); + } + + log_debug(gc)("Using %u workers to free cset ", workers()->active_workers()); + workers()->run_task(&t); + if (!FLAG_IS_DEFAULT(G1ParallelCSetThreads)) { + workers()->set_active_workers(old_worker_num); + } + + t.complete_work(); + + double free_cset_end_time = os::elapsedTime(); + g1_policy()->phase_times()->record_total_free_cset_time_ms((free_cset_end_time - free_cset_start_time) * 1000.0); } class G1FreeHumongousRegionClosure : public HeapRegionClosure { diff -r 9c3642cc96c2 src/share/vm/gc/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc/g1/g1CollectedHeap.hpp Thu Jan 28 10:18:45 2016 +0100 +++ b/src/share/vm/gc/g1/g1CollectedHeap.hpp Thu Jan 28 17:38:52 2016 +0100 @@ -112,6 +112,7 @@ }; class G1CollectedHeap : public CollectedHeap { + friend class G1FreeCollectionSetTask; friend class VM_CollectForMetadataAllocation; friend class VM_G1CollectForAllocation; friend class VM_G1CollectFull; @@ -631,13 +632,15 @@ // adding it to the free list that's passed as a parameter (this is // usually a local list which will be appended to the master free // list later). The used bytes of freed regions are accumulated in - // pre_used. 
If par is true, the region's RSet will not be freed - // up. The assumption is that this will be done later. + // pre_used. If keep_remset is true, the region's RSet will not be freed + // up. If keep_hot_card_cache is true, the region's hot card cache will not + // be freed up. The assumption is that this will be done later. // The locked parameter indicates if the caller has already taken // care of proper synchronization. This may allow some optimizations. void free_region(HeapRegion* hr, FreeRegionList* free_list, - bool par, + bool keep_remset, + bool keep_hot_card_cache = false, bool locked = false); // It dirties the cards that cover the block so that the post @@ -651,11 +654,11 @@ // will be added to the free list that's passed as a parameter (this // is usually a local list which will be appended to the master free // list later). The used bytes of freed regions are accumulated in - // pre_used. If par is true, the region's RSet will not be freed + // pre_used. If keep_remset is true, the region's RSet will not be freed // up. The assumption is that this will be done later. void free_humongous_region(HeapRegion* hr, FreeRegionList* free_list, - bool par); + bool keep_remset); // Facility for allocating in 'archive' regions in high heap memory and // recording the allocated ranges. 
These should all be called from the diff -r 9c3642cc96c2 src/share/vm/gc/g1/g1CollectorPolicy.cpp --- a/src/share/vm/gc/g1/g1CollectorPolicy.cpp Thu Jan 28 10:18:45 2016 +0100 +++ b/src/share/vm/gc/g1/g1CollectorPolicy.cpp Thu Jan 28 17:38:52 2016 +0100 @@ -937,13 +937,12 @@ double G1CollectorPolicy::young_other_time_ms() const { return phase_times()->young_cset_choice_time_ms() + - phase_times()->young_free_cset_time_ms(); + phase_times()->total_free_cset_time_ms(); } double G1CollectorPolicy::non_young_other_time_ms() const { return phase_times()->non_young_cset_choice_time_ms() + - phase_times()->non_young_free_cset_time_ms(); - + phase_times()->average_time_ms(G1GCPhaseTimes::NonYoungFreeCSet); } double G1CollectorPolicy::other_time_ms(double pause_time_ms) const { diff -r 9c3642cc96c2 src/share/vm/gc/g1/g1CollectorPolicy.hpp --- a/src/share/vm/gc/g1/g1CollectorPolicy.hpp Thu Jan 28 10:18:45 2016 +0100 +++ b/src/share/vm/gc/g1/g1CollectorPolicy.hpp Thu Jan 28 17:38:52 2016 +0100 @@ -269,7 +269,6 @@ uint eden_cset_region_length() const { return _eden_cset_region_length; } uint survivor_cset_region_length() const { return _survivor_cset_region_length; } - uint old_cset_region_length() const { return _old_cset_region_length; } uint _free_regions_at_end_of_collection; @@ -370,6 +369,7 @@ old_cset_region_length(); } uint young_cset_region_length() const { return eden_cset_region_length() + survivor_cset_region_length(); } + uint old_cset_region_length() const { return _old_cset_region_length; } double predict_survivor_regions_evac_time() const; diff -r 9c3642cc96c2 src/share/vm/gc/g1/g1GCPhaseTimes.cpp --- a/src/share/vm/gc/g1/g1GCPhaseTimes.cpp Thu Jan 28 10:18:45 2016 +0100 +++ b/src/share/vm/gc/g1/g1GCPhaseTimes.cpp Thu Jan 28 17:38:52 2016 +0100 @@ -128,6 +128,9 @@ _gc_par_phases[RedirtyCards] = new WorkerDataArray(max_gc_threads, "Parallel Redirty", true, 3); _redirtied_cards = new WorkerDataArray(max_gc_threads, "Redirtied Cards:", true, 3); 
_gc_par_phases[RedirtyCards]->link_thread_work_items(_redirtied_cards); + + _gc_par_phases[YoungFreeCSet] = new WorkerDataArray(max_gc_threads, "Young Free CSet", true, 3); + _gc_par_phases[NonYoungFreeCSet] = new WorkerDataArray(max_gc_threads, "NonYoung Free CSet", true, 3); } void G1GCPhaseTimes::note_gc_start(uint active_gc_threads) { @@ -401,11 +404,17 @@ print_stats(Indents[2], "Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms); log_trace(gc, phases)("%sHumongous Reclaimed: " SIZE_FORMAT, Indents[3], _cur_fast_reclaim_humongous_reclaimed); } + print_stats(Indents[2], "Free CSet", _recorded_total_free_cset_time_ms); + print_stats(Indents[3], "Free CSet Serial", _recorded_serial_free_cset_time_ms); + par_phase_printer.print(YoungFreeCSet); + par_phase_printer.print(NonYoungFreeCSet); +/* print_stats(Indents[2], "Free CSet", (_recorded_young_free_cset_time_ms + _recorded_non_young_free_cset_time_ms)); log_trace(gc, phases)("%sYoung Free CSet: %.1lf ms", Indents[3], _recorded_young_free_cset_time_ms); log_trace(gc, phases)("%sNon-Young Free CSet: %.1lf ms", Indents[3], _recorded_non_young_free_cset_time_ms); +*/ if (_cur_verify_after_time_ms > 0.0) { print_stats(Indents[2], "Verify After", _cur_verify_after_time_ms); } diff -r 9c3642cc96c2 src/share/vm/gc/g1/g1GCPhaseTimes.hpp --- a/src/share/vm/gc/g1/g1GCPhaseTimes.hpp Thu Jan 28 10:18:45 2016 +0100 +++ b/src/share/vm/gc/g1/g1GCPhaseTimes.hpp Thu Jan 28 17:38:52 2016 +0100 @@ -67,6 +67,8 @@ StringDedupQueueFixup, StringDedupTableFixup, RedirtyCards, + YoungFreeCSet, + NonYoungFreeCSet, GCParPhasesSentinel }; @@ -106,8 +108,9 @@ double _recorded_redirty_logged_cards_time_ms; - double _recorded_young_free_cset_time_ms; - double _recorded_non_young_free_cset_time_ms; + double _recorded_total_free_cset_time_ms; + + double _recorded_serial_free_cset_time_ms; double _cur_fast_reclaim_humongous_time_ms; double _cur_fast_reclaim_humongous_register_time_ms; @@ -201,12 +204,12 @@ _root_region_scan_wait_time_ms = 
time_ms; } - void record_young_free_cset_time_ms(double time_ms) { - _recorded_young_free_cset_time_ms = time_ms; + void record_total_free_cset_time_ms(double time_ms) { + _recorded_total_free_cset_time_ms = time_ms; } - void record_non_young_free_cset_time_ms(double time_ms) { - _recorded_non_young_free_cset_time_ms = time_ms; + void record_serial_free_cset_time_ms(double time_ms) { + _recorded_serial_free_cset_time_ms = time_ms; } void record_fast_reclaim_humongous_stats(double time_ms, size_t total, size_t candidates) { @@ -274,18 +277,14 @@ return _recorded_young_cset_choice_time_ms; } - double young_free_cset_time_ms() { - return _recorded_young_free_cset_time_ms; + double total_free_cset_time_ms() { + return _recorded_total_free_cset_time_ms; } double non_young_cset_choice_time_ms() { return _recorded_non_young_cset_choice_time_ms; } - double non_young_free_cset_time_ms() { - return _recorded_non_young_free_cset_time_ms; - } - double fast_reclaim_humongous_time_ms() { return _cur_fast_reclaim_humongous_time_ms; } diff -r 9c3642cc96c2 src/share/vm/gc/g1/g1_globals.hpp --- a/src/share/vm/gc/g1/g1_globals.hpp Thu Jan 28 10:18:45 2016 +0100 +++ b/src/share/vm/gc/g1/g1_globals.hpp Thu Jan 28 17:38:52 2016 +0100 @@ -45,6 +45,8 @@ "of the optimal occupancy to start marking.") \ range(1, max_intx) \ \ + product(bool, G1UseParallelCSetFreeing, false, "Use the parallel task to free the collection set.") \ + product(uint, G1ParallelCSetThreads, 0, "Number of worker threads used to free the collection set. 0 means use the active GC workers.") \ product(uintx, G1ConfidencePercent, 50, \ "Confidence level for MMU/pause predictions") \ range(0, 100) \ diff -r 9c3642cc96c2 src/share/vm/gc/g1/heapRegion.cpp --- a/src/share/vm/gc/g1/heapRegion.cpp Thu Jan 28 10:18:45 2016 +0100 +++ b/src/share/vm/gc/g1/heapRegion.cpp Thu Jan 28 17:38:52 2016 +0100 @@ -160,7 +160,7 @@ init_top_at_mark_start(); } -void HeapRegion::hr_clear(bool par, bool clear_space, bool locked) { +void HeapRegion::hr_clear(bool keep_remset, bool clear_space, bool locked) { assert(_humongous_start_region == NULL, "we should have already filtered out 
humongous regions"); assert(!in_collection_set(), @@ -172,7 +172,7 @@ set_free(); reset_pre_dummy_top(); - if (!par) { + if (!keep_remset) { // If this is parallel, this will be done later. HeapRegionRemSet* hrrs = rem_set(); if (locked) { diff -r 9c3642cc96c2 src/share/vm/gc/g1/heapRegionSet.hpp --- a/src/share/vm/gc/g1/heapRegionSet.hpp Thu Jan 28 10:18:45 2016 +0100 +++ b/src/share/vm/gc/g1/heapRegionSet.hpp Thu Jan 28 17:38:52 2016 +0100 @@ -190,7 +190,10 @@ HeapRegionSetBase(name, false /* humongous */, true /* empty */, mt_safety_checker) { clear(); } - + FreeRegionList(const char* name, bool humongous, bool empty, HRSMtSafeChecker* mt_safety_checker = NULL): + HeapRegionSetBase(name, humongous, empty, mt_safety_checker) { + clear(); + } void verify_list(); #ifdef ASSERT