# HG changeset patch
# Parent ce40f0a487c4ac149d645563a5124dc237725918
# User tschatzl

diff -r ce40f0a487c4 -r 5b50e75ca7b1 src/share/vm/gc/g1/g1ConcurrentMark.cpp
--- a/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Mon Feb 22 12:10:45 2016 +0100
+++ b/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Mon Feb 22 17:38:54 2016 +0100
@@ -49,6 +49,7 @@
 #include "gc/shared/vmGCOperations.hpp"
 #include "logging/log.hpp"
 #include "memory/allocation.hpp"
+#include "memory/padded.inline.hpp"
 #include "memory/resourceArea.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/atomic.inline.hpp"
@@ -419,10 +420,8 @@
   _sleep_factor(0.0),
   _marking_task_overhead(1.0),
   _cleanup_list("Cleanup List"),
-  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
-  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
-           CardTableModRefBS::card_shift,
-           false /* in_resource_area*/),
+  _region_bm(),
+  _card_bm(),
 
   _prevMarkBitMap(&_markBitMap1),
   _nextMarkBitMap(&_markBitMap2),
@@ -566,25 +565,37 @@
     return;
   }
 
+  // Allocate internal bitmaps. Temporarily decrease the allocation limit for the
+  // ArrayAllocator to put large bitmaps into virtual memory.
+  //
+  // This improves startup time by not immediately backing them with memory on
+  // large heaps.
+  // The new limit is set so that the loss due to page granularity is <= 1%.
+  size_t const new_array_allocator_malloc_limit = MIN2((size_t)os::vm_allocation_granularity() * 100, ArrayAllocatorMallocLimit);
+  SizeTFlagSetting fs(ArrayAllocatorMallocLimit, new_array_allocator_malloc_limit);
+
+  _region_bm.resize((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/);
+  _card_bm.resize(align_size_up(g1h->reserved_region().byte_size(), CardTableModRefBS::card_size) / CardTableModRefBS::card_size, false);
+
   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 
   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
-  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
+  _count_marked_bytes = Padded2DArray<size_t, mtGC>::create_unfreeable(_max_worker_id, _g1h->max_regions());
 
   BitMap::idx_t card_bm_size = _card_bm.size();
 
   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   _active_tasks = _max_worker_id;
 
-  uint max_regions = _g1h->max_regions();
   for (uint i = 0; i < _max_worker_id; ++i) {
     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
     task_queue->initialize();
     _task_queues->register_queue(i, task_queue);
 
     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
-    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 
     _tasks[i] = new G1CMTask(i, this,
                              _count_marked_bytes[i],
@@ -601,7 +612,8 @@
                 CardTableModRefBS::card_shift);
 
   // Clear all the liveness counting data
-  clear_all_count_data();
+  // First-time allocation of the bitmaps ensures that they are zeroed out. There
+  // is no need to do that here.
 
   // so that the call below can read a sensible value
   _heap_start = g1h->reserved_region().start();
@@ -719,6 +731,7 @@
   // call already did that.
   if (cl.complete()) {
     clear_all_count_data();
+    DEBUG_ONLY(verify_all_count_data());
   }
 
   // Repeat the asserts from above.
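A quick sanity check of the "<= 1%" claim in the hunk above: any allocation that exceeds the new limit of 100 * os::vm_allocation_granularity() spans at least 100 granules, and rounding it up to granule size wastes at most one granule, i.e. at most 1/100 of the requested size. A minimal standalone sketch of that bound (plain C++, not HotSpot code; the 4 KiB granularity is an assumption):

// sketch_malloc_limit.cpp - standalone, not HotSpot code.
// Shows that with a malloc limit of 100 granules, rounding a virtual-memory
// allocation up to granule size loses at most 1% of the requested size.
#include <cstdio>
#include <cstddef>

int main() {
  size_t const granularity = 4096;              // assumed allocation granularity
  size_t const limit = 100 * granularity;       // mirrors the patch's MIN2(...) cap
  double worst = 0.0;
  // Every size above the limit goes to virtual memory; measure the rounding loss.
  for (size_t size = limit + 1; size <= limit + 4 * granularity; size += 123) {
    size_t rounded = (size + granularity - 1) / granularity * granularity;
    double loss = 100.0 * (double)(rounded - size) / (double)size;
    if (loss > worst) worst = loss;
  }
  std::printf("worst-case loss: %.4f%%\n", worst);  // stays below 1%
  return 0;
}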
@@ -2532,29 +2545,81 @@
   _g1h->workers()->run_task(&g1_par_agg_task);
 }
 
+class G1ClearCountDataTask : public AbstractGangTask {
+  G1ConcurrentMark* _cm;
+  BitMap* _card_bm;
+  BitMap* _region_bm;
+  size_t _num_workers;
+
+  size_t volatile _cur_task;
+ public:
+  G1ClearCountDataTask(G1ConcurrentMark* cm, BitMap* card_bm, BitMap* region_bm, size_t num_structs) :
+    AbstractGangTask("Clear Count"),
+    _cm(cm), _card_bm(card_bm), _region_bm(region_bm), _cur_task(0), _num_workers(num_structs) { }
+
+  virtual void work(uint worker_id) {
+    while (true) {
+      size_t task = Atomic::add((size_t)1, &_cur_task) - 1;
+      if (task >= _num_workers) {
+        break;
+      }
+      if (task == 0) {
+        // Clear the global card bitmap - it will be filled during
+        // liveness count aggregation (during remark) and the
+        // final counting task.
+        _card_bm->clear();
+
+        // Clear the global region bitmap - it will be filled as part
+        // of the final counting task.
+        _region_bm->clear();
+      }
+
+      uint max_regions = G1CollectedHeap::heap()->max_regions();
+
+      BitMap* task_card_bm = _cm->count_card_bitmap_for((uint)task);
+      assert(task_card_bm->size() == _card_bm->size(), "size mismatch");
+      task_card_bm->clear();
+
+      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for((uint)task);
+      assert(marked_bytes_array != NULL, "uninitialized");
+      memset(marked_bytes_array, 0, (size_t)max_regions * sizeof(size_t));
+    }
+  }
+};
+
 // Clear the per-worker arrays used to store the per-region counting data
 void G1ConcurrentMark::clear_all_count_data() {
-  // Clear the global card bitmap - it will be filled during
-  // liveness count aggregation (during remark) and the
-  // final counting task.
-  _card_bm.clear();
-
-  // Clear the global region bitmap - it will be filled as part
-  // of the final counting task.
-  _region_bm.clear();
+  guarantee(Universe::is_fully_initialized(), "Should not call this during initialization.");
+
+  G1ClearCountDataTask cl(this, &_card_bm, &_region_bm, _max_worker_id);
+
+  if (SafepointSynchronize::is_at_safepoint()) {
+    assert_at_safepoint(true); // Also check that this method is called by the VM thread.
+    log_debug(gc)("Clear count data workers %u", _g1h->workers()->active_workers());
+    _g1h->workers()->run_task(&cl);
+  } else {
+    log_debug(gc)("Clear count data workers %u", _parallel_workers->active_workers());
+    _parallel_workers->run_task(&cl);
+  }
+}
+
+void G1ConcurrentMark::verify_all_count_data() {
+  assert(_card_bm.count_one_bits() == 0, "Master card bitmap not clear");
+  assert(_region_bm.count_one_bits() == 0, "Master region bitmap not clear");
 
   uint max_regions = _g1h->max_regions();
   assert(_max_worker_id > 0, "uninitialized");
 
-  for (uint i = 0; i < _max_worker_id; i += 1) {
+  for (uint i = 0; i < _max_worker_id; i++) {
     BitMap* task_card_bm = count_card_bitmap_for(i);
     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
 
     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
     assert(marked_bytes_array != NULL, "uninitialized");
 
-    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
-    task_card_bm->clear();
+    for (uint j = 0; j < max_regions; j++) {
+      assert(marked_bytes_array[j] == 0, "Marked bytes array for %u at %u is " SIZE_FORMAT, i, j, marked_bytes_array[j]);
+    }
+    assert(task_card_bm->count_one_bits() == 0, "Task card bitmap for %u not clear", i);
   }
 }
 
@@ -2569,7 +2634,6 @@
   }
 }
 
-// abandon current marking iteration due to a Full GC
 void G1ConcurrentMark::abort() {
   if (!cmThread()->during_cycle() || _has_aborted) {
     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
@@ -2586,6 +2650,7 @@
 
   // Clear the liveness counting data
   clear_all_count_data();
+  DEBUG_ONLY(verify_all_count_data());
   // Empty mark stack
   reset_marking_state();
   for (uint i = 0; i < _max_worker_id; ++i) {
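For reference, the work distribution in G1ClearCountDataTask above is the usual claim-by-atomic-counter idiom: each worker bumps a shared counter to claim the next count-data set until all sets are taken, so the number of gang workers and the number of sets need not match. A minimal standalone sketch of the idiom (C++11 std::atomic stands in for HotSpot's Atomic::add; not HotSpot code):

// sketch_claiming.cpp - standalone C++11, not HotSpot code.
// The claim-by-atomic-counter idiom used by G1ClearCountDataTask::work():
// each worker grabs the next unclaimed index until all indices are taken.
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

static std::atomic<size_t> g_cur_task(0);

static void work(size_t num_tasks, unsigned worker_id) {
  while (true) {
    size_t task = g_cur_task.fetch_add(1);  // returns the claimed index
    if (task >= num_tasks) {
      break;  // everything has been claimed; terminate this worker
    }
    std::printf("worker %u clears count data set %zu\n", worker_id, task);
  }
}

int main() {
  size_t const num_tasks = 8;  // e.g. one per marking task
  std::vector<std::thread> workers;
  for (unsigned i = 0; i < 4; i++) {
    workers.emplace_back(work, num_tasks, i);
  }
  for (size_t i = 0; i < workers.size(); i++) {
    workers[i].join();
  }
  return 0;
}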
+ log_debug(gc)("Clear count data workers %u", _g1h->workers()->active_workers()); + _g1h->workers()->run_task(&cl); + } else { + log_debug(gc)("Clear count data workers %u", _parallel_workers->active_workers()); + _parallel_workers->run_task(&cl); + } +} + +void G1ConcurrentMark::verify_all_count_data() { + assert(_card_bm.count_one_bits() == 0, "Master card bitmap not clear"); + assert(_region_bm.count_one_bits() == 0, "Master region bitmap not clear"); uint max_regions = _g1h->max_regions(); assert(_max_worker_id > 0, "uninitialized"); - for (uint i = 0; i < _max_worker_id; i += 1) { + for (uint i = 0; i < _max_worker_id; i++) { BitMap* task_card_bm = count_card_bitmap_for(i); size_t* marked_bytes_array = count_marked_bytes_array_for(i); assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); assert(marked_bytes_array != NULL, "uninitialized"); - memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); - task_card_bm->clear(); + for (uint j = 0; j < max_regions; j++) { + assert(marked_bytes_array[j] == 0, "Marked bytes array for %u at %u is " SIZE_FORMAT, i, j, marked_bytes_array[j]); + } + assert(task_card_bm->count_one_bits() == 0, "Task card bitmap for %u not clear", i); } } @@ -2569,7 +2634,6 @@ } } -// abandon current marking iteration due to a Full GC void G1ConcurrentMark::abort() { if (!cmThread()->during_cycle() || _has_aborted) { // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. @@ -2586,6 +2650,7 @@ // Clear the liveness counting data clear_all_count_data(); + DEBUG_ONLY(verify_all_count_data()); // Empty mark stack reset_marking_state(); for (uint i = 0; i < _max_worker_id; ++i) { diff -r ce40f0a487c4 -r 5b50e75ca7b1 src/share/vm/gc/g1/g1ConcurrentMark.hpp --- a/src/share/vm/gc/g1/g1ConcurrentMark.hpp Mon Feb 22 12:10:45 2016 +0100 +++ b/src/share/vm/gc/g1/g1ConcurrentMark.hpp Mon Feb 22 17:38:54 2016 +0100 @@ -299,7 +299,11 @@ G1CMBitMapRO* _prevMarkBitMap; // Completed mark bitmap G1CMBitMap* _nextMarkBitMap; // Under-construction mark bitmap + // A set bit indicates whether the given region contains any live object. This + // is the "master" bitmap used for remembered set scrubbing. BitMap _region_bm; + // A set bit indicates that the given card contains a live object. This is the + // "master" bitmap used for remembered set scrubbing. BitMap _card_bm; // Heap bounds @@ -630,7 +634,7 @@ inline bool do_yield_check(uint worker_i = 0); - // Called to abort the marking cycle after a Full GC takes place. + // Abandon current marking iteration due to a Full GC. void abort(); bool has_aborted() { return _has_aborted; } @@ -721,6 +725,9 @@ // counting data. void clear_all_count_data(); + // Verify all of the above data structures that they are in initial state. + void verify_all_count_data(); + // Aggregates the counting data for each worker/task // that was constructed while marking. Also sets // the amount of marked bytes for each region and diff -r ce40f0a487c4 -r 5b50e75ca7b1 src/share/vm/memory/allocation.hpp --- a/src/share/vm/memory/allocation.hpp Mon Feb 22 12:10:45 2016 +0100 +++ b/src/share/vm/memory/allocation.hpp Mon Feb 22 17:38:54 2016 +0100 @@ -745,6 +745,9 @@ } } + // Indicates whether the last (re-)allocation used the C-heap malloc or virtual space. 
diff -r ce40f0a487c4 -r 5b50e75ca7b1 src/share/vm/memory/padded.inline.hpp
--- a/src/share/vm/memory/padded.inline.hpp	Mon Feb 22 12:10:45 2016 +0100
+++ b/src/share/vm/memory/padded.inline.hpp	Mon Feb 22 17:38:54 2016 +0100
@@ -60,10 +60,13 @@
   // Total size consists of the indirection table plus the rows.
   size_t total_size = table_size + rows * row_size + alignment;
 
+  ArrayAllocator<char, flags> allocator(false);
   // Allocate a chunk of memory large enough to allow alignment of the chunk.
-  void* chunk = AllocateHeap(total_size, flags);
-  // Clear the allocated memory.
-  memset(chunk, 0, total_size);
+  void* chunk = allocator.allocate(total_size);
+  // Clear the allocated memory if needed.
+  if (allocator.use_malloc()) {
+    memset(chunk, 0, total_size);
+  }
   // Align the chunk of memory.
   T** result = (T**)align_ptr_up(chunk, alignment);
   void* data_start = (void*)((uintptr_t)result + table_size);

diff -r ce40f0a487c4 -r 5b50e75ca7b1 src/share/vm/utilities/bitMap.cpp
--- a/src/share/vm/utilities/bitMap.cpp	Mon Feb 22 12:10:45 2016 +0100
+++ b/src/share/vm/utilities/bitMap.cpp	Mon Feb 22 17:38:54 2016 +0100
@@ -57,7 +57,10 @@
     _map = _map_allocator.reallocate(new_size_in_words);
   }
 
-  if (new_size_in_words > old_size_in_words) {
+  // If we used virtual memory directly to allocate the bitmap, we do not need to
+  // clear the bits ourselves. They will be zeroed out by the OS.
+  // This also avoids prematurely touching the memory.
+  if ((in_resource_area || _map_allocator.use_malloc()) && new_size_in_words > old_size_in_words) {
     clear_range_of_words(old_size_in_words, new_size_in_words);
   }
 }

diff -r ce40f0a487c4 -r 5b50e75ca7b1 src/share/vm/utilities/bitMap.inline.hpp
--- a/src/share/vm/utilities/bitMap.inline.hpp	Mon Feb 22 12:10:45 2016 +0100
+++ b/src/share/vm/utilities/bitMap.inline.hpp	Mon Feb 22 17:38:54 2016 +0100
@@ -30,7 +30,7 @@
 
 #ifdef ASSERT
 inline void BitMap::verify_index(idx_t index) const {
-  assert(index < _size, "BitMap index out of bounds");
+  assert(index < _size, "BitMap index " SIZE_FORMAT " out of bounds " SIZE_FORMAT, index, _size);
 }
 
 inline void BitMap::verify_range(idx_t beg_index, idx_t end_index) const {
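Taken together, the bitMap.cpp hunk means a grown bitmap is only cleared by hand when its storage does not come pre-zeroed from the OS, which also avoids committing the new pages up front. A toy version of that resize policy (plain C++, not HotSpot's BitMap; names and layout are illustrative only):

// sketch_bitmap_resize.cpp - standalone toy, not HotSpot's BitMap.
// Mirrors the patched resize(): only clear the newly grown words when the
// backing memory is not handed out pre-zeroed by the OS.
#include <cstdlib>
#include <cstring>

struct ToyBitMap {
  typedef unsigned long bm_word_t;
  bm_word_t* _map;
  size_t     _size_in_words;
  bool       _os_zeroed;  // true if the allocator returns zero-filled memory

  void resize(size_t new_size_in_words) {
    size_t old_size_in_words = _size_in_words;
    _map = (bm_word_t*)std::realloc(_map, new_size_in_words * sizeof(bm_word_t));
    _size_in_words = new_size_in_words;
    // Counterpart of the patched condition: skip the clear when the OS already
    // zeroes the memory, which also avoids touching (committing) the new pages.
    if (!_os_zeroed && new_size_in_words > old_size_in_words) {
      std::memset(_map + old_size_in_words, 0,
                  (new_size_in_words - old_size_in_words) * sizeof(bm_word_t));
    }
  }
};

int main() {
  ToyBitMap bm = { NULL, 0, false };  // malloc-backed, so grown words get cleared
  bm.resize(128);
  std::free(bm._map);
  return 0;
}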