# HG changeset patch
# Parent 1ae81f0b3b535427d68e14e317359556bce785e1
# User tschatzl

diff -r 1ae81f0b3b53 src/share/vm/gc/g1/g1ConcurrentMark.cpp
--- a/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Thu Aug 04 17:08:30 2016 +0200
+++ b/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Thu Aug 04 17:14:41 2016 +0200
@@ -133,10 +133,9 @@
 }
 
 G1CMMarkStack::G1CMMarkStack() :
-  _max_capacity(0),
-  _reserved_space(),
+  _max_chunk_capacity(0),
   _base(NULL),
-  _capacity(0),
+  _chunk_capacity(0),
   _out_of_memory(false),
   _should_expand(false) {
   set_empty();
@@ -144,40 +143,22 @@
 
 bool G1CMMarkStack::resize(size_t new_capacity) {
   assert(is_empty(), "Only resize when stack is empty.");
-  assert(new_capacity <= _max_capacity,
-         "Trying to resize stack to " SIZE_FORMAT " elements when the maximum is " SIZE_FORMAT, new_capacity, _max_capacity);
-
-  assert(new_capacity % capacity_alignment() == 0,
-         "New capacity " SIZE_FORMAT " should be evenly divisible by the requested alignment of " SIZE_FORMAT, new_capacity, capacity_alignment());
-
-  size_t reservation_size = ReservedSpace::allocation_align_size_up(new_capacity * sizeof(void*));
-
-  ReservedSpace rs(reservation_size);
-  if (!rs.is_reserved()) {
-    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " elements and size " SIZE_FORMAT "B.", new_capacity, reservation_size);
+  assert(new_capacity <= _max_chunk_capacity,
+         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
+
+  OopChunk* new_base = MmapArrayAllocator<OopChunk, mtGC>::allocate_or_null(new_capacity);
+
+  if (new_base == NULL) {
+    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(OopChunk));
     return false;
   }
-
-  VirtualSpace vs;
-
-  if (!vs.initialize(rs, rs.size())) {
-    rs.release();
-    log_warning(gc)("Failed to commit memory for new overflow mark stack of size " SIZE_FORMAT "B.", rs.size());
-    return false;
+  // Release old mapping.
+  if (_base != NULL) {
+    MmapArrayAllocator<OopChunk, mtGC>::free(_base, _chunk_capacity);
   }
 
-  assert(vs.committed_size() == rs.size(), "Failed to commit all of the mark stack.");
-
-  // Release old mapping.
-  _reserved_space.release();
-
-  // Save new mapping for future unmapping.
-  _reserved_space = rs;
-
-  MemTracker::record_virtual_memory_type((address)_reserved_space.base(), mtGC);
-
-  _base = (void**) vs.low();
-  _capacity = new_capacity;
+  _base = new_base;
+  _chunk_capacity = new_capacity;
   set_empty();
   _should_expand = false;
 
@@ -189,47 +170,48 @@
 }
 
 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
-  guarantee(_max_capacity == 0, "G1CMMarkStack already initialized.");
-
-  _max_capacity = (size_t)align_size_up(max_capacity, capacity_alignment());
-  size_t aligned_initial_size = (size_t)align_size_up(initial_capacity, capacity_alignment());
-
-  guarantee(aligned_initial_size <= _max_capacity,
-            "Maximum capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
-            _max_capacity,
-            aligned_initial_size);
-
-  log_debug(gc)("Initialize global mark stack with " SIZE_FORMAT " elements, maximum " SIZE_FORMAT,
-                aligned_initial_size, _max_capacity);
-
-  return resize(aligned_initial_size);
+  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
+
+  size_t const OopChunkSizeInVoidStar = sizeof(OopChunk) / sizeof(void*);
+
+  _max_chunk_capacity = (size_t)align_size_up(max_capacity, capacity_alignment()) / OopChunkSizeInVoidStar;
+  size_t initial_chunk_capacity = (size_t)align_size_up(initial_capacity, capacity_alignment()) / OopChunkSizeInVoidStar;
+
+  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
+            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
+            _max_chunk_capacity,
+            initial_chunk_capacity);
+
+  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
+                initial_chunk_capacity, _max_chunk_capacity);
+
+  return resize(initial_chunk_capacity);
 }
 
 void G1CMMarkStack::expand() {
   // Clear expansion flag
   _should_expand = false;
 
-  if (_capacity == _max_capacity) {
-    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " elements.", _capacity);
+  if (_chunk_capacity == _max_chunk_capacity) {
+    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
     return;
   }
-  size_t old_capacity = _capacity;
+  size_t old_capacity = _chunk_capacity;
   // Double capacity if possible
-  size_t new_capacity = MIN2(old_capacity * 2, _max_capacity);
+  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 
   if (resize(new_capacity)) {
-    log_debug(gc)("Expanded marking stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " elements",
+    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                   old_capacity, new_capacity);
   } else {
-    log_warning(gc)("Failed to expand marking stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " elements",
+    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                     old_capacity, new_capacity);
   }
 }
 
 G1CMMarkStack::~G1CMMarkStack() {
   if (_base != NULL) {
-    _base = NULL;
-    _reserved_space.release();
+    MmapArrayAllocator<OopChunk, mtGC>::free(_base, _chunk_capacity);
   }
 }
 
@@ -260,24 +242,18 @@
 }
 
 G1CMMarkStack::OopChunk* G1CMMarkStack::allocate_new_chunk() {
-  size_t cur_idx = _hwm;
-
-  OopChunk* result = NULL;
-  
-  while (cur_idx < _capacity) {
-    size_t next_idx = cur_idx + sizeof(OopChunk);
-    size_t old_idx = (size_t)Atomic::cmpxchg_ptr((intptr_t)next_idx, (intptr_t*)&_hwm, cur_idx);
-    if (old_idx == cur_idx) {
-      // success. Allocated at cur_idx
-      result = (OopChunk*)&_base[cur_idx];
-      result->next = NULL;
-      break;
-    } else {
-      cur_idx = old_idx;
-    }
+  // This dirty read is okay because we only ever increase the _hwm in parallel code.
+  if (_hwm >= _chunk_capacity) {
+    return NULL;
   }
-  guarantee(cur_idx < _capacity || result == NULL, "Should not be able to allocate chunk if out of space.");
-
+
+  size_t cur_idx = Atomic::add(1, &_hwm) - 1;
+  if (cur_idx >= _chunk_capacity) {
+    return NULL;
+  }
+
+  OopChunk* result = ::new (&_base[cur_idx]) OopChunk;
+  result->next = NULL;
   return result;
 }
 
@@ -297,7 +273,7 @@
 
   for (size_t i = 0; i < OopsPerChunk; i++) {
     new_chunk->data[i] = ptr_arr[i];
-  }
+  }
 
   add_chunk_to_list(&_chunk_list, new_chunk);
   Atomic::inc(&_chunks_in_chunk_list);
@@ -2433,13 +2409,13 @@
   decrease_limits();
 }
 
-void G1CMTask::get_entries_from_global_stack() {
+bool G1CMTask::get_entries_from_global_stack() {
   // Local array where we'll store the entries that will be popped
   // from the global stack.
   oop buffer[G1CMMarkStack::OopsPerChunk];
 
   if (!_cm->mark_stack_pop(buffer)) {
-    return;
+    return false;
   }
 
   // We did actually pop at least one entry.
@@ -2456,6 +2432,7 @@
 
   // This operation was quite expensive, so decrease the limits
   decrease_limits();
+  return true;
 }
 
 void G1CMTask::drain_local_queue(bool partially) {
@@ -2499,20 +2476,21 @@
 
   // Decide what the target size is, depending whether we're going to
   // drain it partially (so that other tasks can steal if they run out
-  // of things to do) or totally (at the very end).  Notice that,
-  // because we move entries from the global stack in chunks or
-  // because another task might be doing the same, we might in fact
-  // drop below the target. But, this is not a problem.
-  size_t target_size;
+  // of things to do) or totally (at the very end).
+  // Notice that when draining the global mark stack partially, due to the raciness
+  // of the mark stack size update we might in fact drop below the target. But,
+  // this is not a problem.
+  // In case of total draining, we simply process until the global mark stack is
+  // totally empty, disregarding the size counter.
   if (partially) {
-    target_size = _cm->partial_mark_stack_size_target();
+    size_t const target_size = _cm->partial_mark_stack_size_target();
+    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
+      if (get_entries_from_global_stack()) {
+        drain_local_queue(partially);
+      }
+    }
   } else {
-    target_size = 0;
-  }
-
-  if (_cm->mark_stack_size() > target_size) {
-    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
-      get_entries_from_global_stack();
+    while (!has_aborted() && get_entries_from_global_stack()) {
       drain_local_queue(partially);
     }
   }
diff -r 1ae81f0b3b53 src/share/vm/gc/g1/g1ConcurrentMark.hpp
--- a/src/share/vm/gc/g1/g1ConcurrentMark.hpp	Thu Aug 04 17:08:30 2016 +0200
+++ b/src/share/vm/gc/g1/g1ConcurrentMark.hpp	Thu Aug 04 17:14:41 2016 +0200
@@ -172,13 +172,10 @@
     oop data[OopsPerChunk];
   };
 
-  ReservedSpace _reserved_space; // Space currently reserved for the mark stack.
+  size_t _max_chunk_capacity;    // Maximum number of OopChunk elements on the stack.
 
-  size_t _max_capacity;          // Maximum number of elements.
-
-  void** _base;                  // Bottom address of allocated memory area.
-  size_t _capacity;              // Current maximum number of elements.
-  size_t _index;                 // One more than last occupied index.
+  OopChunk* _base;               // Bottom address of allocated memory area.
+  size_t _chunk_capacity;        // Current maximum number of OopChunk elements.
 
   char _pad0[DEFAULT_CACHE_LINE_SIZE];
   OopChunk* volatile _free_list;  // Linked list of free chunks that can be allocated by users.
@@ -212,10 +209,10 @@
   G1CMMarkStack();
   ~G1CMMarkStack();
 
-  // Alignment and minimum capacity of this mark stack in elements.
+  // Alignment and minimum capacity of this mark stack in number of oops.
   static size_t capacity_alignment();
 
-  // Allocate and initialize the mark stack with the given number of elements.
+  // Allocate and initialize the mark stack with the given number of oops.
   bool initialize(size_t initial_capacity, size_t max_capacity);
 
   // Pushes the given buffer containing at most OopsPerChunk elements on the mark
@@ -230,7 +227,7 @@
 
   bool is_empty() const { return _chunk_list == NULL && _chunks_in_chunk_list == 0; }
 
-  size_t capacity() const  { return _capacity; }
+  size_t capacity() const  { return _chunk_capacity; }
 
   bool is_out_of_memory() const { return _out_of_memory; }
   void clear_out_of_memory() { _out_of_memory = false; }
@@ -241,13 +238,15 @@
   // Expand the stack, typically in response to an overflow condition
   void expand();
 
+  // Return the approximate number of oops on this mark stack. Racy due to
+  // unsynchronized access to _chunks_in_chunk_list.
   size_t size() const { return _chunks_in_chunk_list * OopsPerChunk; }
  
   void set_empty();
 
   // Apply Fn to every oop on the mark stack. The mark stack must not
   // be modified while iterating.
-  template<typename Fn> void iterate(Fn fn);
+  template<typename Fn> void iterate(Fn fn) PRODUCT_RETURN;
 };
 
 // Root Regions are regions that are not empty at the beginning of a
@@ -893,9 +892,10 @@
   // It pushes an object on the local queue.
   inline void push(oop obj);
 
-  // These two move entries to/from the global stack.
+  // Move entries to the global stack.
   void move_entries_to_global_stack();
-  void get_entries_from_global_stack();
+  // Move entries from the global stack. Returns true if any entries were moved.
+  bool get_entries_from_global_stack();
 
   // It pops and scans objects from the local queue. If partially is
   // true, then it stops when the queue size is of a given limit. If
diff -r 1ae81f0b3b53 src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp
--- a/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp	Thu Aug 04 17:08:30 2016 +0200
+++ b/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp	Thu Aug 04 17:14:41 2016 +0200
@@ -89,6 +89,7 @@
 
 #undef check_mark
 
+#ifndef PRODUCT
 template<typename Fn>
 inline void G1CMMarkStack::iterate(Fn fn) {
   size_t num_chunks = 0;
@@ -107,6 +108,7 @@
     num_chunks++;
   }
 }
+#endif
 
 // It scans an object and visits its children.
 inline void G1CMTask::scan_object(oop obj) { process_grey_object<true>(obj); }
diff -r 1ae81f0b3b53 src/share/vm/memory/allocation.hpp
--- a/src/share/vm/memory/allocation.hpp	Thu Aug 04 17:08:30 2016 +0200
+++ b/src/share/vm/memory/allocation.hpp	Thu Aug 04 17:14:41 2016 +0200
@@ -738,6 +738,7 @@
   static size_t size_for(size_t length);
 
  public:
+  static E* allocate_or_null(size_t length);
   static E* allocate(size_t length);
   static void free(E* addr, size_t length);
 };
diff -r 1ae81f0b3b53 src/share/vm/memory/allocation.inline.hpp
--- a/src/share/vm/memory/allocation.inline.hpp	Thu Aug 04 17:08:30 2016 +0200
+++ b/src/share/vm/memory/allocation.inline.hpp	Thu Aug 04 17:14:41 2016 +0200
@@ -153,6 +153,24 @@
 }
 
 template <class E, MEMFLAGS F>
+E* MmapArrayAllocator<E, F>::allocate_or_null(size_t length) {
+  size_t size = size_for(length);
+  int alignment = os::vm_allocation_granularity();
+
+  char* addr = os::reserve_memory(size, NULL, alignment, F);
+  if (addr == NULL) {
+    return NULL;
+  }
+
+  if (os::commit_memory(addr, size, !ExecMem, "Allocator (commit)")) {
+    return (E*)addr;
+  } else {
+    os::release_memory(addr, size);
+    return NULL;
+  }
+}
+
+template <class E, MEMFLAGS F>
 E* MmapArrayAllocator<E, F>::allocate(size_t length) {
   size_t size = size_for(length);
   int alignment = os::vm_allocation_granularity();