
src/hotspot/share/gc/epsilon/epsilonHeap.cpp

rev 56085 : Epsilon, Sliding Mark-Compact

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2017, 2018, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2017, 2019, Red Hat, Inc. All rights reserved.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.
  *

@@ -20,19 +20,38 @@
  * questions.
  *
  */
 
 #include "precompiled.hpp"
+#include "classfile/classLoaderDataGraph.hpp"
+#include "classfile/stringTable.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "code/codeCache.hpp"
 #include "gc/epsilon/epsilonHeap.hpp"
 #include "gc/epsilon/epsilonMemoryPool.hpp"
 #include "gc/epsilon/epsilonThreadLocalData.hpp"
 #include "gc/shared/gcArguments.hpp"
-#include "memory/allocation.hpp"
+#include "gc/shared/barrierSet.inline.hpp"
+#include "gc/shared/gcTraceTime.inline.hpp"
+#include "gc/shared/markBitMap.inline.hpp"
+#include "gc/shared/strongRootsScope.hpp"
+#include "gc/shared/preservedMarks.inline.hpp"
+#include "gc/shared/weakProcessor.hpp"
 #include "memory/allocation.inline.hpp"
+#include "memory/iterator.inline.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.hpp"
+#include "oops/compressedOops.inline.hpp"
+#include "oops/markWord.inline.hpp"
+#include "runtime/biasedLocking.hpp"
 #include "runtime/globals.hpp"
+#include "runtime/objectMonitor.inline.hpp"
+#include "runtime/thread.hpp"
+#include "runtime/vmOperations.hpp"
+#include "runtime/vmThread.hpp"
+#include "utilities/stack.inline.hpp"
+#include "services/management.hpp"
 
 jint EpsilonHeap::initialize() {
   size_t align = HeapAlignment;
   size_t init_byte_size = align_up(InitialHeapSize, align);
   size_t max_byte_size  = align_up(MaxHeapSize, align);

@@ -61,10 +80,23 @@
   _last_heap_print = 0;
 
   // Install barrier set
   BarrierSet::set_barrier_set(new EpsilonBarrierSet());
 
+  size_t bitmap_page_size = UseLargePages ? (size_t)os::large_page_size() : (size_t)os::vm_page_size();
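+  // The marking bitmap needs roughly one bit per heap word, i.e. about
+  // 1/64-th of the heap size on 64-bit platforms (see walk_bitmap() below).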
+  size_t _bitmap_size = MarkBitMap::compute_size(heap_rs.size());
+  _bitmap_size = align_up(_bitmap_size, bitmap_page_size);
+
+  // Initialize the marking bitmap, but do not commit it yet
+  if (EpsilonSlidingGC) {
+    ReservedSpace bitmap(_bitmap_size, bitmap_page_size);
+    MemTracker::record_virtual_memory_type(bitmap.base(), mtGC);
+    _bitmap_region = MemRegion((HeapWord *) bitmap.base(), bitmap.size() / HeapWordSize);
+    MemRegion heap_region = MemRegion((HeapWord *) heap_rs.base(), heap_rs.size() / HeapWordSize);
+    _bitmap.initialize(heap_region, _bitmap_region);
+  }
+
   // All done, print out the configuration
   if (init_byte_size != max_byte_size) {
     log_info(gc)("Resizeable heap; starting at " SIZE_FORMAT "M, max: " SIZE_FORMAT "M, step: " SIZE_FORMAT "M",
                  init_byte_size / M, max_byte_size / M, EpsilonMinHeapExpand / M);
   } else {

@@ -237,11 +269,11 @@
                   ergo_tlab * HeapWordSize / K,
                   size * HeapWordSize / K);
   }
 
   // All prepared, let's do it!
-  HeapWord* res = allocate_work(size);
+  HeapWord* res = allocate_or_collect_work(size);
 
   if (res != NULL) {
     // Allocation successful
     *actual_size = size;
     if (EpsilonElasticTLABDecay) {

@@ -261,11 +293,11 @@
   return res;
 }
 
 HeapWord* EpsilonHeap::mem_allocate(size_t size, bool *gc_overhead_limit_was_exceeded) {
   *gc_overhead_limit_was_exceeded = false;
-  return allocate_work(size);
+  return allocate_or_collect_work(size);
 }
 
 void EpsilonHeap::collect(GCCause::Cause cause) {
   switch (cause) {
     case GCCause::_metadata_GC_threshold:

@@ -278,12 +310,20 @@
       log_info(gc)("GC request for \"%s\" is handled", GCCause::to_string(cause));
       MetaspaceGC::compute_new_size();
       print_metaspace_info();
       break;
     default:
+      if (EpsilonSlidingGC) {
+        if (SafepointSynchronize::is_at_safepoint()) {
+          entry_collect(cause);
+        } else {
+          vmentry_collect(cause);
+        }
+      } else {
       log_info(gc)("GC request for \"%s\" is ignored", GCCause::to_string(cause));
   }
+  }
   _monitoring_support->update_counters();
 }
 
 void EpsilonHeap::do_full_collection(bool clear_all_soft_refs) {
   collect(gc_cause());

@@ -342,5 +382,468 @@
             used * 100.0 / reserved);
   } else {
     log_info(gc, metaspace)("Metaspace: no reliable data");
   }
 }
+
+// ------------------ EXPERIMENTAL MARK-COMPACT -------------------------------
+//
+// This implements a trivial Lisp2-style sliding collector:
+//     https://en.wikipedia.org/wiki/Mark-compact_algorithm#LISP2_algorithm
+//
+// The goal for this implementation is to be as simple as possible, ignoring
+// non-trivial performance optimizations. This collector does not implement
+// reference processing: no soft/weak/phantom/finalizeable references are ever
+// cleared. It also does not implement class unloading and other runtime
+// cleanups.
+//
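+// The collection runs in a single safepoint and proceeds through the classical
+// Lisp2 phases, bracketed by a prologue and an epilogue (see entry_collect()):
+//   1. Mark all reachable objects in the marking bitmap;
+//   2. Calculate new locations, storing forwarding data in mark words;
+//   3. Adjust all references to point to the new locations;
+//   4. Move the objects to their new locations.
+//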
+
+// VM operation that executes the collection cycle at a safepoint
+class VM_EpsilonCollect: public VM_Operation {
+private:
+  const GCCause::Cause _cause;
+  EpsilonHeap* const _heap;
+  static size_t _last_used;
+public:
+  VM_EpsilonCollect(GCCause::Cause cause) : VM_Operation(),
+                                            _cause(cause),
+                                            _heap(EpsilonHeap::heap()) {};
+
+  VM_Operation::VMOp_Type type() const { return VMOp_EpsilonCollect; }
+  const char* name()             const { return "Epsilon Collection"; }
+
+  virtual bool doit_prologue() {
+    // Need to take the Heap lock before managing backing storage.
+    // This also naturally serializes GC requests, and allows us to coalesce
+    // back-to-back allocation failure requests from many threads. There is no
+    // need to handle an allocation failure that comes without any allocations
+    // since the last complete GC. Waiting for 1% of the heap to be allocated
+    // before starting the next GC seems to resolve most races.
+    Heap_lock->lock();
+    size_t used = _heap->used();
+    size_t capacity = _heap->capacity();
+    size_t allocated = used > _last_used ? used - _last_used : 0;
+    if (_cause != GCCause::_allocation_failure || allocated > capacity / 100) {
+      return true;
+    } else {
+      Heap_lock->unlock();
+      return false;
+    }
+  }
+
+  virtual void doit() {
+    _heap->entry_collect(_cause);
+  }
+
+  virtual void doit_epilogue() {
+    _last_used = _heap->used();
+    Heap_lock->unlock();
+  }
+};
+
+size_t VM_EpsilonCollect::_last_used = 0;
+
+void EpsilonHeap::vmentry_collect(GCCause::Cause cause) {
+  VM_EpsilonCollect vmop(cause);
+  VMThread::execute(&vmop);
+}
+
+HeapWord* EpsilonHeap::allocate_or_collect_work(size_t size) {
+  HeapWord* res = allocate_work(size);
+  if (res == NULL && EpsilonSlidingGC) {
+    vmentry_collect(GCCause::_allocation_failure);
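+    // One retry after the collection; if it fails again, we return NULL and
+    // the caller takes the usual allocation failure path.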
+    res = allocate_work(size);
+  }
+  return res;
+}
+
+typedef Stack<oop, mtGC> EpsilonMarkStack;
+
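+// Note: the process_roots() and process_all_roots() calls used later are
+// assumed to be thin wrappers (declared in epsilonHeap.hpp) that invoke
+// do_roots() with everything = false and true, respectively.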
+void EpsilonHeap::do_roots(OopClosure* cl, bool everything) {
+  // Need to tell the runtime we are about to walk the roots with 1 thread
+  StrongRootsScope scope(1);
+
+  // Need to adapt oop closure for some special root types.
+  CLDToOopClosure clds(cl, ClassLoaderData::_claim_none);
+  MarkingCodeBlobClosure blobs(cl, CodeBlobToOopClosure::FixRelocations);
+
+  // Walk all these different parts of the runtime roots. Some roots require
+  // holding their respective lock while walking them.
+  {
+    MutexLocker lock(CodeCache_lock, Mutex::_no_safepoint_check_flag);
+    CodeCache::blobs_do(&blobs);
+  }
+  {
+    MutexLocker lock(ClassLoaderDataGraph_lock);
+    ClassLoaderDataGraph::cld_do(&clds);
+  }
+  Universe::oops_do(cl);
+  Management::oops_do(cl);
+  JvmtiExport::oops_do(cl);
+  JNIHandles::oops_do(cl);
+  WeakProcessor::oops_do(cl);
+  ObjectSynchronizer::oops_do(cl);
+  SystemDictionary::oops_do(cl);
+  Threads::possibly_parallel_oops_do(false, cl, &blobs);
+
+  // This is implicitly handled by other roots, and we only want to
+  // touch these during verification.
+  if (everything) {
+    StringTable::oops_do(cl);
+  }
+}
+
+// Walk the marking bitmap and call the object closure on every marked object.
+// This is much faster than walking a (very sparse) parsable heap, but it
+// takes up to 1/64-th of the heap size for the bitmap.
+void EpsilonHeap::walk_bitmap(ObjectClosure* cl) {
+   HeapWord* limit = _space->top();
+   HeapWord* addr = _bitmap.get_next_marked_addr(_space->bottom(), limit);
+   while (addr < limit) {
+     oop obj = oop(addr);
+     assert(_bitmap.is_marked(obj), "sanity");
+     cl->do_object(obj);
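+     // Step past the current mark and look up the next marked address.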
+     addr += 1;
+     if (addr < limit) {
+       addr = _bitmap.get_next_marked_addr(addr, limit);
+     }
+   }
+}
+
+class EpsilonScanOopClosure : public BasicOopIterateClosure {
+private:
+  EpsilonMarkStack* const _stack;
+  MarkBitMap* const _bitmap;
+
+  template <class T>
+  void do_oop_work(T* p) {
+    // p is the pointer to the memory location where the oop resides. Load the
+    // value from it and unpack the compressed reference, if needed:
+    T o = RawAccess<>::oop_load(p);
+    if (!CompressedOops::is_null(o)) {
+      oop obj = CompressedOops::decode_not_null(o);
+
+      // The object is discovered. See if it is marked already. If not, mark
+      // it and push it on the mark stack for further traversal. A non-atomic
+      // check-and-set is enough, as this closure is called by a single thread.
+      if (!_bitmap->is_marked(obj)) {
+        _bitmap->mark((HeapWord*)obj);
+        _stack->push(obj);
+      }
+    }
+  }
+
+public:
+  EpsilonScanOopClosure(EpsilonMarkStack* stack, MarkBitMap* bitmap) :
+                        _stack(stack), _bitmap(bitmap) {}
+  virtual void do_oop(oop* p)       { do_oop_work(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+};
+
+class EpsilonCalcNewLocationObjectClosure : public ObjectClosure {
+private:
+  HeapWord* _compact_point;
+  PreservedMarks* const _preserved_marks;
+
+public:
+  EpsilonCalcNewLocationObjectClosure(HeapWord* start, PreservedMarks* pm) :
+                                      _compact_point(start),
+                                      _preserved_marks(pm) {}
+
+  void do_object(oop obj) {
+    // Record the new location of the object: it is the current compaction
+    // point. If the object stays at the same location (which is true for
+    // objects in the dense prefix, which we would normally get), do not bother
+    // recording the move, letting downstream code ignore it.
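+    // For example, if the first live object sits right at the bottom of the
+    // heap, it stays put and gets no forwardee; a live object found after a
+    // dead gap is forwarded down to the current compaction point.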
+    if ((HeapWord*)obj != _compact_point) {
+      markWord mark = obj->mark_raw();
+      if (mark.must_be_preserved(obj->klass())) {
+        _preserved_marks->push(obj, mark);
+      }
+      obj->forward_to(oop(_compact_point));
+    }
+    _compact_point += obj->size();
+  }
+
+  HeapWord* compact_point() {
+    return _compact_point;
+  }
+};
+
+class EpsilonAdjustPointersOopClosure : public BasicOopIterateClosure {
+private:
+  template <class T>
+  void do_oop_work(T* p) {
+    // p is the pointer to the memory location where the oop resides. Load the
+    // value from it and unpack the compressed reference, if needed:
+    T o = RawAccess<>::oop_load(p);
+    if (!CompressedOops::is_null(o)) {
+      oop obj = CompressedOops::decode_not_null(o);
+
+      // Rewrite the current pointer to the object with its forwardee.
+      // Skip the write if no update is needed.
+      if (obj->is_forwarded()) {
+        oop fwd = obj->forwardee();
+        assert(fwd != NULL, "just checking");
+        RawAccess<>::oop_store(p, fwd);
+      }
+    }
+  }
+
+public:
+  virtual void do_oop(oop* p)       { do_oop_work(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+};
+
+class EpsilonAdjustPointersObjectClosure : public ObjectClosure {
+private:
+  EpsilonAdjustPointersOopClosure _cl;
+public:
+  void do_object(oop obj) {
+    // Apply the updates to all references reachable from the current object:
+    obj->oop_iterate(&_cl);
+  }
+};
+
+class EpsilonMoveObjectsObjectClosure : public ObjectClosure {
+private:
+  size_t _moved;
+public:
+  EpsilonMoveObjectsObjectClosure() : ObjectClosure(), _moved(0) {}
+
+  void do_object(oop obj) {
+    // Copy the object to its new location, if needed. This is the final step,
+    // so we have to re-initialize its new mark word, dropping the forwardee
+    // data from it.
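+    // Because objects are compacted towards the bottom of the heap and the
+    // bitmap walk visits them in ascending address order, the destination
+    // never lies above the source, so the copy cannot clobber a not-yet-moved
+    // live object.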
+    if (obj->is_forwarded()) {
+      oop fwd = obj->forwardee();
+      assert(fwd != NULL, "just checking");
+      Copy::aligned_conjoint_words((HeapWord*)obj, (HeapWord*)fwd, obj->size());
+      fwd->init_mark_raw();
+      _moved++;
+    }
+  }
+
+  size_t moved() {
+    return _moved;
+  }
+};
+
+class EpsilonVerifyOopClosure : public BasicOopIterateClosure {
+private:
+  EpsilonHeap* const _heap;
+  EpsilonMarkStack* const _stack;
+  MarkBitMap* const _bitmap;
+
+  template <class T>
+  void do_oop_work(T* p) {
+    T o = RawAccess<>::oop_load(p);
+    if (!CompressedOops::is_null(o)) {
+      oop obj = CompressedOops::decode_not_null(o);
+      if (!_bitmap->is_marked(obj)) {
+        _bitmap->mark((HeapWord*)obj);
+
+        guarantee(_heap->is_in(obj),        "Is in heap: "   PTR_FORMAT, p2i(obj));
+        guarantee(oopDesc::is_oop(obj),     "Is an object: " PTR_FORMAT, p2i(obj));
+        guarantee(!obj->mark().is_marked(), "Mark is gone: " PTR_FORMAT, p2i(obj));
+
+        _stack->push(obj);
+      }
+    }
+  }
+
+public:
+  EpsilonVerifyOopClosure(EpsilonMarkStack* stack, MarkBitMap* bitmap) :
+    _heap(EpsilonHeap::heap()), _stack(stack), _bitmap(bitmap) {}
+  virtual void do_oop(oop* p)       { do_oop_work(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+};
+
+void EpsilonHeap::entry_collect(GCCause::Cause cause) {
+  GCIdMark mark;
+  GCTraceTime(Info, gc) time("Lisp2-style Mark-Compact", NULL, cause, true);
+
+  // Some statistics, for fun and profit:
+  size_t stat_reachable_roots = 0;
+  size_t stat_reachable_heap = 0;
+  size_t stat_moved = 0;
+  size_t stat_preserved_marks = 0;
+
+  {
+    GCTraceTime(Info, gc) time("Step 0: Prologue", NULL);
+
+    // Commit the marking bitmap memory. There are several upsides to doing
+    // this before the cycle: no memory is taken if GC is not happening, the
+    // memory is "cleared" on first touch, and untouched parts of the bitmap
+    // are mapped to the zero page, boosting performance on sparse heaps.
+    if (!os::commit_memory((char*)_bitmap_region.start(), _bitmap_region.byte_size(), false)) {
+      log_warning(gc)("Could not commit native memory for marking bitmap, GC failed");
+      return;
+    }
+
+    // We do not need a parsable heap for this algorithm to work, but we want
+    // threads to give up their TLABs.
+    ensure_parsability(true);
+
+    // Tell various parts of the runtime that we are doing GC.
+    BiasedLocking::preserve_marks();
+
+    // Derived pointers will be re-discovered during the mark.
+    // Clear and activate the table for them.
+    DerivedPointerTable::clear();
+  }
+
+  {
+    GCTraceTime(Info, gc) time("Step 1: Mark", NULL);
+
+    // Marking stack and the closure that does most of the work. The closure
+    // scans the outgoing references, marks the referenced objects, and pushes
+    // newly-marked objects on the stack for further processing.
+    EpsilonMarkStack stack;
+    EpsilonScanOopClosure cl(&stack, &_bitmap);
+
+    // Seed the marking with roots.
+    process_roots(&cl);
+    stat_reachable_roots = stack.size();
+
+    // Scan the rest of the heap until we run out of objects. Termination is
+    // guaranteed, because every reachable object gets marked, and thus pushed,
+    // at most once.
+    while (!stack.is_empty()) {
+      oop obj = stack.pop();
+      obj->oop_iterate(&cl);
+      stat_reachable_heap++;
+    }
+
+    // No more derived pointers are discovered after marking is done.
+    DerivedPointerTable::set_active(false);
+  }
+
+  // We are going to store forwarding information (where the new copy resides)
+  // in mark words. Some of those mark words need to be carefully preserved.
+  // This is a utility that maintains the list of those special mark words.
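+  // (Typically these are marks that carry locking or identity hash state that
+  // would otherwise be lost when the forwarding pointer overwrites the word.)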
+  PreservedMarks preserved_marks;
+
+  // New top of the allocated space.
+  HeapWord* new_top;
+
+  {
+    GCTraceTime(Info, gc) time("Step 2: Calculate new locations", NULL);
+
+    // Walk all live objects, compute their new addresses and store those
+    // addresses in mark words. Optionally preserve some marks.
+    EpsilonCalcNewLocationObjectClosure cl(_space->bottom(), &preserved_marks);
+    walk_bitmap(&cl);
+
+    // After addresses are calculated, we know the new top for the allocated
+    // space. We cannot set it just yet, because some asserts check that objects
+    // are "in heap" based on current "top".
+    new_top = cl.compact_point();
+
+    stat_preserved_marks = preserved_marks.size();
+  }
+
+  {
+    GCTraceTime(Info, gc) time("Step 3: Adjust pointers", NULL);
+
+    // Walk all live objects _and their reference fields_, and put "new
+    // addresses" there. We know the new addresses from the forwarding data
+    // in mark words. Take care of the heap objects first.
+    EpsilonAdjustPointersObjectClosure cl;
+    walk_bitmap(&cl);
+
+    // Now do the same for all VM roots, which hold references of their own:
+    // those references should also be updated.
+    EpsilonAdjustPointersOopClosure cli;
+    process_roots(&cli);
+
+    // Finally, make sure preserved marks know the objects are about to move.
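+    // (The preserved entries reference the objects themselves, so those
+    // references have to be forwarded as well.)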
+    preserved_marks.adjust_during_full_gc();
+  }
+
+  {
+    GCTraceTime(Info, gc) time("Step 4: Move objects", NULL);
+
+    // Move all live objects to their new locations. All the references were
+    // already adjusted in the previous step.
+    EpsilonMoveObjectsObjectClosure cl;
+    walk_bitmap(&cl);
+    stat_moved = cl.moved();
+
+    // Now that we have moved all objects to their new locations, we can
+    // retract the "top" of the allocation space to the end of the compacted
+    // prefix.
+    _space->set_top(new_top);
+  }
+
+  {
+    GCTraceTime(Info, gc) time("Step 5: Epilogue", NULL);
+
+    // Restore all special mark words.
+    preserved_marks.restore();
+
+    // Tell the rest of the runtime that we have finished the GC.
+    DerivedPointerTable::update_pointers();
+    BiasedLocking::restore_marks();
+
+    // Verification code walks the entire heap and verifies nothing is broken.
+    if (EpsilonVerify) {
+      // The basic implementation turns the heap into an entirely parsable one
+      // with only live objects, which means we could just walk the heap object
+      // by object and verify it. But it would be inconvenient for verification
+      // to assume the heap has only live objects: any future change that leaves
+      // at least one dead object with dead outgoing references would fail the
+      // verification. Therefore, it makes more sense to mark through the heap
+      // again, without assuming all objects are alive.
+      EpsilonMarkStack stack;
+      EpsilonVerifyOopClosure cl(&stack, &_bitmap);
+
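+      // Reuse the marking bitmap for verification: clear the marks left over
+      // from the collection cycle before re-marking.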
+      _bitmap.clear();
+
+      // Verify all roots are correct, and that we have the same number of
+      // objects reachable from roots.
+      process_all_roots(&cl);
+
+      size_t verified_roots = stack.size();
+      guarantee(verified_roots == stat_reachable_roots,
+                "Verification discovered " SIZE_FORMAT " roots out of " SIZE_FORMAT,
+                verified_roots, stat_reachable_roots);
+
+      // Verify the rest of the heap is correct, and that we have the same
+      // number of objects reachable from the heap.
+      size_t verified_heap = 0;
+      while (!stack.is_empty()) {
+        oop obj = stack.pop();
+        obj->oop_iterate(&cl);
+        verified_heap++;
+      }
+
+      guarantee(verified_heap == stat_reachable_heap,
+                "Verification discovered " SIZE_FORMAT " heap objects out of " SIZE_FORMAT,
+                verified_heap, stat_reachable_heap);
+
+      // Ask parts of the runtime to verify themselves too.
+      Universe::verify(VerifyOption_Default, "");
+    }
+
+    // The marking bitmap is not needed anymore.
+    if (!os::uncommit_memory((char*)_bitmap_region.start(), _bitmap_region.byte_size())) {
+      log_warning(gc)("Could not uncommit native memory for marking bitmap");
+    }
+
+    // Return the unused memory back to the OS, if so requested. On large
+    // heaps, this can take a while.
+    if (EpsilonUncommit) {
+      _virtual_space.shrink_by((_space->end() - new_top) * HeapWordSize);
+      _space->set_end((HeapWord*)_virtual_space.high());
+    }
+  }
+
+  size_t stat_reachable = stat_reachable_roots + stat_reachable_heap;
+  log_info(gc)("GC Stats: " SIZE_FORMAT " (%.2f%%) reachable from roots, " SIZE_FORMAT " (%.2f%%) reachable from heap, "
+               SIZE_FORMAT " (%.2f%%) moved, " SIZE_FORMAT " (%.2f%%) markwords preserved",
+               stat_reachable_roots, 100.0 * stat_reachable_roots / stat_reachable,
+               stat_reachable_heap,  100.0 * stat_reachable_heap  / stat_reachable,
+               stat_moved,           100.0 * stat_moved           / stat_reachable,
+               stat_preserved_marks, 100.0 * stat_preserved_marks / stat_reachable);
+
+  print_heap_info(used());
+  print_metaspace_info();
+}