X-Git-Url: https://git.llucax.com/software/dgc/cdgc.git/blobdiff_plain/e74b77edc55c73d1f1656131a645eed30a59727f..3389bc359d65b04c74bb41ef4b640367cb9aa617:/rt/gc/cdgc/gc.d?ds=sidebyside

diff --git a/rt/gc/cdgc/gc.d b/rt/gc/cdgc/gc.d
index 96c6a86..11944d8 100644
--- a/rt/gc/cdgc/gc.d
+++ b/rt/gc/cdgc/gc.d
@@ -51,6 +51,7 @@ import opts = rt.gc.cdgc.opts;
 import cstdlib = tango.stdc.stdlib;
 import cstring = tango.stdc.string;
 import cstdio = tango.stdc.stdio;
+debug(COLLECT_PRINTF) alias cstdio.printf printf;
 
 /*
  * This is a small optimization that proved its usefulness. For small chunks
@@ -98,6 +99,11 @@ package bool has_pointermap(uint attrs)
     return !opts.options.conservative && !(attrs & BlkAttr.NO_SCAN);
 }
 
+private size_t round_up(size_t n, size_t to)
+{
+    return (n + to - 1) / to;
+}
+
 private
 {
     alias void delegate(Object) DEvent;
@@ -202,6 +208,9 @@ struct GC
     /// Turn off collections if > 0
     int disabled;
 
+    // PID of the fork()ed process doing the mark() (0 if it is not running)
+    int mark_proc_pid;
+
     /// min(pool.baseAddr)
     byte *min_addr;
     /// max(pool.topAddr)
@@ -254,10 +263,12 @@ bool Invariant()
         for (size_t i = 0; i < B_PAGE; i++) {
             for (List *list = gc.free_list[i]; list; list = list.next) {
-                assert (list.pool !is null);
+                auto pool = list.pool;
+                assert (pool !is null);
                 auto p = cast(byte*) list;
-                assert (p >= list.pool.baseAddr);
-                assert (p < list.pool.topAddr);
+                assert (p >= pool.baseAddr);
+                assert (p < pool.topAddr);
+                assert (pool.freebits.test((p - pool.baseAddr) / 16));
             }
         }
     }
@@ -311,7 +322,8 @@ BlkInfo getInfo(void* p)
     if (info.base is null)
         return BlkInfo.init;
     info.size = pool.findSize(info.base);
-    info.attr = getAttr(pool, cast(size_t)(info.base - pool.baseAddr) / 16u);
+    size_t bit_i = (info.base - pool.baseAddr) / 16;
+    info.attr = getAttr(pool, bit_i);
     if (has_pointermap(info.attr)) {
         info.size -= size_t.sizeof; // PointerMap bitmask
         // Points to the PointerMap bitmask pointer, not user data
@@ -385,7 +397,7 @@ Bins findBin(size_t size)
 size_t reserve(size_t size)
 {
     assert(size != 0);
-    size_t npages = (size + PAGESIZE - 1) / PAGESIZE;
+    size_t npages = round_up(size, PAGESIZE);
     Pool* pool = newPool(npages);
 
     if (!pool)
@@ -399,6 +411,11 @@ size_t reserve(size_t size)
  */
 void minimize()
 {
+    // Disabled if a parallel collection is in progress because the shared mark
+    // bits of the freed pool might be used by the mark process
+    if (gc.mark_proc_pid != 0)
+        return;
+
     size_t n;
     size_t pn;
     Pool* pool;
@@ -427,87 +444,50 @@ void minimize()
  * Allocate a chunk of memory that is larger than a page.
  * Return null if out of memory.
  */
-void* bigAlloc(size_t size, out Pool* pool)
+void* bigAlloc(size_t npages, out Pool* pool, size_t* pn)
 {
-    size_t npages;
-    size_t n;
-    size_t pn;
-    size_t freedpages;
-    void* p;
-    int state;
+    // This code could use some refinement when repeatedly
+    // allocating very large arrays.
 
-    npages = (size + PAGESIZE - 1) / PAGESIZE;
-
-    for (state = 0; ; )
+    void* find_block()
     {
-        // This code could use some refinement when repeatedly
-        // allocating very large arrays.
-
-        for (n = 0; n < gc.pools.length; n++)
+        for (size_t n = 0; n < gc.pools.length; n++)
         {
             pool = gc.pools[n];
-            pn = pool.allocPages(npages);
-            if (pn != OPFAIL)
-                goto L1;
+            *pn = pool.allocPages(npages);
+            if (*pn != OPFAIL)
+                return pool.baseAddr + *pn * PAGESIZE;
         }
+        return null;
+    }
 
-        // Failed
-        switch (state)
-        {
-        case 0:
-            if (gc.disabled)
-            {
-                state = 1;
-                continue;
-            }
-            // Try collecting
-            freedpages = fullcollectshell();
-            if (freedpages >= gc.pools.length * ((POOLSIZE / PAGESIZE) / 4))
-            {
-                state = 1;
-                continue;
-            }
-            // Release empty pools to prevent bloat
-            minimize();
-            // Allocate new pool
-            pool = newPool(npages);
-            if (!pool)
-            {
-                state = 2;
-                continue;
-            }
-            pn = pool.allocPages(npages);
-            assert(pn != OPFAIL);
-            goto L1;
-        case 1:
-            // Release empty pools to prevent bloat
-            minimize();
-            // Allocate new pool
-            pool = newPool(npages);
-            if (!pool)
-                goto Lnomemory;
-            pn = pool.allocPages(npages);
-            assert(pn != OPFAIL);
-            goto L1;
-        case 2:
-            goto Lnomemory;
-        default:
-            assert(false);
-        }
+    void* alloc_more()
+    {
+        // Release empty pools to prevent bloat
+        minimize();
+        // Allocate new pool
+        pool = newPool(npages);
+        if (!pool)
+            return null; // let malloc handle the error
+        *pn = pool.allocPages(npages);
+        assert(*pn != OPFAIL);
+        return pool.baseAddr + *pn * PAGESIZE;
     }
 
-  L1:
-    pool.pagetable[pn] = B_PAGE;
-    if (npages > 1)
-        memset(&pool.pagetable[pn + 1], B_PAGEPLUS, npages - 1);
-    p = pool.baseAddr + pn * PAGESIZE;
-    memset(cast(char *)p + size, 0, npages * PAGESIZE - size);
-    if (opts.options.mem_stomp)
-        memset(p, 0xF1, size);
-    return p;
+    if (void* p = find_block())
+        return p;
+
+    if (gc.disabled)
+        return alloc_more();
+
+    // Try collecting
+    size_t freedpages = fullcollectshell();
+    if (freedpages >= gc.pools.length * ((POOLSIZE / PAGESIZE) / 4)) {
+        if (void* p = find_block())
+            return p;
+    }
 
-  Lnomemory:
-    return null; // let mallocNoSync handle the error
+    return alloc_more();
 }
@@ -570,12 +550,9 @@ Pool *newPool(size_t npages)
 int allocPage(Bins bin)
 {
     Pool* pool;
-    size_t n;
     size_t pn;
-    byte* p;
-    byte* ptop;
 
-    for (n = 0; n < gc.pools.length; n++)
+    for (size_t n = 0; n < gc.pools.length; n++)
     {
         pool = gc.pools[n];
         pn = pool.allocPages(1);
@@ -591,8 +568,10 @@ int allocPage(Bins bin)
     size_t size = binsize[bin];
     auto list_head = &gc.free_list[bin];
 
-    p = pool.baseAddr + pn * PAGESIZE;
-    ptop = p + PAGESIZE;
+    byte* p = pool.baseAddr + pn * PAGESIZE;
+    byte* ptop = p + PAGESIZE;
+    size_t bit_i = pn * (PAGESIZE / 16);
+    pool.freebits.set_group(bit_i, PAGESIZE / 16);
     for (; p < ptop; p += size)
     {
         List* l = cast(List *) p;
@@ -781,18 +760,56 @@ size_t fullcollect(void *stackTop)
 {
     debug(COLLECT_PRINTF) printf("Gcx.fullcollect()\n");
 
-    // we always need to stop the world to make threads save the CPU registers
+    // If eager allocation is used, we need to check first if there is a mark
+    // process running. If there isn't, we start a new one (see the next code
+    // block). If there is, we check if it's still running or already finished.
+    // If it's still running, we tell the caller process no memory has been
+    // recovered (it will allocate more to fulfill the current request). If
+    // the mark process is done, we launch the sweep phase and hope enough
+    // memory is freed (if that's not the case, the caller will allocate more
+    // memory and the next time it's exhausted it will run a new collection).
+    if (opts.options.eager_alloc) {
+        if (gc.mark_proc_pid != 0) { // there is a mark process in progress
+            os.WRes r = os.wait_pid(gc.mark_proc_pid, false); // don't block
+            assert (r != os.WRes.ERROR);
+            switch (r) {
+            case os.WRes.DONE:
+                debug(COLLECT_PRINTF) printf("\t\tmark proc DONE\n");
+                gc.mark_proc_pid = 0;
+                return sweep();
+            case os.WRes.RUNNING:
+                debug(COLLECT_PRINTF) printf("\t\tmark proc RUNNING\n");
+                return 0;
+            case os.WRes.ERROR:
+                debug(COLLECT_PRINTF) printf("\t\tmark proc ERROR\n");
+                disable_fork(); // Try to keep going without forking
+                break;
+            }
+        }
+    }
+
+    // We always need to stop the world to make threads save the CPU registers
     // in the stack and prepare themselves for thread_scanAll()
     thread_suspendAll();
     gc.stats.world_stopped();
 
+    // If forking is enabled, we fork() and start a new mark phase in the
+    // child. The parent process will tell the caller that no memory could be
+    // recycled if eager allocation is used, allowing the mutator to keep going
+    // almost instantly (at the expense of more memory consumption because
+    // a new allocation will be triggered to fulfill the current request). If
+    // no eager allocation is used, the parent will wait for the mark phase to
+    // finish before returning control to the mutator, but other threads are
+    // restarted and may run in parallel with the mark phase (unless they
+    // allocate or use the GC themselves, in which case the global GC lock will
+    // stop them).
     if (opts.options.fork) {
         cstdio.fflush(null); // avoid duplicated FILE* output
         os.pid_t child_pid = os.fork();
         assert (child_pid != -1); // don't accept errors in non-release mode
         switch (child_pid) {
-        case -1: // if fork() fails, fallback to stop-the-world
-            opts.options.fork = false;
+        case -1: // if fork() fails, fall back to stop-the-world
+            disable_fork();
            break;
         case 0: // child process (i.e. the collector's mark phase)
             mark(stackTop);
@@ -802,15 +819,28 @@ size_t fullcollect(void *stackTop)
             // start the world again and wait for the mark phase to finish
             thread_resumeAll();
             gc.stats.world_started();
-            int status = void;
-            os.pid_t wait_pid = os.waitpid(child_pid, &status, 0);
-            assert (wait_pid == child_pid);
-            return sweep();
+            if (opts.options.eager_alloc) {
+                gc.mark_proc_pid = child_pid;
+                return 0;
+            }
+            os.WRes r = os.wait_pid(child_pid); // block until it finishes
+            assert (r == os.WRes.DONE);
+            debug(COLLECT_PRINTF) printf("\t\tmark proc DONE (block)\n");
+            if (r == os.WRes.DONE)
+                return sweep();
+            debug(COLLECT_PRINTF) printf("\tmark() proc ERROR\n");
+            // If there was some error, try to keep going without forking
+            disable_fork();
+            // Re-suspend the threads to do the marking in this process
+            thread_suspendAll();
+            gc.stats.world_stopped();
         }
     }
 
-    // if we reach here, we are using the standard stop-the-world collection
+    // If we reach here, we are using the standard stop-the-world collection,
+    // either because fork was disabled in the first place, or because it was
+    // disabled after an error.
     mark(stackTop);
     thread_resumeAll();
     gc.stats.world_started();
@@ -827,33 +857,12 @@ void mark(void *stackTop)
     debug(COLLECT_PRINTF) printf("\tmark()\n");
 
     gc.any_changes = false;
-    for (size_t n = 0; n < gc.pools.length; n++)
-    {
-        Pool* pool = gc.pools[n];
-        pool.mark.zero();
-        pool.scan.zero();
-        pool.freebits.zero();
-    }
-
-    // Mark each free entry, so it doesn't get scanned
-    for (size_t n = 0; n < B_PAGE; n++)
-    {
-        for (List *list = gc.free_list[n]; list; list = list.next)
-        {
-            Pool* pool = list.pool;
-            auto ptr = cast(byte*) list;
-            assert (pool);
-            assert (pool.baseAddr <= ptr);
-            assert (ptr < pool.topAddr);
-            size_t bit_i = cast(size_t)(ptr - pool.baseAddr) / 16;
-            pool.freebits.set(bit_i);
-        }
-    }
 
     for (size_t n = 0; n < gc.pools.length; n++)
     {
         Pool* pool = gc.pools[n];
         pool.mark.copy(&pool.freebits);
+        pool.scan.zero();
     }
 
     /// Marks a range of memory in conservative mode.
@@ -1043,7 +1052,8 @@ version(none) // BUG: doesn't work because freebits() must also be cleared
         }
         else if (bin == B_PAGE)
         {
-            size_t bit_i = pn * (PAGESIZE / 16);
+            size_t bit_stride = PAGESIZE / 16;
+            size_t bit_i = pn * bit_stride;
             if (!pool.mark.test(bit_i))
             {
                 byte *p = pool.baseAddr + pn * PAGESIZE;
@@ -1057,8 +1067,9 @@ version(none) // BUG: doesn't work because freebits() must also be cleared
                 }
                 clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
 
-                debug(COLLECT_PRINTF) printf("\tcollecting big %x\n", p);
+                debug(COLLECT_PRINTF) printf("\tcollecting big %p\n", p);
                 pool.pagetable[pn] = B_FREE;
+                pool.freebits.set_group(bit_i, PAGESIZE / 16);
                 freedpages++;
                 if (opts.options.mem_stomp)
                     memset(p, 0xF3, PAGESIZE);
@@ -1066,6 +1077,8 @@ version(none) // BUG: doesn't work because freebits() must also be cleared
                     {
                         pn++;
                         pool.pagetable[pn] = B_FREE;
+                        bit_i += bit_stride;
+                        pool.freebits.set_group(bit_i, PAGESIZE / 16);
                         freedpages++;
 
                         if (opts.options.mem_stomp)
@@ -1109,6 +1122,7 @@ version(none) // BUG: doesn't work because freebits() must also be cleared
                     goto Lnotfree;
             }
             pool.pagetable[pn] = B_FREE;
+            pool.freebits.set_group(bit_base, PAGESIZE / 16);
             recoveredpages++;
             continue;
 
@@ -1208,6 +1222,13 @@ body
 }
 
 
+void disable_fork()
+{
+    // we have to disable both options, as eager_alloc assumes fork is enabled
+    opts.options.fork = false;
+    opts.options.eager_alloc = false;
+}
+
 void initialize()
 {
@@ -1217,10 +1238,18 @@ void initialize()
     // If we are going to fork, make sure we have the needed OS support
     if (opts.options.fork)
         opts.options.fork = os.HAVE_SHARED && os.HAVE_FORK;
+    // Eager allocation is only possible when forking
+    if (!opts.options.fork)
+        opts.options.eager_alloc = false;
     gc.lock = GCLock.classinfo;
     gc.inited = 1;
     setStackBottom(rt_stackBottom());
     gc.stats = Stats(gc);
+    if (opts.options.prealloc_npools) {
+        size_t pages = round_up(opts.options.prealloc_psize, PAGESIZE);
+        for (size_t i = 0; i < opts.options.prealloc_npools; ++i)
+            newPool(pages);
+    }
 }
 
 
@@ -1259,6 +1288,7 @@ private void *malloc(size_t size, uint attrs, size_t* pm_bitmask)
     }
 
     Pool* pool = void;
+    size_t bit_i = void;
     size_t capacity = void; // to figure out where to store the bitmask
     if (bin < B_PAGE)
     {
@@ -1301,6 +1331,9 @@ private void *malloc(size_t size, uint attrs, size_t* pm_bitmask)
         assert ((cast(byte*)list) < list.pool.topAddr);
         gc.free_list[bin] = list.next;
         pool = list.pool;
+        bit_i = (p - pool.baseAddr) / 16;
+        assert (pool.freebits.test(bit_i));
+        pool.freebits.clear(bit_i);
         if (!(attrs & BlkAttr.NO_SCAN))
             memset(p + size, 0, capacity - size);
         if (opts.options.mem_stomp)
@@ -1308,13 +1341,24 @@ private void *malloc(size_t size, uint attrs, size_t* pm_bitmask)
     }
     else
     {
-        p = bigAlloc(size, pool);
+        size_t pn;
+        size_t npages = round_up(size, PAGESIZE);
+        p = bigAlloc(npages, pool, &pn);
         if (!p)
             onOutOfMemoryError();
         assert (pool !is null);
-        // Round the size up to the number of pages needed to store it
-        size_t npages = (size + PAGESIZE - 1) / PAGESIZE;
+
         capacity = npages * PAGESIZE;
+        bit_i = pn * (PAGESIZE / 16);
+        pool.freebits.clear(bit_i);
+        pool.pagetable[pn] = B_PAGE;
+        if (npages > 1)
+            memset(&pool.pagetable[pn + 1], B_PAGEPLUS, npages - 1);
+        p = pool.baseAddr + pn * PAGESIZE;
+        memset(cast(char *)p + size, 0, npages * PAGESIZE - size);
+        if (opts.options.mem_stomp)
+            memset(p, 0xF1, size);
     }
 
     // Store the bit mask AFTER SENTINEL_POST
@@ -1331,8 +1375,10 @@ private void *malloc(size_t size, uint attrs, size_t* pm_bitmask)
         sentinel_init(p, size);
     }
 
-    if (attrs)
-        setAttr(pool, cast(size_t)(p - pool.baseAddr) / 16, attrs);
+    if (attrs) {
+        setAttr(pool, bit_i, attrs);
+        assert (bin >= B_PAGE || !pool.freebits.test(bit_i));
+    }
 
     return p;
 }
@@ -1417,7 +1463,7 @@ private void *realloc(void *p, size_t size, uint attrs,
     if (blk_size >= PAGESIZE && size >= PAGESIZE)
     {
         auto psz = blk_size / PAGESIZE;
-        auto newsz = (size + PAGESIZE - 1) / PAGESIZE;
+        auto newsz = round_up(size, PAGESIZE);
         if (newsz == psz)
             return p;
 
@@ -1541,8 +1587,8 @@ body
         return 0; // cannot extend buckets
 
     auto psz = blk_size / PAGESIZE;
-    auto minsz = (minsize + PAGESIZE - 1) / PAGESIZE;
-    auto maxsz = (maxsize + PAGESIZE - 1) / PAGESIZE;
+    auto minsz = round_up(minsize, PAGESIZE);
+    auto maxsz = round_up(maxsize, PAGESIZE);
 
     auto pagenum = (p - pool.baseAddr) / PAGESIZE;
 
@@ -1613,6 +1659,7 @@ private void free(void *p)
         // Free pages
         size_t npages = 1;
         size_t n = pagenum;
+        pool.freebits.set_group(bit_i, PAGESIZE / 16);
         while (++n < pool.npages && pool.pagetable[n] == B_PAGEPLUS)
             npages++;
         if (opts.options.mem_stomp)
@@ -1632,6 +1679,7 @@ private void free(void *p)
         list.next = gc.free_list[bin];
         list.pool = pool;
         gc.free_list[bin] = list;
+        pool.freebits.set(bit_i);
     }
 }
 
@@ -1936,10 +1984,17 @@ struct Pool
         if (opts.options.fork)
             vis = os.Vis.SHARED;
         mark.alloc(nbits, vis); // shared between mark and sweep
-        freebits.alloc(nbits, vis); // ditto
+        freebits.alloc(nbits); // not used by the mark phase
        scan.alloc(nbits); // only used in the mark phase
-        finals.alloc(nbits); // mark phase *MUST* have a snapshot
-        noscan.alloc(nbits); // ditto
+        finals.alloc(nbits); // not used by the mark phase
+        noscan.alloc(nbits); // mark phase *MUST* have a snapshot
+
+        // all is free when we start
+        freebits.set_all();
+
+        // avoid accidental sweeping of new pools while using eager allocation
+        if (gc.mark_proc_pid)
+            mark.set_all();
 
         pagetable = cast(ubyte*) cstdlib.malloc(npages);
         if (!pagetable)
@@ -1974,7 +2029,7 @@ struct Pool
         if (opts.options.fork)
             vis = os.Vis.SHARED;
         mark.Dtor(vis);
-        freebits.Dtor(vis);
+        freebits.Dtor();
         scan.Dtor();
         finals.Dtor();
         noscan.Dtor();
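
The round_up() helper introduced near the top of the patch, and threaded through reserve(), malloc(), realloc() and extend(), is a rounded-up integer division: despite its name it returns how many to-sized units are needed to hold n bytes rather than a rounded byte count, which is exactly how every call site uses it to turn a byte size into a page count. A minimal, self-contained check of that behaviour (the unittest scaffolding here is illustrative, not part of gc.d):

    private size_t round_up(size_t n, size_t to)
    {
        // how many 'to'-sized units are needed to hold n bytes
        return (n + to - 1) / to;
    }

    unittest
    {
        const size_t PAGESIZE = 4096;
        assert (round_up(1, PAGESIZE) == 1);            // a single byte still needs one page
        assert (round_up(PAGESIZE, PAGESIZE) == 1);     // an exact fit needs exactly one page
        assert (round_up(PAGESIZE + 1, PAGESIZE) == 2); // one byte over spills into a second page
    }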
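The new fullcollect() logic around gc.mark_proc_pid amounts to a small state machine: fork() a child that runs mark() against the shared mark bits, let the mutator keep running, and on a later allocation failure poll the child without blocking; only once it has exited is sweep() run. The stand-alone sketch below shows that parent-side flow on a POSIX system, under stated assumptions: the MiniCollector struct and the runMark/runSweep delegates are made up for illustration, it calls fork()/waitpid() directly where the real code goes through the os module wrappers (os.fork(), os.wait_pid()), and the tango.stdc.posix import paths are assumed.

    import tango.stdc.posix.unistd;     // fork(), _exit() (assumed Tango paths)
    import tango.stdc.posix.sys.wait;   // waitpid(), WNOHANG (assumed Tango paths)

    struct MiniCollector
    {
        int mark_proc_pid = 0;           // 0 means no mark process is running

        // Called when the allocator runs out of memory.  Returns true when a
        // sweep actually ran, false when the caller should just allocate more.
        bool collect(void delegate() runMark, void delegate() runSweep)
        {
            if (mark_proc_pid != 0)      // a mark process is already in progress
            {
                int status;
                int r = waitpid(mark_proc_pid, &status, WNOHANG); // don't block
                if (r == 0)
                    return false;        // still marking: caller allocates more instead
                mark_proc_pid = 0;
                runSweep();              // mark finished: reclaim unmarked memory now
                return true;
            }

            int child = fork();          // start a new concurrent mark phase
            if (child == 0)
            {
                runMark();               // child marks a snapshot of the heap
                _exit(0);
            }
            mark_proc_pid = child;       // parent: remember the mark process
            return false;                // nothing reclaimed yet, allocate more
        }
    }

Seen this way, the eager_alloc option only decides whether the parent blocks on the child right away (the non-eager path in the diff) or defers the sweep to whichever later allocation finds the mark process already finished.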
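Most of the remaining hunks maintain a single invariant around Pool.freebits, which keeps one bit per 16-byte chunk: a freshly allocated bin page starts with all of its bits set (the set_group() call in allocPage()), malloc() clears the bit of the chunk it hands out, free() and the sweep set it again, and mark() now seeds the mark bits from it (pool.mark.copy(&pool.freebits)) instead of walking the free lists, so free entries are not scanned for pointers. A toy model of that bookkeeping, with a plain bool array standing in for the real bit-set type (FreeBits and the constants below are illustrative only):

    const size_t PAGESIZE = 4096;
    const size_t CHUNK = 16;                 // cdgc tracks one bit per 16 bytes

    struct FreeBits
    {
        bool[PAGESIZE / CHUNK] bits;

        void set_group(size_t start, size_t len) { bits[start .. start + len] = true; }
        void set(size_t i)   { bits[i] = true; }
        void clear(size_t i) { bits[i] = false; }
        bool test(size_t i)  { return bits[i]; }
    }

    void example()
    {
        FreeBits freebits;

        // allocPage(): a new bin page is entirely free
        freebits.set_group(0, PAGESIZE / CHUNK);

        // malloc(): the chunk at byte offset 48 is handed to the mutator
        size_t bit_i = 48 / CHUNK;
        assert (freebits.test(bit_i));
        freebits.clear(bit_i);

        // mark() can copy the free bits into the mark bits at this point, so
        // chunks sitting on the free lists are never scanned for pointers.

        // free(): the chunk goes back to the free list and its bit is set again
        freebits.set(bit_i);
    }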