From 6a40cbf5959226bf1e6ec7bdb6c7e03253072c28 Mon Sep 17 00:00:00 2001
From: Leandro Lucarella
Date: Thu, 9 Sep 2010 00:17:16 -0300
Subject: [PATCH] Add eager allocation support when fork()ing

Eager allocation consists of allocating a new pool when a collection is
triggered (because an allocation failed to find enough free space in the
current pools). This enables the mutator to keep running while the mark
phase runs in a fork()ed process in parallel.

This completes the concurrent GC, decreasing the maximum pause time (not
only the stop-the-world time) dramatically (almost by 2 orders of
magnitude). As a side effect, the total run-time is greatly reduced too,
because the extra allocated pools reduce the GC pressure. The number of
collections needed by a program can be reduced 3 to 9 times depending on
the load, which explains the total run-time reduction, even in single-core
environments.

To allow the mutator to run in parallel with the mark phase, the freebits
of free pages must be set for *all* the possible block sizes, not only for
the start of the page, because the freebits are used as a starting point
for the mark bits and the bin size of a freed page can be changed *after*
the mark phase is started, resulting in an inconsistency between mark bits
and free bits. Pools allocated during a parallel mark set all their mark
bits to avoid the sweep phase freeing those pages after the mark is done.
---
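Two illustrative sketches for reviewers (placed below the ---, so they are
not part of the change itself).

First, the bit arithmetic behind set_group(): freebits keeps one bit per
16-byte chunk, so a page always spans PAGESIZE / 16 bits, and since both
that count and every page-aligned base are multiples of 8, a whole page's
worth of bits can be set with a single memset. A minimal stand-alone D
sketch (simplified to flat byte addressing, ignoring GCBits' leading word;
all names here are invented for illustration):

    import cstring = tango.stdc.string;

    const size_t PAGESIZE = 4096;

    // Set every free bit covering one page, whatever bin size the page
    // ends up with (one bit per 16-byte chunk => 256 bits per page).
    void set_page_bits(byte* bitmap, size_t page_number)
    {
        size_t base = page_number * (PAGESIZE / 16); // first bit of the page
        size_t nbits = PAGESIZE / 16;                // bits covering the page
        assert ((base % 8) == 0 && (nbits % 8) == 0); // byte-aligned group
        cstring.memset(bitmap + base / 8, 0xff, nbits / 8);
    }

Setting the bits for every possible block size at once is what lets the bin
size of a freed page change while a fork()ed mark is running, without the
mark bits (which are seeded from the freebits) going stale.

Second, the eager allocation fast path added to fullcollect() boils down to
this fragment (a sketch of the code in the diff below, assuming a mark
process was started by an earlier collection):

    os.WRes r = os.wait_pid(gc.mark_proc_pid, false); // non-blocking poll
    if (r == os.WRes.RUNNING)
        return 0;       // no memory recovered: caller allocates a new pool
    if (r == os.WRes.DONE) {
        gc.mark_proc_pid = 0;
        return sweep(); // mark finished: reclaim what it found
    }
    disable_fork();     // ERROR: fall back to stop-the-world collection

The new behaviour can be turned off at run time with eager_alloc=0 in the
GC options string (parsed in the same place as no_fork and friends), which
restores the old blocking wait on the mark process.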
 rt/gc/cdgc/bits.d |  17 +++++++
 rt/gc/cdgc/gc.d   | 117 ++++++++++++++++++++++++++++++++++++----------
 rt/gc/cdgc/opts.d |  13 +++++-
 rt/gc/cdgc/os.d   |  35 ++++++++++++--
 4 files changed, 154 insertions(+), 28 deletions(-)

diff --git a/rt/gc/cdgc/bits.d b/rt/gc/cdgc/bits.d
index 6bfb073..7c06c83 100644
--- a/rt/gc/cdgc/bits.d
+++ b/rt/gc/cdgc/bits.d
@@ -165,6 +165,23 @@ struct GCBits
         cstring.memset(data + 1, 0, nwords * uint.sizeof);
     }
 
+    void set_all()
+    {
+        cstring.memset(data + 1, 0xff, nwords * uint.sizeof);
+    }
+
+    void set_group(size_t base, size_t nbits)
+    in
+    {
+        assert ((base % 8) == 0);
+        assert ((nbits % 8) == 0);
+    }
+    body
+    {
+        size_t nbytes = nbits / 8;
+        cstring.memset(data + 1 + (base >> BITS_SHIFT), 0xff, nbytes);
+    }
+
     void copy(GCBits *f)
     in
     {
diff --git a/rt/gc/cdgc/gc.d b/rt/gc/cdgc/gc.d
index 5f65ff9..0d9e927 100644
--- a/rt/gc/cdgc/gc.d
+++ b/rt/gc/cdgc/gc.d
@@ -51,6 +51,7 @@ import opts = rt.gc.cdgc.opts;
 import cstdlib = tango.stdc.stdlib;
 import cstring = tango.stdc.string;
 import cstdio = tango.stdc.stdio;
+debug(COLLECT_PRINTF) alias cstdio.printf printf;
 
 /*
  * This is a small optimization that proved its usefulness. For small chunks
@@ -202,6 +203,9 @@ struct GC
     /// Turn off collections if > 0
     int disabled;
 
+    // PID of the fork()ed process doing the mark() (0 if it is not running)
+    int mark_proc_pid;
+
     /// min(pool.baseAddr)
     byte *min_addr;
     /// max(pool.topAddr)
@@ -402,6 +406,11 @@ size_t reserve(size_t size)
  */
void minimize()
 {
+    // Disabled if a parallel collection is in progress because the shared
+    // mark bits of the freed pool might be used by the mark process
+    if (gc.mark_proc_pid != 0)
+        return;
+
     size_t n;
     size_t pn;
     Pool* pool;
@@ -596,15 +605,13 @@ int allocPage(Bins bin)
     byte* p = pool.baseAddr + pn * PAGESIZE;
     byte* ptop = p + PAGESIZE;
     size_t bit_i = pn * (PAGESIZE / 16);
-    size_t bit_stride = size / 16;
-    for (; p < ptop; p += size, bit_i += bit_stride)
+    pool.freebits.set_group(bit_i, PAGESIZE / 16);
+    for (; p < ptop; p += size)
     {
         List* l = cast(List *) p;
         l.next = *list_head;
         l.pool = pool;
         *list_head = l;
-        // TODO: maybe this can be optimized to be set in chunks
-        pool.freebits.set(bit_i);
     }
     return 1;
 }
@@ -787,18 +794,56 @@ size_t fullcollect(void *stackTop)
 {
     debug(COLLECT_PRINTF) printf("Gcx.fullcollect()\n");
 
-    // we always need to stop the world to make threads save the CPU registers
+    // If eager allocation is used, we need to check first if there is a mark
+    // process running. If there isn't, we start a new one (see the next code
+    // block). If there is, we check if it's still running or already finished.
+    // If it's still running, we tell the caller process that no memory has
+    // been recovered (it will allocate more to fulfill the current request).
+    // If the mark process is done, we launch the sweep phase and hope enough
+    // memory is freed (if that is not the case, the caller will allocate more
+    // memory and the next time it's exhausted it will run a new collection).
+    if (opts.options.eager_alloc) {
+        if (gc.mark_proc_pid != 0) { // there is a mark process in progress
+            os.WRes r = os.wait_pid(gc.mark_proc_pid, false); // don't block
+            assert (r != os.WRes.ERROR);
+            switch (r) {
+            case os.WRes.DONE:
+                debug(COLLECT_PRINTF) printf("\t\tmark proc DONE\n");
+                gc.mark_proc_pid = 0;
+                return sweep();
+            case os.WRes.RUNNING:
+                debug(COLLECT_PRINTF) printf("\t\tmark proc RUNNING\n");
+                return 0;
+            case os.WRes.ERROR:
+                debug(COLLECT_PRINTF) printf("\t\tmark proc ERROR\n");
+                disable_fork(); // Try to keep going without forking
+                break;
+            }
+        }
+    }
+
+    // We always need to stop the world to make threads save the CPU registers
     // in the stack and prepare themselves for thread_scanAll()
     thread_suspendAll();
     gc.stats.world_stopped();
 
+    // If forking is enabled, we fork() and start a new mark phase in the
+    // child. The parent process will tell the caller that no memory could be
+    // recycled if eager allocation is used, allowing the mutator to keep going
+    // almost instantly (at the expense of more memory consumption because
+    // a new allocation will be triggered to fulfill the current request). If
+    // no eager allocation is used, the parent will wait for the mark phase to
+    // finish before returning control to the mutator, but other threads are
+    // restarted and may run in parallel with the mark phase (unless they
+    // allocate or use the GC themselves, in which case the global GC lock will
+    // stop them).
     if (opts.options.fork) {
         cstdio.fflush(null); // avoid duplicated FILE* output
         os.pid_t child_pid = os.fork();
         assert (child_pid != -1); // don't accept errors in non-release mode
         switch (child_pid) {
-        case -1: // if fork() fails, fallback to stop-the-world
-            opts.options.fork = false;
+        case -1: // if fork() fails, fall back to stop-the-world
+            disable_fork();
             break;
         case 0: // child process (i.e. the collector's mark phase)
             mark(stackTop);
@@ -808,15 +853,28 @@ size_t fullcollect(void *stackTop)
             // start the world again and wait for the mark phase to finish
             thread_resumeAll();
             gc.stats.world_started();
-            int status = void;
-            os.pid_t wait_pid = os.waitpid(child_pid, &status, 0);
-            assert (wait_pid == child_pid);
-            return sweep();
+            if (opts.options.eager_alloc) {
+                gc.mark_proc_pid = child_pid;
+                return 0;
+            }
+            os.WRes r = os.wait_pid(child_pid); // block until it finishes
+            assert (r == os.WRes.DONE);
+            if (r == os.WRes.DONE) {
+                debug(COLLECT_PRINTF) printf("\t\tmark proc DONE (block)\n");
+                return sweep();
+            }
+            debug(COLLECT_PRINTF) printf("\tmark() proc ERROR\n");
+            // If there was some error, try to keep going without forking
+            disable_fork();
+            // Re-suspend the threads to do the marking in this process
+            thread_suspendAll();
+            gc.stats.world_stopped();
         }
     }
 
-    // if we reach here, we are using the standard stop-the-world collection
+    // If we reach here, we are using the standard stop-the-world collection,
+    // either because fork was disabled in the first place, or because it was
+    // disabled after some error.
     mark(stackTop);
     thread_resumeAll();
     gc.stats.world_started();
@@ -1043,9 +1101,9 @@ version(none) // BUG: doesn't work because freebits() must also be cleared
                 }
                 clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
 
-                debug(COLLECT_PRINTF) printf("\tcollecting big %x\n", p);
+                debug(COLLECT_PRINTF) printf("\tcollecting big %p\n", p);
                 pool.pagetable[pn] = B_FREE;
-                pool.freebits.set(bit_i);
+                pool.freebits.set_group(bit_i, PAGESIZE / 16);
                 freedpages++;
                 if (opts.options.mem_stomp)
                     memset(p, 0xF3, PAGESIZE);
@@ -1054,7 +1112,7 @@ version(none) // BUG: doesn't work because freebits() must also be cleared
                     pn++;
                     pool.pagetable[pn] = B_FREE;
                     bit_i += bit_stride;
-                    pool.freebits.set(bit_i);
+                    pool.freebits.set_group(bit_i, PAGESIZE / 16);
                     freedpages++;
 
                     if (opts.options.mem_stomp)
@@ -1097,10 +1155,8 @@ version(none) // BUG: doesn't work because freebits() must also be cleared
                     if (!pool.freebits.test(bit_i))
                         goto Lnotfree;
                 }
-                // we don't need to explicitly set the freebit here because all
-                // freebits were already set, including the bit used for the
-                // whole freed page (bit_base).
                 pool.pagetable[pn] = B_FREE;
+                pool.freebits.set_group(bit_base, PAGESIZE / 16);
                 recoveredpages++;
                 continue;
 
@@ -1200,6 +1256,13 @@ body
 }
 
 
+void disable_fork()
+{
+    // we have to disable both options, as eager_alloc assumes fork is enabled
+    opts.options.fork = false;
+    opts.options.eager_alloc = false;
+}
+
 void initialize()
 {
@@ -1209,6 +1272,9 @@ void initialize()
     // If we are going to fork, make sure we have the needed OS support
     if (opts.options.fork)
         opts.options.fork = os.HAVE_SHARED && os.HAVE_FORK;
+    // Eager allocation is only possible when forking
+    if (!opts.options.fork)
+        opts.options.eager_alloc = false;
     gc.lock = GCLock.classinfo;
     gc.inited = 1;
     setStackBottom(rt_stackBottom());
@@ -1612,13 +1678,9 @@ private void free(void *p)
         // Free pages
         size_t npages = 1;
         size_t n = pagenum;
-        pool.freebits.set(bit_i);
-        size_t bit_stride = PAGESIZE / 16;
-        while (++n < pool.npages && pool.pagetable[n] == B_PAGEPLUS) {
+        pool.freebits.set_group(bit_i, PAGESIZE / 16);
+        while (++n < pool.npages && pool.pagetable[n] == B_PAGEPLUS)
            npages++;
-            bit_i += bit_stride;
-            pool.freebits.set(bit_i);
-        }
         if (opts.options.mem_stomp)
             memset(p, 0xF2, npages * PAGESIZE);
         pool.freePages(pagenum, npages);
@@ -1946,6 +2008,13 @@ struct Pool
         finals.alloc(nbits); // not used by the mark phase
         noscan.alloc(nbits); // mark phase *MUST* have a snapshot
 
+        // all is free when we start
+        freebits.set_all();
+
+        // avoid accidental sweeping of new pools while using eager allocation
+        if (gc.mark_proc_pid)
+            mark.set_all();
+
         pagetable = cast(ubyte*) cstdlib.malloc(npages);
         if (!pagetable)
             onOutOfMemoryError();
diff --git a/rt/gc/cdgc/opts.d b/rt/gc/cdgc/opts.d
index 042cc01..dc2c8de 100644
--- a/rt/gc/cdgc/opts.d
+++ b/rt/gc/cdgc/opts.d
@@ -53,6 +53,7 @@ struct Options
     bool mem_stomp = false;
     bool conservative = false;
     bool fork = true;
+    bool eager_alloc = true;
 }
 
 package Options options;
@@ -90,6 +91,8 @@ void process_option(char* opt_name, char* opt_value)
         options.conservative = parse_bool(opt_value);
     else if (cstr_eq(opt_name, "no_fork"))
         options.fork = !parse_bool(opt_value);
+    else if (cstr_eq(opt_name, "eager_alloc"))
+        options.eager_alloc = parse_bool(opt_value);
 }
 
 
@@ -149,6 +152,7 @@ unittest
         assert (mem_stomp == false);
         assert (conservative == false);
         assert (fork == true);
+        assert (eager_alloc == true);
     }
     parse("mem_stomp");
     with (options) {
@@ -158,8 +162,9 @@ unittest
         assert (mem_stomp == true);
         assert (conservative == false);
         assert (fork == true);
+        assert (eager_alloc == true);
     }
-    parse("mem_stomp=0:verbose=2:conservative:no_fork=10");
+    parse("mem_stomp=0:verbose=2:conservative:no_fork=10:eager_alloc=0");
     with (options) {
         assert (verbose == 2);
         assert (log_file[0] == '\0');
@@ -167,6 +172,7 @@ unittest
         assert (mem_stomp == false);
         assert (conservative == true);
         assert (fork == false);
+        assert (eager_alloc == false);
     }
     parse("log_file=12345 67890:verbose=1:sentinel=4:mem_stomp=1");
     with (options) {
@@ -176,6 +182,7 @@ unittest
         assert (mem_stomp == true);
         assert (conservative == true);
         assert (fork == false);
+        assert (eager_alloc == false);
     }
     parse(null);
     with (options) {
@@ -185,6 +192,7 @@ unittest
         assert (mem_stomp == true);
         assert (conservative == true);
         assert (fork == false);
+        assert (eager_alloc == false);
     }
     parse("");
     with (options) {
@@ -194,6 +202,7 @@ unittest
         assert (mem_stomp == true);
         assert (conservative == true);
         assert (fork == false);
+        assert (eager_alloc == false);
     }
     parse(":");
     with (options) {
@@ -203,6 +212,7 @@ unittest
         assert (mem_stomp == true);
         assert (conservative == true);
         assert (fork == false);
+        assert (eager_alloc == false);
     }
     parse("::::");
     with (options) {
@@ -212,6 +222,7 @@ unittest
         assert (mem_stomp == true);
         assert (conservative == true);
         assert (fork == false);
+        assert (eager_alloc == false);
     }
 }
diff --git a/rt/gc/cdgc/os.d b/rt/gc/cdgc/os.d
index 81939d2..62e247a 100644
--- a/rt/gc/cdgc/os.d
+++ b/rt/gc/cdgc/os.d
@@ -32,6 +32,16 @@ module rt.gc.cdgc.os;
 
 // Public interface/Documentation
 
+/**
+ * Possible results for the wait_pid() function.
+ */
+enum WRes
+{
+    DONE,    /// The process has finished successfully
+    RUNNING, /// The process is still running
+    ERROR    /// There was an error waiting for the process
+}
+
 version (D_Ddoc) {
 
 /**
@@ -42,8 +52,16 @@ version (D_Ddoc) {
  */
 const HAVE_FORK = true;
 
+/**
+ * Wait for a process with PID pid to finish.
+ *
+ * If block is false, this function will not block and will return
+ * WRes.RUNNING if the process is still running. Otherwise it will always
+ * return WRes.DONE (unless there is an error, in which case WRes.ERROR is
+ * returned).
+ */
+WRes wait_pid(pid_t pid, bool block = true);
+
 public import tango.stdc.posix.unistd: pid_t, fork;
-public import tango.stdc.posix.sys.wait: waitpid;
 
 }
 
@@ -51,14 +69,25 @@ public import tango.stdc.posix.sys.wait: waitpid;
 else version (Posix) {
     enum { HAVE_FORK = true }
     public import tango.stdc.posix.unistd: pid_t, fork;
-    public import tango.stdc.posix.sys.wait: waitpid;
+    import tango.stdc.posix.sys.wait: waitpid, WNOHANG;
+    public WRes wait_pid(pid_t pid, bool block = true) {
+        int status = void;
+        pid_t waited_pid = waitpid(pid, &status, block ? 0 : WNOHANG);
+        if (waited_pid == 0)
+            return WRes.RUNNING;
+        assert (waited_pid == pid);
+        assert (status == 0);
+        if (waited_pid != pid || status != 0)
+            return WRes.ERROR;
+        return WRes.DONE;
+    }
 }
 
 else {
     enum { HAVE_FORK = false }
     alias int pid_t;
     pid_t fork() { assert (false); return -1; }
-    pid_t waitpid(pid_t, int*, int) { assert (false); return -1; }
+    WRes wait_pid(pid_t, bool = true) { assert (false); return WRes.ERROR; }
 }
-- 
2.43.0