import cstdlib = tango.stdc.stdlib;
import cstring = tango.stdc.string;
import cstdio = tango.stdc.stdio;
+debug(COLLECT_PRINTF) alias cstdio.printf printf;
/*
* This is a small optimization that proved its usefulness. For small chunks
return !opts.options.conservative && !(attrs & BlkAttr.NO_SCAN);
}
+// Return the number of `to`-sized units needed to hold `n`
+// (i.e. `n` divided by `to`, rounded up)
+private size_t round_up(size_t n, size_t to)
+{
+ return (n + to - 1) / to;
+}
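
// A minimal sanity check of round_up()'s semantics (illustrative sketch, not
// part of the patch): it yields a unit count (e.g. a number of pages), not
// a size rounded up to a multiple of `to`.
unittest
{
    assert (round_up(1, PAGESIZE) == 1);            // one byte still needs a whole page
    assert (round_up(PAGESIZE, PAGESIZE) == 1);     // an exact multiple
    assert (round_up(PAGESIZE + 1, PAGESIZE) == 2); // spills into a second page
}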
+
private
{
alias void delegate(Object) DEvent;
/// Turn off collections if > 0
int disabled;
+ // PID of the fork()ed process doing the mark() (0 if it is not running)
+ int mark_proc_pid;
+
/// min(pool.baseAddr)
byte *min_addr;
/// max(pool.topAddr)
for (size_t i = 0; i < B_PAGE; i++) {
for (List *list = gc.free_list[i]; list; list = list.next) {
- assert (list.pool !is null);
+ auto pool = list.pool;
+ assert (pool !is null);
auto p = cast(byte*) list;
- assert (p >= list.pool.baseAddr);
- assert (p < list.pool.topAddr);
+ assert (p >= pool.baseAddr);
+ assert (p < pool.topAddr);
+ assert (pool.freebits.test((p - pool.baseAddr) / 16));
}
}
}
if (info.base is null)
return BlkInfo.init;
info.size = pool.findSize(info.base);
- info.attr = getAttr(pool, cast(size_t)(info.base - pool.baseAddr) / 16u);
+ size_t bit_i = (info.base - pool.baseAddr) / 16;
+ info.attr = getAttr(pool, bit_i);
if (has_pointermap(info.attr)) {
info.size -= size_t.sizeof; // PointerMap bitmask
// Points to the PointerMap bitmask pointer, not user data
size_t reserve(size_t size)
{
assert(size != 0);
- size_t npages = (size + PAGESIZE - 1) / PAGESIZE;
+ size_t npages = round_up(size, PAGESIZE);
Pool* pool = newPool(npages);
if (!pool)
*/
void minimize()
{
+ // Disabled if a parallel collection is in progress because the shared mark
+ // bits of the freed pool might be used by the mark process
+ if (gc.mark_proc_pid != 0)
+ return;
+
size_t n;
size_t pn;
Pool* pool;
* Allocate a chunk of memory that is larger than a page.
* Return null if out of memory.
*/
-void* bigAlloc(size_t size, out Pool* pool)
+void* bigAlloc(size_t npages, out Pool* pool, size_t* pn)
{
- size_t npages;
- size_t n;
- size_t pn;
- size_t freedpages;
- void* p;
- int state;
+ // This code could use some refinement when repeatedly
+ // allocating very large arrays.
- npages = (size + PAGESIZE - 1) / PAGESIZE;
-
- for (state = 0; ; )
+ void* find_block()
{
- // This code could use some refinement when repeatedly
- // allocating very large arrays.
-
- for (n = 0; n < gc.pools.length; n++)
+ for (size_t n = 0; n < gc.pools.length; n++)
{
pool = gc.pools[n];
- pn = pool.allocPages(npages);
- if (pn != OPFAIL)
- goto L1;
+ *pn = pool.allocPages(npages);
+ if (*pn != OPFAIL)
+ return pool.baseAddr + *pn * PAGESIZE;
}
+ return null;
+ }
- // Failed
- switch (state)
- {
- case 0:
- if (gc.disabled)
- {
- state = 1;
- continue;
- }
- // Try collecting
- freedpages = fullcollectshell();
- if (freedpages >= gc.pools.length * ((POOLSIZE / PAGESIZE) / 4))
- {
- state = 1;
- continue;
- }
- // Release empty pools to prevent bloat
- minimize();
- // Allocate new pool
- pool = newPool(npages);
- if (!pool)
- {
- state = 2;
- continue;
- }
- pn = pool.allocPages(npages);
- assert(pn != OPFAIL);
- goto L1;
- case 1:
- // Release empty pools to prevent bloat
- minimize();
- // Allocate new pool
- pool = newPool(npages);
- if (!pool)
- goto Lnomemory;
- pn = pool.allocPages(npages);
- assert(pn != OPFAIL);
- goto L1;
- case 2:
- goto Lnomemory;
- default:
- assert(false);
- }
+ void* alloc_more()
+ {
+ // Release empty pools to prevent bloat
+ minimize();
+ // Allocate new pool
+ pool = newPool(npages);
+ if (!pool)
+ return null; // let malloc handle the error
+ *pn = pool.allocPages(npages);
+ assert(*pn != OPFAIL);
+ return pool.baseAddr + *pn * PAGESIZE;
}
- L1:
- pool.pagetable[pn] = B_PAGE;
- if (npages > 1)
- memset(&pool.pagetable[pn + 1], B_PAGEPLUS, npages - 1);
- p = pool.baseAddr + pn * PAGESIZE;
- memset(cast(char *)p + size, 0, npages * PAGESIZE - size);
- if (opts.options.mem_stomp)
- memset(p, 0xF1, size);
- return p;
+ if (void* p = find_block())
+ return p;
+
+ if (gc.disabled)
+ return alloc_more();
+
+ // Try collecting
+ size_t freedpages = fullcollectshell();
+ if (freedpages >= gc.pools.length * ((POOLSIZE / PAGESIZE) / 4)) {
+ if (void* p = find_block())
+ return p;
+ }
- Lnomemory:
- return null; // let mallocNoSync handle the error
+ return alloc_more();
}
int allocPage(Bins bin)
{
Pool* pool;
- size_t n;
size_t pn;
- byte* p;
- byte* ptop;
- for (n = 0; n < gc.pools.length; n++)
+ for (size_t n = 0; n < gc.pools.length; n++)
{
pool = gc.pools[n];
pn = pool.allocPages(1);
size_t size = binsize[bin];
auto list_head = &gc.free_list[bin];
- p = pool.baseAddr + pn * PAGESIZE;
- ptop = p + PAGESIZE;
+ byte* p = pool.baseAddr + pn * PAGESIZE;
+ byte* ptop = p + PAGESIZE;
+ size_t bit_i = pn * (PAGESIZE / 16);
+ pool.freebits.set_group(bit_i, PAGESIZE / 16);
for (; p < ptop; p += size)
{
List* l = cast(List *) p;
{
debug(COLLECT_PRINTF) printf("Gcx.fullcollect()\n");
- // we always need to stop the world to make threads save the CPU registers
+ // If eager allocation is used, we need to check first if there is a mark
+ // process running. If there isn't, we start a new one (see the next code
+ // block). If there is, we check whether it's still running or already
+ // finished. If it's still running, we tell the caller that no memory has
+ // been recovered (it will allocate more to fulfill the current request).
+ // If the mark process is done, we launch the sweep phase and hope enough
+ // memory is freed (if that's not the case, the caller will allocate more
+ // memory and the next time it's exhausted it will run a new collection).
+ if (opts.options.eager_alloc) {
+ if (gc.mark_proc_pid != 0) { // there is a mark process in progress
+ os.WRes r = os.wait_pid(gc.mark_proc_pid, false); // don't block
+ assert (r != os.WRes.ERROR);
+ switch (r) {
+ case os.WRes.DONE:
+ debug(COLLECT_PRINTF) printf("\t\tmark proc DONE\n");
+ gc.mark_proc_pid = 0;
+ return sweep();
+ case os.WRes.RUNNING:
+ debug(COLLECT_PRINTF) printf("\t\tmark proc RUNNING\n");
+ return 0;
+ case os.WRes.ERROR:
+ debug(COLLECT_PRINTF) printf("\t\tmark proc ERROR\n");
+ disable_fork(); // Try to keep going without forking
+ break;
+ }
+ }
+ }
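
// A rough sketch of what os.wait_pid() is assumed to do (its real definition
// lives in the os module, outside this excerpt): wrap waitpid(2), passing
// WNOHANG when block is false, and collapse the outcome into the WRes values
// handled above:
//
//     WRes wait_pid(pid_t pid, bool block = true)
//     {
//         int status = void;
//         pid_t waited = waitpid(pid, &status, block ? 0 : WNOHANG);
//         if (waited == -1)
//             return WRes.ERROR;   // the child could not be waited for
//         if (waited == 0)
//             return WRes.RUNNING; // only possible when not blocking
//         return WRes.DONE;
//     }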
+
+ // We always need to stop the world to make threads save the CPU registers
// in the stack and prepare themselves for thread_scanAll()
thread_suspendAll();
gc.stats.world_stopped();
+ // If forking is enabled, we fork() and start a new mark phase in the
+ // child. If eager allocation is used, the parent process tells the caller
+ // that no memory could be recycled, allowing the mutator to keep going
+ // almost instantly (at the expense of more memory consumption, because
+ // a new allocation will be triggered to fulfill the current request). If
+ // eager allocation is not used, the parent waits for the mark phase to
+ // finish before returning control to the mutator, but other threads are
+ // restarted and may run in parallel with the mark phase (unless they
+ // allocate or use the GC themselves, in which case the global GC lock
+ // will stop them).
if (opts.options.fork) {
cstdio.fflush(null); // avoid duplicated FILE* output
os.pid_t child_pid = os.fork();
assert (child_pid != -1); // don't accept errors in non-release mode
switch (child_pid) {
- case -1: // if fork() fails, fallback to stop-the-world
- opts.options.fork = false;
+ case -1: // if fork() fails, fall back to stop-the-world
+ disable_fork();
break;
case 0: // child process (i.e. the collector's mark phase)
mark(stackTop);
// start the world again and wait for the mark phase to finish
thread_resumeAll();
gc.stats.world_started();
- int status = void;
- os.pid_t wait_pid = os.waitpid(child_pid, &status, 0);
- assert (wait_pid == child_pid);
- return sweep();
+ if (opts.options.eager_alloc) {
+ gc.mark_proc_pid = child_pid;
+ return 0;
+ }
+ os.WRes r = os.wait_pid(child_pid); // block until it finishes
+ assert (r == os.WRes.DONE);
+ debug(COLLECT_PRINTF) printf("\t\tmark proc DONE (block)\n");
+ if (r == os.WRes.DONE)
+ return sweep();
+ debug(COLLECT_PRINTF) printf("\tmark() proc ERROR\n");
+ // If there was some error, try to keep going without forking
+ disable_fork();
+ // Re-suspend the threads to do the marking in this process
+ thread_suspendAll();
+ gc.stats.world_stopped();
}
}
- // if we reach here, we are using the standard stop-the-world collection
+ // If we reach here, we are using the standard stop-the-world collection,
+ // either because fork was disabled in the first place or because it was
+ // disabled after some error.
mark(stackTop);
thread_resumeAll();
gc.stats.world_started();
debug(COLLECT_PRINTF) printf("\tmark()\n");
gc.any_changes = false;
- for (size_t n = 0; n < gc.pools.length; n++)
- {
- Pool* pool = gc.pools[n];
- pool.mark.zero();
- pool.scan.zero();
- pool.freebits.zero();
- }
-
- // Mark each free entry, so it doesn't get scanned
- for (size_t n = 0; n < B_PAGE; n++)
- {
- for (List *list = gc.free_list[n]; list; list = list.next)
- {
- Pool* pool = list.pool;
- auto ptr = cast(byte*) list;
- assert (pool);
- assert (pool.baseAddr <= ptr);
- assert (ptr < pool.topAddr);
- size_t bit_i = cast(size_t)(ptr - pool.baseAddr) / 16;
- pool.freebits.set(bit_i);
- }
- }
for (size_t n = 0; n < gc.pools.length; n++)
{
Pool* pool = gc.pools[n];
pool.mark.copy(&pool.freebits);
+ pool.scan.zero();
}
/// Marks a range of memory in conservative mode.
}
else if (bin == B_PAGE)
{
- size_t bit_i = pn * (PAGESIZE / 16);
+ size_t bit_stride = PAGESIZE / 16;
+ size_t bit_i = pn * bit_stride;
if (!pool.mark.test(bit_i))
{
byte *p = pool.baseAddr + pn * PAGESIZE;
}
clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
- debug(COLLECT_PRINTF) printf("\tcollecting big %x\n", p);
+ debug(COLLECT_PRINTF) printf("\tcollecting big %p\n", p);
pool.pagetable[pn] = B_FREE;
+ pool.freebits.set_group(bit_i, PAGESIZE / 16);
freedpages++;
if (opts.options.mem_stomp)
memset(p, 0xF3, PAGESIZE);
{
pn++;
pool.pagetable[pn] = B_FREE;
+ bit_i += bit_stride;
+ pool.freebits.set_group(bit_i, PAGESIZE / 16);
freedpages++;
if (opts.options.mem_stomp)
goto Lnotfree;
}
pool.pagetable[pn] = B_FREE;
+ pool.freebits.set_group(bit_base, PAGESIZE / 16);
recoveredpages++;
continue;
}
+void disable_fork()
+{
+ // we have to disable both options, as eager_alloc assumes fork is enabled
+ opts.options.fork = false;
+ opts.options.eager_alloc = false;
+}
+
void initialize()
{
// If we are going to fork, make sure we have the needed OS support
if (opts.options.fork)
opts.options.fork = os.HAVE_SHARED && os.HAVE_FORK;
+ // Eager allocation is only possible when forking
+ if (!opts.options.fork)
+ opts.options.eager_alloc = false;
gc.lock = GCLock.classinfo;
gc.inited = 1;
setStackBottom(rt_stackBottom());
gc.stats = Stats(gc);
+ if (opts.options.prealloc_npools) {
+ size_t pages = round_up(opts.options.prealloc_psize, PAGESIZE);
+ for (size_t i = 0; i < opts.options.prealloc_npools; ++i)
+ newPool(pages);
+ }
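// For illustration (hypothetical values, not defaults): with
// prealloc_npools = 2 and prealloc_psize = 8 MiB, the loop above would
// create two pools of round_up(8 MiB, PAGESIZE) pages each, i.e. roughly
// 16 MiB reserved up front before the first collection is needed.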
}
}
Pool* pool = void;
+ size_t bit_i = void;
size_t capacity = void; // to figure out where to store the bitmask
if (bin < B_PAGE)
{
assert ((cast(byte*)list) < list.pool.topAddr);
gc.free_list[bin] = list.next;
pool = list.pool;
+ bit_i = (p - pool.baseAddr) / 16;
+ assert (pool.freebits.test(bit_i));
+ pool.freebits.clear(bit_i);
if (!(attrs & BlkAttr.NO_SCAN))
memset(p + size, 0, capacity - size);
if (opts.options.mem_stomp)
}
else
{
- p = bigAlloc(size, pool);
+ size_t pn;
+ size_t npages = round_up(size, PAGESIZE);
+ p = bigAlloc(npages, pool, &pn);
if (!p)
onOutOfMemoryError();
assert (pool !is null);
- // Round the size up to the number of pages needed to store it
- size_t npages = (size + PAGESIZE - 1) / PAGESIZE;
+
capacity = npages * PAGESIZE;
+ bit_i = pn * (PAGESIZE / 16);
+ pool.freebits.clear(bit_i);
+ pool.pagetable[pn] = B_PAGE;
+ if (npages > 1)
+ memset(&pool.pagetable[pn + 1], B_PAGEPLUS, npages - 1);
+ p = pool.baseAddr + pn * PAGESIZE;
+ memset(cast(char *)p + size, 0, npages * PAGESIZE - size);
+ if (opts.options.mem_stomp)
+ memset(p, 0xF1, size);
+
}
// Store the bit mask AFTER SENTINEL_POST
sentinel_init(p, size);
}
- if (attrs)
- setAttr(pool, cast(size_t)(p - pool.baseAddr) / 16, attrs);
+ if (attrs) {
+ setAttr(pool, bit_i, attrs);
+ assert (bin >= B_PAGE || !pool.freebits.test(bit_i));
+ }
return p;
}
if (blk_size >= PAGESIZE && size >= PAGESIZE)
{
auto psz = blk_size / PAGESIZE;
- auto newsz = (size + PAGESIZE - 1) / PAGESIZE;
+ auto newsz = round_up(size, PAGESIZE);
if (newsz == psz)
return p;
return 0; // cannot extend buckets
auto psz = blk_size / PAGESIZE;
- auto minsz = (minsize + PAGESIZE - 1) / PAGESIZE;
- auto maxsz = (maxsize + PAGESIZE - 1) / PAGESIZE;
+ auto minsz = round_up(minsize, PAGESIZE);
+ auto maxsz = round_up(maxsize, PAGESIZE);
auto pagenum = (p - pool.baseAddr) / PAGESIZE;
// Free pages
size_t npages = 1;
size_t n = pagenum;
+ pool.freebits.set_group(bit_i, PAGESIZE / 16);
while (++n < pool.npages && pool.pagetable[n] == B_PAGEPLUS)
npages++;
if (opts.options.mem_stomp)
list.next = gc.free_list[bin];
list.pool = pool;
gc.free_list[bin] = list;
+ pool.freebits.set(bit_i);
}
}
if (opts.options.fork)
vis = os.Vis.SHARED;
mark.alloc(nbits, vis); // shared between mark and sweep
- freebits.alloc(nbits, vis); // ditto
+ freebits.alloc(nbits); // not used by the mark phase
scan.alloc(nbits); // only used in the mark phase
- finals.alloc(nbits); // mark phase *MUST* have a snapshot
- noscan.alloc(nbits); // ditto
+ finals.alloc(nbits); // not used by the mark phase
+ noscan.alloc(nbits); // mark phase *MUST* have a snapshot
+
+ // all is free when we start
+ freebits.set_all();
+
+ // Avoid accidentally sweeping new pools while using eager allocation: the
+ // fork()ed mark process doesn't know about this pool, so the sweep run when
+ // it finishes would free everything in it unless all the mark bits are set.
+ if (gc.mark_proc_pid)
+ mark.set_all();
pagetable = cast(ubyte*) cstdlib.malloc(npages);
if (!pagetable)
if (opts.options.fork)
vis = os.Vis.SHARED;
mark.Dtor(vis);
- freebits.Dtor(vis);
+ freebits.Dtor();
scan.Dtor();
finals.Dtor();
noscan.Dtor();