X-Git-Url: https://git.llucax.com/software/dgc/cdgc.git/blobdiff_plain/cc0ea21e4d2f60ad8cc8c4cc5da47264b8e0dc85..3389bc359d65b04c74bb41ef4b640367cb9aa617:/rt/gc/cdgc/gc.d

diff --git a/rt/gc/cdgc/gc.d b/rt/gc/cdgc/gc.d
index 2d75813..11944d8 100644
--- a/rt/gc/cdgc/gc.d
+++ b/rt/gc/cdgc/gc.d
@@ -50,6 +50,8 @@ import opts = rt.gc.cdgc.opts;
 
 import cstdlib = tango.stdc.stdlib;
 import cstring = tango.stdc.string;
+import cstdio = tango.stdc.stdio;
+debug(COLLECT_PRINTF) alias cstdio.printf printf;
 
 /*
  * This is a small optimization that proved its usefulness. For small chunks
@@ -97,6 +99,11 @@ package bool has_pointermap(uint attrs)
     return !opts.options.conservative && !(attrs & BlkAttr.NO_SCAN);
 }
 
+private size_t round_up(size_t n, size_t to)
+{
+    return (n + to - 1) / to;
+}
+
 private
 {
     alias void delegate(Object) DEvent;
@@ -201,6 +208,9 @@ struct GC
     /// Turn off collections if > 0
     int disabled;
 
+    // PID of the fork()ed process doing the mark() (0 if it is not running)
+    int mark_proc_pid;
+
     /// min(pool.baseAddr)
     byte *min_addr;
     /// max(pool.topAddr)
@@ -253,10 +263,12 @@ bool Invariant()
         for (size_t i = 0; i < B_PAGE; i++) {
             for (List *list = gc.free_list[i]; list; list = list.next) {
-                assert (list.pool !is null);
+                auto pool = list.pool;
+                assert (pool !is null);
                 auto p = cast(byte*) list;
-                assert (p >= list.pool.baseAddr);
-                assert (p < list.pool.topAddr);
+                assert (p >= pool.baseAddr);
+                assert (p < pool.topAddr);
+                assert (pool.freebits.test((p - pool.baseAddr) / 16));
             }
         }
     }
@@ -310,7 +322,8 @@ BlkInfo getInfo(void* p)
     if (info.base is null)
         return BlkInfo.init;
    info.size = pool.findSize(info.base);
-    info.attr = getAttr(pool, cast(size_t)(info.base - pool.baseAddr) / 16u);
+    size_t bit_i = (info.base - pool.baseAddr) / 16;
+    info.attr = getAttr(pool, bit_i);
     if (has_pointermap(info.attr)) {
         info.size -= size_t.sizeof; // PointerMap bitmask
         // Points to the PointerMap bitmask pointer, not user data
@@ -384,7 +397,7 @@ Bins findBin(size_t size)
 size_t reserve(size_t size)
 {
     assert(size != 0);
-    size_t npages = (size + PAGESIZE - 1) / PAGESIZE;
+    size_t npages = round_up(size, PAGESIZE);
     Pool* pool = newPool(npages);
 
     if (!pool)
@@ -398,6 +411,11 @@ size_t reserve(size_t size)
  */
 void minimize()
 {
+    // Disabled if a parallel collection is in progress, because the shared
+    // mark bits of the freed pool might still be used by the mark process.
+    if (gc.mark_proc_pid != 0)
+        return;
+
     size_t n;
     size_t pn;
     Pool* pool;
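
Note on the round_up() helper added above: despite its name, it performs a
ceiling division, returning how many "to"-sized chunks are needed to hold "n"
bytes rather than "n" rounded up to a multiple of "to". Every call site in
this patch (reserve(), malloc(), realloc(), extend(), initialize()) uses it to
compute page counts, so that is the intended meaning. A minimal standalone
check of those semantics (plain D, independent of the patch):

    private size_t round_up(size_t n, size_t to)
    {
        return (n + to - 1) / to;
    }

    unittest
    {
        const PAGESIZE = 4096;
        assert (round_up(1, PAGESIZE) == 1);            // one page suffices
        assert (round_up(PAGESIZE, PAGESIZE) == 1);     // exact fit
        assert (round_up(PAGESIZE + 1, PAGESIZE) == 2); // needs a second page
    }
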
@@ -426,87 +444,50 @@ void minimize()
  * Allocate a chunk of memory that is larger than a page.
  * Return null if out of memory.
  */
-void* bigAlloc(size_t size, out Pool* pool)
+void* bigAlloc(size_t npages, out Pool* pool, size_t* pn)
 {
-    size_t npages;
-    size_t n;
-    size_t pn;
-    size_t freedpages;
-    void* p;
-    int state;
+    // This code could use some refinement when repeatedly
+    // allocating very large arrays.
 
-    npages = (size + PAGESIZE - 1) / PAGESIZE;
-
-    for (state = 0; ; )
+    void* find_block()
     {
-        // This code could use some refinement when repeatedly
-        // allocating very large arrays.
-
-        for (n = 0; n < gc.pools.length; n++)
+        for (size_t n = 0; n < gc.pools.length; n++)
         {
             pool = gc.pools[n];
-            pn = pool.allocPages(npages);
-            if (pn != OPFAIL)
-                goto L1;
+            *pn = pool.allocPages(npages);
+            if (*pn != OPFAIL)
+                return pool.baseAddr + *pn * PAGESIZE;
         }
+        return null;
+    }
 
-        // Failed
-        switch (state)
-        {
-        case 0:
-            if (gc.disabled)
-            {
-                state = 1;
-                continue;
-            }
-            // Try collecting
-            freedpages = fullcollectshell();
-            if (freedpages >= gc.pools.length * ((POOLSIZE / PAGESIZE) / 4))
-            {
-                state = 1;
-                continue;
-            }
-            // Release empty pools to prevent bloat
-            minimize();
-            // Allocate new pool
-            pool = newPool(npages);
-            if (!pool)
-            {
-                state = 2;
-                continue;
-            }
-            pn = pool.allocPages(npages);
-            assert(pn != OPFAIL);
-            goto L1;
-        case 1:
-            // Release empty pools to prevent bloat
-            minimize();
-            // Allocate new pool
-            pool = newPool(npages);
-            if (!pool)
-                goto Lnomemory;
-            pn = pool.allocPages(npages);
-            assert(pn != OPFAIL);
-            goto L1;
-        case 2:
-            goto Lnomemory;
-        default:
-            assert(false);
-        }
+    void* alloc_more()
+    {
+        // Release empty pools to prevent bloat
+        minimize();
+        // Allocate new pool
+        pool = newPool(npages);
+        if (!pool)
+            return null; // let malloc handle the error
+        *pn = pool.allocPages(npages);
+        assert(*pn != OPFAIL);
+        return pool.baseAddr + *pn * PAGESIZE;
     }
 
-    L1:
-        pool.pagetable[pn] = B_PAGE;
-        if (npages > 1)
-            memset(&pool.pagetable[pn + 1], B_PAGEPLUS, npages - 1);
-        p = pool.baseAddr + pn * PAGESIZE;
-        memset(cast(char *)p + size, 0, npages * PAGESIZE - size);
-        if (opts.options.mem_stomp)
-            memset(p, 0xF1, size);
-        return p;
+    if (void* p = find_block())
+        return p;
+
+    if (gc.disabled)
+        return alloc_more();
+
+    // Try collecting
+    size_t freedpages = fullcollectshell();
+    if (freedpages >= gc.pools.length * ((POOLSIZE / PAGESIZE) / 4)) {
+        if (void* p = find_block())
+            return p;
+    }
 
-    Lnomemory:
-        return null; // let mallocNoSync handle the error
+    return alloc_more();
 }
@@ -569,12 +550,9 @@ Pool *newPool(size_t npages)
 int allocPage(Bins bin)
 {
     Pool* pool;
-    size_t n;
     size_t pn;
-    byte* p;
-    byte* ptop;
 
-    for (n = 0; n < gc.pools.length; n++)
+    for (size_t n = 0; n < gc.pools.length; n++)
     {
         pool = gc.pools[n];
         pn = pool.allocPages(1);
@@ -590,8 +568,10 @@ int allocPage(Bins bin)
     size_t size = binsize[bin];
     auto list_head = &gc.free_list[bin];
 
-    p = pool.baseAddr + pn * PAGESIZE;
-    ptop = p + PAGESIZE;
+    byte* p = pool.baseAddr + pn * PAGESIZE;
+    byte* ptop = p + PAGESIZE;
+    size_t bit_i = pn * (PAGESIZE / 16);
+    pool.freebits.set_group(bit_i, PAGESIZE / 16);
     for (; p < ptop; p += size)
     {
         List* l = cast(List *) p;
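
allocPage() now also initializes the free bitmap for the freshly carved page:
freebits keeps one bit per 16-byte chunk, so bit i covers the chunk at
pool.baseAddr + i * 16 and a full page spans PAGESIZE / 16 consecutive bits,
which set_group() sets in one call. A simplified sketch of what such a helper
does (illustrative only; the real implementation lives in rt.gc.cdgc.bits and
works on whole words at a time):

    // Illustrative only: set `count` bits starting at `bit_i` in a bitmap
    // stored as an array of words.
    void set_group_naive(size_t[] words, size_t bit_i, size_t count)
    {
        const BITS_PER_WORD = size_t.sizeof * 8;
        for (size_t i = bit_i; i < bit_i + count; i++)
            words[i / BITS_PER_WORD] |= cast(size_t) 1 << (i % BITS_PER_WORD);
    }
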
@@ -780,17 +760,56 @@ size_t fullcollect(void *stackTop)
 {
     debug(COLLECT_PRINTF) printf("Gcx.fullcollect()\n");
 
-    // we always need to stop the world to make threads save the CPU registers
+    // If eager allocation is used, we need to check first if there is a mark
+    // process running. If there isn't, we start a new one (see the next code
+    // block). If there is, we check if it's still running or already finished.
+    // If it's still running, we tell the caller process no memory has been
+    // recovered (it will allocate more to fulfill the current request). If
+    // the mark process is done, we launch the sweep phase and hope enough
+    // memory is freed (if that's not the case, the caller will allocate more
+    // memory and the next time it's exhausted it will run a new collection).
+    if (opts.options.eager_alloc) {
+        if (gc.mark_proc_pid != 0) { // there is a mark process in progress
+            os.WRes r = os.wait_pid(gc.mark_proc_pid, false); // don't block
+            assert (r != os.WRes.ERROR);
+            switch (r) {
+            case os.WRes.DONE:
+                debug(COLLECT_PRINTF) printf("\t\tmark proc DONE\n");
+                gc.mark_proc_pid = 0;
+                return sweep();
+            case os.WRes.RUNNING:
+                debug(COLLECT_PRINTF) printf("\t\tmark proc RUNNING\n");
+                return 0;
+            case os.WRes.ERROR:
+                debug(COLLECT_PRINTF) printf("\t\tmark proc ERROR\n");
+                disable_fork(); // Try to keep going without forking
+                break;
+            }
+        }
+    }
+
+    // We always need to stop the world to make threads save the CPU registers
     // in the stack and prepare themselves for thread_scanAll()
     thread_suspendAll();
     gc.stats.world_stopped();
 
+    // If forking is enabled, we fork() and start a new mark phase in the
+    // child. The parent process will tell the caller that no memory could be
+    // recycled if eager allocation is used, allowing the mutator to keep going
+    // almost instantly (at the expense of more memory consumption because
+    // a new allocation will be triggered to fulfill the current request). If
+    // eager allocation is not used, the parent will wait for the mark phase to
+    // finish before returning control to the mutator, but other threads are
+    // restarted and may run in parallel with the mark phase (unless they
+    // allocate or use the GC themselves, in which case the global GC lock will
+    // stop them).
     if (opts.options.fork) {
+        cstdio.fflush(null); // avoid duplicated FILE* output
         os.pid_t child_pid = os.fork();
         assert (child_pid != -1); // don't accept errors in non-release mode
         switch (child_pid) {
-        case -1: // if fork() fails, fallback to stop-the-world
-            opts.options.fork = false;
+        case -1: // if fork() fails, fall back to stop-the-world
+            disable_fork();
             break;
         case 0: // child process (i.e. the collector's mark phase)
             mark(stackTop);
@@ -800,15 +819,28 @@ size_t fullcollect(void *stackTop)
             // start the world again and wait for the mark phase to finish
             thread_resumeAll();
             gc.stats.world_started();
-            int status = void;
-            os.pid_t wait_pid = os.waitpid(child_pid, &status, 0);
-            assert (wait_pid == child_pid);
-            return sweep();
+            if (opts.options.eager_alloc) {
+                gc.mark_proc_pid = child_pid;
+                return 0;
+            }
+            os.WRes r = os.wait_pid(child_pid); // block until it finishes
+            assert (r == os.WRes.DONE);
+            debug(COLLECT_PRINTF) printf("\t\tmark proc DONE (block)\n");
+            if (r == os.WRes.DONE)
+                return sweep();
+            debug(COLLECT_PRINTF) printf("\tmark() proc ERROR\n");
+            // If there was some error, try to keep going without forking
+            disable_fork();
+            // Re-suspend the threads to do the marking in this process
+            thread_suspendAll();
+            gc.stats.world_stopped();
         }
     }
 
-    // if we reach here, we are using the standard stop-the-world collection
+    // If we reach here, we are using the standard stop-the-world collection,
+    // either because fork was disabled in the first place, or because it was
+    // disabled later because of some error.
     mark(stackTop);
     thread_resumeAll();
     gc.stats.world_started();
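
The concurrency model above is plain POSIX: fork() hands the child a
copy-on-write snapshot of the heap, so it can mark undisturbed while the
parent keeps mutating, and the parent either blocks on the child (plain fork
mode) or polls it again on the next collection (eager allocation mode). A
minimal standalone sketch of the polling step, assuming Tango's POSIX bindings
(the patch itself goes through the os module's wait_pid()/WRes wrapper
instead):

    import tango.stdc.posix.sys.wait : waitpid, WNOHANG;

    // Returns -1 on error, 0 while the child is still running, 1 when done.
    int poll_child(int pid)
    {
        int status;
        int r = waitpid(pid, &status, WNOHANG);
        if (r == -1)
            return -1; // error, e.g. no such child process
        return r == 0 ? 0 : 1; // with WNOHANG, 0 means "still running"
    }
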
@@ -825,33 +857,12 @@ void mark(void *stackTop)
     debug(COLLECT_PRINTF) printf("\tmark()\n");
 
     gc.any_changes = false;
-    for (size_t n = 0; n < gc.pools.length; n++)
-    {
-        Pool* pool = gc.pools[n];
-        pool.mark.zero();
-        pool.scan.zero();
-        pool.freebits.zero();
-    }
-
-    // Mark each free entry, so it doesn't get scanned
-    for (size_t n = 0; n < B_PAGE; n++)
-    {
-        for (List *list = gc.free_list[n]; list; list = list.next)
-        {
-            Pool* pool = list.pool;
-            auto ptr = cast(byte*) list;
-            assert (pool);
-            assert (pool.baseAddr <= ptr);
-            assert (ptr < pool.topAddr);
-            size_t bit_i = cast(size_t)(ptr - pool.baseAddr) / 16;
-            pool.freebits.set(bit_i);
-        }
-    }
 
     for (size_t n = 0; n < gc.pools.length; n++)
     {
         Pool* pool = gc.pools[n];
         pool.mark.copy(&pool.freebits);
+        pool.scan.zero();
     }
 
     /// Marks a range of memory in conservative mode.
@@ -1041,7 +1052,8 @@ version(none) // BUG: doesn't work because freebits() must also be cleared
         }
         else if (bin == B_PAGE)
         {
-            size_t bit_i = pn * (PAGESIZE / 16);
+            size_t bit_stride = PAGESIZE / 16;
+            size_t bit_i = pn * bit_stride;
             if (!pool.mark.test(bit_i))
             {
                 byte *p = pool.baseAddr + pn * PAGESIZE;
@@ -1055,8 +1067,9 @@ version(none) // BUG: doesn't work because freebits() must also be cleared
                 }
                 clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
 
-                debug(COLLECT_PRINTF) printf("\tcollecting big %x\n", p);
+                debug(COLLECT_PRINTF) printf("\tcollecting big %p\n", p);
                 pool.pagetable[pn] = B_FREE;
+                pool.freebits.set_group(bit_i, PAGESIZE / 16);
                 freedpages++;
                 if (opts.options.mem_stomp)
                     memset(p, 0xF3, PAGESIZE);
@@ -1064,6 +1077,8 @@ version(none) // BUG: doesn't work because freebits() must also be cleared
                 {
                     pn++;
                     pool.pagetable[pn] = B_FREE;
+                    bit_i += bit_stride;
+                    pool.freebits.set_group(bit_i, PAGESIZE / 16);
                     freedpages++;
 
                     if (opts.options.mem_stomp)
@@ -1107,6 +1122,7 @@ version(none) // BUG: doesn't work because freebits() must also be cleared
                     goto Lnotfree;
             }
             pool.pagetable[pn] = B_FREE;
+            pool.freebits.set_group(bit_base, PAGESIZE / 16);
             recoveredpages++;
             continue;
 
@@ -1206,6 +1222,13 @@ body
 }
 
 
+void disable_fork()
+{
+    // we have to disable both options, as eager_alloc assumes fork is enabled
+    opts.options.fork = false;
+    opts.options.eager_alloc = false;
+}
+
 
 void initialize()
 {
@@ -1215,10 +1238,18 @@ void initialize()
     // If we are going to fork, make sure we have the needed OS support
     if (opts.options.fork)
         opts.options.fork = os.HAVE_SHARED && os.HAVE_FORK;
+    // Eager allocation is only possible when forking
+    if (!opts.options.fork)
+        opts.options.eager_alloc = false;
     gc.lock = GCLock.classinfo;
     gc.inited = 1;
     setStackBottom(rt_stackBottom());
     gc.stats = Stats(gc);
+    if (opts.options.prealloc_npools) {
+        size_t pages = round_up(opts.options.prealloc_psize, PAGESIZE);
+        for (size_t i = 0; i < opts.options.prealloc_npools; ++i)
+            newPool(pages);
+    }
 }
 
 
@@ -1257,6 +1288,7 @@ private void *malloc(size_t size, uint attrs, size_t* pm_bitmask)
     }
 
     Pool* pool = void;
+    size_t bit_i = void;
     size_t capacity = void; // to figure out where to store the bitmask
     if (bin < B_PAGE)
     {
@@ -1299,6 +1331,9 @@ private void *malloc(size_t size, uint attrs, size_t* pm_bitmask)
         assert ((cast(byte*)list) < list.pool.topAddr);
         gc.free_list[bin] = list.next;
         pool = list.pool;
+        bit_i = (p - pool.baseAddr) / 16;
+        assert (pool.freebits.test(bit_i));
+        pool.freebits.clear(bit_i);
         if (!(attrs & BlkAttr.NO_SCAN))
             memset(p + size, 0, capacity - size);
         if (opts.options.mem_stomp)
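
Both allocation paths in malloc() now maintain the same freebits invariant:
a block's metadata bits are addressed at 16-byte granularity, i.e.
bit_i = (p - pool.baseAddr) / 16 for a small block taken from the free list,
and bit_i = pn * (PAGESIZE / 16) for a big block starting at page pn (see the
next hunk). Hypothetical helpers spelling out that mapping (names are
illustrative, not part of the patch):

    // One metadata bit covers 16 bytes of pool memory.
    size_t bit_index(byte* base, void* p)
    {
        return (cast(byte*) p - base) / 16;
    }

    // First bit of page `pn`; a page covers pagesize / 16 bits.
    size_t page_first_bit(size_t pn, size_t pagesize)
    {
        return pn * (pagesize / 16);
    }
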
@@ -1306,13 +1341,24 @@ private void *malloc(size_t size, uint attrs, size_t* pm_bitmask)
     }
     else
     {
-        p = bigAlloc(size, pool);
+        size_t pn;
+        size_t npages = round_up(size, PAGESIZE);
+        p = bigAlloc(npages, pool, &pn);
         if (!p)
             onOutOfMemoryError();
         assert (pool !is null);
 
-        // Round the size up to the number of pages needed to store it
-        size_t npages = (size + PAGESIZE - 1) / PAGESIZE;
+
         capacity = npages * PAGESIZE;
+        bit_i = pn * (PAGESIZE / 16);
+        pool.freebits.clear(bit_i);
+        pool.pagetable[pn] = B_PAGE;
+        if (npages > 1)
+            memset(&pool.pagetable[pn + 1], B_PAGEPLUS, npages - 1);
+        p = pool.baseAddr + pn * PAGESIZE;
+        memset(cast(char *)p + size, 0, npages * PAGESIZE - size);
+        if (opts.options.mem_stomp)
+            memset(p, 0xF1, size);
+
     }
 
     // Store the bit mask AFTER SENTINEL_POST
@@ -1329,8 +1375,10 @@ private void *malloc(size_t size, uint attrs, size_t* pm_bitmask)
         sentinel_init(p, size);
     }
 
-    if (attrs)
-        setAttr(pool, cast(size_t)(p - pool.baseAddr) / 16, attrs);
+    if (attrs) {
+        setAttr(pool, bit_i, attrs);
+        assert (bin >= B_PAGE || !pool.freebits.test(bit_i));
+    }
 
     return p;
 }
@@ -1415,7 +1463,7 @@ private void *realloc(void *p, size_t size, uint attrs,
         if (blk_size >= PAGESIZE && size >= PAGESIZE)
         {
             auto psz = blk_size / PAGESIZE;
-            auto newsz = (size + PAGESIZE - 1) / PAGESIZE;
+            auto newsz = round_up(size, PAGESIZE);
             if (newsz == psz)
                 return p;
 
@@ -1539,8 +1587,8 @@ body
         return 0; // cannot extend buckets
 
     auto psz = blk_size / PAGESIZE;
-    auto minsz = (minsize + PAGESIZE - 1) / PAGESIZE;
-    auto maxsz = (maxsize + PAGESIZE - 1) / PAGESIZE;
+    auto minsz = round_up(minsize, PAGESIZE);
+    auto maxsz = round_up(maxsize, PAGESIZE);
 
     auto pagenum = (p - pool.baseAddr) / PAGESIZE;
@@ -1611,6 +1659,7 @@ private void free(void *p)
         // Free pages
         size_t npages = 1;
         size_t n = pagenum;
+        pool.freebits.set_group(bit_i, PAGESIZE / 16);
         while (++n < pool.npages && pool.pagetable[n] == B_PAGEPLUS)
             npages++;
         if (opts.options.mem_stomp)
@@ -1630,6 +1679,7 @@ private void free(void *p)
         list.next = gc.free_list[bin];
         list.pool = pool;
         gc.free_list[bin] = list;
+        pool.freebits.set(bit_i);
     }
 }
 
@@ -1934,10 +1984,17 @@ struct Pool
         if (opts.options.fork)
             vis = os.Vis.SHARED;
         mark.alloc(nbits, vis); // shared between mark and sweep
-        freebits.alloc(nbits, vis); // ditto
+        freebits.alloc(nbits); // not used by the mark phase
         scan.alloc(nbits); // only used in the mark phase
-        finals.alloc(nbits); // mark phase *MUST* have a snapshot
-        noscan.alloc(nbits); // ditto
+        finals.alloc(nbits); // not used by the mark phase
+        noscan.alloc(nbits); // mark phase *MUST* have a snapshot
+
+        // everything is free when we start
+        freebits.set_all();
+
+        // avoid accidental sweeping of new pools while using eager allocation
+        if (gc.mark_proc_pid)
+            mark.set_all();
 
         pagetable = cast(ubyte*) cstdlib.malloc(npages);
         if (!pagetable)
@@ -1972,7 +2029,7 @@ struct Pool
         if (opts.options.fork)
             vis = os.Vis.SHARED;
         mark.Dtor(vis);
-        freebits.Dtor(vis);
+        freebits.Dtor();
         scan.Dtor();
         finals.Dtor();
         noscan.Dtor();
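
A closing note on the Pool changes: only the mark bits still need
os.Vis.SHARED, because sweep() runs in the parent and must see the bits the
fork()ed child wrote, while the mark phase only reads its copy-on-write
snapshot of freebits (via mark.copy(&pool.freebits)), so the other bitmaps can
stay in private memory. Shared visibility across fork() requires a shared
anonymous mapping rather than ordinary (copy-on-write) heap memory; a sketch
of what such an allocation might look like, assuming Tango's POSIX mman
bindings and MAP_ANON support on the target platform (os.Vis.SHARED presumably
wraps something along these lines):

    import mman = tango.stdc.posix.sys.mman;

    // Memory that the parent and a fork()ed child genuinely share: the
    // child's writes (e.g. mark bits) stay visible to the parent.
    void* alloc_shared(size_t size)
    {
        void* p = mman.mmap(null, size,
                mman.PROT_READ | mman.PROT_WRITE,
                mman.MAP_SHARED | mman.MAP_ANON, -1, 0);
        assert (p !is mman.MAP_FAILED);
        return p;
    }
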