X-Git-Url: https://git.llucax.com/software/dgc/cdgc.git/blobdiff_plain/876c4089652c85740c66a62899a1e32837e15ad5..7b736090719c6f08e286b246d9b7509413716fcf:/rt/gc/cdgc/gc.d?ds=inline diff --git a/rt/gc/cdgc/gc.d b/rt/gc/cdgc/gc.d index f2d2f6e..2ec72c4 100644 --- a/rt/gc/cdgc/gc.d +++ b/rt/gc/cdgc/gc.d @@ -45,11 +45,13 @@ version = STACKGROWSDOWN; // growing the stack means subtracting from the import rt.gc.cdgc.bits: GCBits; import rt.gc.cdgc.stats: GCStats, Stats; import dynarray = rt.gc.cdgc.dynarray; -import alloc = rt.gc.cdgc.alloc; +import os = rt.gc.cdgc.os; import opts = rt.gc.cdgc.opts; import cstdlib = tango.stdc.stdlib; import cstring = tango.stdc.string; +import cstdio = tango.stdc.stdio; +debug(COLLECT_PRINTF) alias cstdio.printf printf; /* * This is a small optimization that proved it's usefulness. For small chunks @@ -97,6 +99,11 @@ package bool has_pointermap(uint attrs) return !opts.options.conservative && !(attrs & BlkAttr.NO_SCAN); } +private size_t round_up(size_t n, size_t to) +{ + return (n + to - 1) / to; +} + private { alias void delegate(Object) DEvent; @@ -155,7 +162,8 @@ alias ubyte Bins; struct List { - List *next; + List* next; + Pool* pool; } @@ -200,17 +208,25 @@ struct GC /// Turn off collections if > 0 int disabled; + // PID of the fork()ed process doing the mark() (0 if is not running) + int mark_proc_pid; + /// min(pool.baseAddr) byte *min_addr; /// max(pool.topAddr) byte *max_addr; + /// Total heap memory + size_t total_mem; + /// Free heap memory + size_t free_mem; + /// Free list for each size List*[B_MAX] free_list; dynarray.DynArray!(void*) roots; dynarray.DynArray!(Range) ranges; - dynarray.DynArray!(Pool) pools; + dynarray.DynArray!(Pool*) pools; Stats stats; } @@ -226,19 +242,32 @@ private T locked(T, alias Code)() private GC* gc; + +bool collect_in_progress() +{ + return gc.mark_proc_pid != 0; +} + + bool Invariant() { assert (gc !is null); if (gc.inited) { + size_t total_mem = 0; + size_t free_mem = 0; for (size_t i = 0; i < gc.pools.length; i++) { Pool* pool = gc.pools[i]; pool.Invariant(); if (i == 0) assert(gc.min_addr == pool.baseAddr); if (i + 1 < gc.pools.length) - assert(*pool < gc.pools[i + 1]); + assert(*pool < *gc.pools[i + 1]); else if (i + 1 == gc.pools.length) assert(gc.max_addr == pool.topAddr); + total_mem += pool.npages * PAGESIZE; + for (size_t pn = 0; pn < pool.npages; ++pn) + if (pool.pagetable[pn] == B_FREE) + free_mem += PAGESIZE; } gc.roots.Invariant(); @@ -250,10 +279,19 @@ bool Invariant() assert(gc.ranges[i].pbot <= gc.ranges[i].ptop); } - for (size_t i = 0; i < B_PAGE; i++) - for (List *list = gc.free_list[i]; list; list = list.next) - { + for (size_t i = 0; i < B_PAGE; i++) { + for (List *list = gc.free_list[i]; list; list = list.next) { + auto pool = list.pool; + assert (pool !is null); + auto p = cast(byte*) list; + assert (p >= pool.baseAddr); + assert (p < pool.topAddr); + assert (pool.freebits.test((p - pool.baseAddr) / 16)); + free_mem += binsize[i]; } + } + assert (gc.total_mem == total_mem); + assert (gc.free_mem == free_mem); } return true; } @@ -264,26 +302,28 @@ bool Invariant() * Return null if not in a Pool. * Assume pools is sorted. 
*/ -Pool *findPool(void *p) +Pool* findPool(void* p) { - if (p >= gc.min_addr && p < gc.max_addr) - { - if (gc.pools.length == 1) - { - return gc.pools[0]; - } - - for (size_t i = 0; i < gc.pools.length; i++) - { - Pool* pool = gc.pools[i]; - if (p < pool.topAddr) - { - if (pool.baseAddr <= p) - return pool; - break; - } - } + if (p < gc.min_addr || p >= gc.max_addr) + return null; + if (gc.pools.length == 0) + return null; + if (gc.pools.length == 1) + return gc.pools[0]; + /// The pooltable[] is sorted by address, so do a binary search + size_t low = 0; + size_t high = gc.pools.length - 1; + while (low <= high) { + size_t mid = (low + high) / 2; + auto pool = gc.pools[mid]; + if (p < pool.baseAddr) + high = mid - 1; + else if (p >= pool.topAddr) + low = mid + 1; + else + return pool; } + // Not found return null; } @@ -300,8 +340,11 @@ BlkInfo getInfo(void* p) return BlkInfo.init; BlkInfo info; info.base = pool.findBase(p); + if (info.base is null) + return BlkInfo.init; info.size = pool.findSize(info.base); - info.attr = getAttr(pool, cast(size_t)(info.base - pool.baseAddr) / 16u); + size_t bit_i = (info.base - pool.baseAddr) / 16; + info.attr = getAttr(pool, bit_i); if (has_pointermap(info.attr)) { info.size -= size_t.sizeof; // PointerMap bitmask // Points to the PointerMap bitmask pointer, not user data @@ -323,7 +366,7 @@ BlkInfo getInfo(void* p) /** * Compute bin for size. */ -static Bins findBin(size_t size) +Bins findBin(size_t size) { Bins bin; if (size <= 256) @@ -375,7 +418,7 @@ static Bins findBin(size_t size) size_t reserve(size_t size) { assert(size != 0); - size_t npages = (size + PAGESIZE - 1) / PAGESIZE; + size_t npages = round_up(size, PAGESIZE); Pool* pool = newPool(npages); if (!pool) @@ -386,16 +429,23 @@ size_t reserve(size_t size) /** * Minimizes physical memory usage by returning free pools to the OS. + * + * If full is false, keep some pools alive if the resulting free memory would + * be too small. */ -void minimize() +void minimize(bool full = true) { - size_t n; - size_t pn; - Pool* pool; + // The shared mark bits of the freed pool might be used by the mark process + if (collect_in_progress()) + return; - for (n = 0; n < gc.pools.length; n++) + if (gc.pools.length == 0) + return; + + for (size_t n = 0; n < gc.pools.length; n++) { - pool = gc.pools[n]; + Pool* pool = gc.pools[n]; + size_t pn; for (pn = 0; pn < pool.npages; pn++) { if (cast(Bins)pool.pagetable[pn] != B_FREE) @@ -403,7 +453,18 @@ void minimize() } if (pn < pool.npages) continue; + // Free pool + size_t pool_size = pool.npages * PAGESIZE; + if (!full) { + double percent_free = (gc.free_mem - pool_size) * 100.0 / + (gc.total_mem - pool_size); + if (percent_free < opts.options.min_free) + continue; // not enough free, don't remove this pool + } + gc.total_mem -= pool_size; + gc.free_mem -= pool_size; pool.Dtor(); + cstdlib.free(pool); gc.pools.remove_at(n); n--; } @@ -416,88 +477,50 @@ void minimize() * Allocate a chunk of memory that is larger than a page. * Return null if out of memory. */ -void *bigAlloc(size_t size) +void* bigAlloc(size_t npages, out Pool* pool, size_t* pn, bool* collected) { - Pool* pool; - size_t npages; - size_t n; - size_t pn; - size_t freedpages; - void* p; - int state; + *collected = false; + // This code could use some refinement when repeatedly + // allocating very large arrays. - npages = (size + PAGESIZE - 1) / PAGESIZE; - - for (state = 0; ; ) + void* find_block() { - // This code could use some refinement when repeatedly - // allocating very large arrays. 
- - for (n = 0; n < gc.pools.length; n++) + for (size_t n = 0; n < gc.pools.length; n++) { pool = gc.pools[n]; - pn = pool.allocPages(npages); - if (pn != OPFAIL) - goto L1; + *pn = pool.allocPages(npages); + if (*pn != OPFAIL) + return pool.baseAddr + *pn * PAGESIZE; } + return null; + } - // Failed - switch (state) - { - case 0: - if (gc.disabled) - { - state = 1; - continue; - } - // Try collecting - freedpages = fullcollectshell(); - if (freedpages >= gc.pools.length * ((POOLSIZE / PAGESIZE) / 4)) - { - state = 1; - continue; - } - // Release empty pools to prevent bloat - minimize(); - // Allocate new pool - pool = newPool(npages); - if (!pool) - { - state = 2; - continue; - } - pn = pool.allocPages(npages); - assert(pn != OPFAIL); - goto L1; - case 1: - // Release empty pools to prevent bloat - minimize(); - // Allocate new pool - pool = newPool(npages); - if (!pool) - goto Lnomemory; - pn = pool.allocPages(npages); - assert(pn != OPFAIL); - goto L1; - case 2: - goto Lnomemory; - default: - assert(false); - } + void* alloc_more() + { + // Allocate new pool + pool = newPool(npages); + if (!pool) + return null; // let malloc handle the error + *pn = pool.allocPages(npages); + assert(*pn != OPFAIL); + return pool.baseAddr + *pn * PAGESIZE; } - L1: - pool.pagetable[pn] = B_PAGE; - if (npages > 1) - memset(&pool.pagetable[pn + 1], B_PAGEPLUS, npages - 1); - p = pool.baseAddr + pn * PAGESIZE; - memset(cast(char *)p + size, 0, npages * PAGESIZE - size); - if (opts.options.mem_stomp) - memset(p, 0xF1, size); - return p; + if (void* p = find_block()) + return p; - Lnomemory: - return null; // let mallocNoSync handle the error + if (gc.disabled) + return alloc_more(); + + // Try collecting + size_t freedpages = fullcollectshell(); + *collected = true; + if (freedpages >= npages) { + if (void* p = find_block()) + return p; + } + + return alloc_more(); } @@ -530,20 +553,27 @@ Pool *newPool(size_t npages) npages = n; } - Pool p; - p.initialize(npages); - if (!p.baseAddr) + auto pool = cast(Pool*) cstdlib.calloc(1, Pool.sizeof); + if (pool is null) + return null; + pool.initialize(npages); + if (!pool.baseAddr) { - p.Dtor(); + pool.Dtor(); return null; } - Pool* pool = gc.pools.insert_sorted(p); - if (pool) - { - gc.min_addr = gc.pools[0].baseAddr; - gc.max_addr = gc.pools[gc.pools.length - 1].topAddr; + auto inserted_pool = *gc.pools.insert_sorted!("*a < *b")(pool); + if (inserted_pool is null) { + pool.Dtor(); + return null; } + assert (inserted_pool is pool); + gc.min_addr = gc.pools[0].baseAddr; + gc.max_addr = gc.pools[gc.pools.length - 1].topAddr; + size_t pool_size = pool.topAddr - pool.baseAddr; + gc.total_mem += pool_size; + gc.free_mem += pool_size; return pool; } @@ -556,12 +586,9 @@ Pool *newPool(size_t npages) int allocPage(Bins bin) { Pool* pool; - size_t n; size_t pn; - byte* p; - byte* ptop; - for (n = 0; n < gc.pools.length; n++) + for (size_t n = 0; n < gc.pools.length; n++) { pool = gc.pools[n]; pn = pool.allocPages(1); @@ -575,33 +602,28 @@ int allocPage(Bins bin) // Convert page to free list size_t size = binsize[bin]; - List **b = &gc.free_list[bin]; + auto list_head = &gc.free_list[bin]; - p = pool.baseAddr + pn * PAGESIZE; - ptop = p + PAGESIZE; + byte* p = pool.baseAddr + pn * PAGESIZE; + byte* ptop = p + PAGESIZE; + size_t bit_i = pn * (PAGESIZE / 16); + pool.freebits.set_group(bit_i, PAGESIZE / 16); for (; p < ptop; p += size) { - (cast(List *)p).next = *b; - *b = cast(List *)p; + List* l = cast(List *) p; + l.next = *list_head; + l.pool = pool; + *list_head = l; } 
return 1; } /** - * Marks a range of memory using the conservative bit mask. Used for - * the stack, for the data segment, and additional memory ranges. - */ -void mark_conservative(void* pbot, void* ptop) -{ - mark(pbot, ptop, PointerMap.init.bits.ptr); -} - - -/** - * Search a range of memory values and mark any pointers into the GC pool. + * Search a range of memory values and mark any pointers into the GC pool using + * type information (bitmask of pointer locations). */ -void mark(void *pbot, void *ptop, size_t* pm_bitmask) +void mark_range(void *pbot, void *ptop, size_t* pm_bitmask) { // TODO: make our own assert because assert uses the GC assert (pbot <= ptop); @@ -611,16 +633,18 @@ void mark(void *pbot, void *ptop, size_t* pm_bitmask) void **p1 = cast(void **)pbot; void **p2 = cast(void **)ptop; size_t pcache = 0; - uint changes = 0; + bool changes = false; size_t type_size = pm_bitmask[0]; size_t* pm_bits = pm_bitmask + 1; + bool has_type_info = type_size != 1 || pm_bits[0] != 1 || pm_bits[1] != 0; //printf("marking range: %p -> %p\n", pbot, ptop); for (; p1 + type_size <= p2; p1 += type_size) { for (size_t n = 0; n < type_size; n++) { // scan bit set for this word - if (!(pm_bits[n / BITS_PER_WORD] & (1 << (n % BITS_PER_WORD)))) + if (has_type_info && + !(pm_bits[n / BITS_PER_WORD] & (1 << (n % BITS_PER_WORD)))) continue; void* p = *(p1 + n); @@ -635,13 +659,17 @@ void mark(void *pbot, void *ptop, size_t* pm_bitmask) if (pool) { size_t offset = cast(size_t)(p - pool.baseAddr); - size_t bit_i; + size_t bit_i = void; size_t pn = offset / PAGESIZE; Bins bin = cast(Bins)pool.pagetable[pn]; + // Cache B_PAGE, B_PAGEPLUS and B_FREE lookups + if (bin >= B_PAGE) + pcache = cast(size_t)p & ~(PAGESIZE-1); + // Adjust bit to be at start of allocated memory block if (bin <= B_PAGE) - bit_i = (offset & notbinsize[bin]) >> 4; + bit_i = (offset & notbinsize[bin]) / 16; else if (bin == B_PAGEPLUS) { do @@ -651,14 +679,8 @@ void mark(void *pbot, void *ptop, size_t* pm_bitmask) while (cast(Bins)pool.pagetable[pn] == B_PAGEPLUS); bit_i = pn * (PAGESIZE / 16); } - else - { - // Don't mark bits in B_FREE pages + else // Don't mark bits in B_FREE pages continue; - } - - if (bin >= B_PAGE) // Cache B_PAGE and B_PAGEPLUS lookups - pcache = cast(size_t)p & ~(PAGESIZE-1); if (!pool.mark.test(bit_i)) { @@ -666,7 +688,7 @@ void mark(void *pbot, void *ptop, size_t* pm_bitmask) if (!pool.noscan.test(bit_i)) { pool.scan.set(bit_i); - changes = 1; + changes = true; } } } @@ -772,79 +794,150 @@ size_t fullcollectshell() */ size_t fullcollect(void *stackTop) { - size_t n; - Pool* pool; - debug(COLLECT_PRINTF) printf("Gcx.fullcollect()\n"); + // If eager allocation is used, we need to check first if there is a mark + // process running. If there isn't, we start a new one (see the next code + // block). If there is, we check if it's still running or already finished. + // If it's still running, we tell the caller process no memory has been + // recovered (it will allocated more to fulfill the current request). If + // the mark process is done, we lunch the sweep phase and hope enough + // memory is freed (if that not the case, the caller will allocate more + // memory and the next time it's exhausted it will run a new collection). 
+ if (opts.options.eager_alloc) { + if (collect_in_progress()) { + os.WRes r = os.wait_pid(gc.mark_proc_pid, false); // don't block + assert (r != os.WRes.ERROR); + switch (r) { + case os.WRes.DONE: + debug(COLLECT_PRINTF) printf("\t\tmark proc DONE\n"); + gc.mark_proc_pid = 0; + return sweep(); + case os.WRes.RUNNING: + debug(COLLECT_PRINTF) printf("\t\tmark proc RUNNING\n"); + return 0; + case os.WRes.ERROR: + debug(COLLECT_PRINTF) printf("\t\tmark proc ERROR\n"); + disable_fork(); // Try to keep going without forking + break; + } + } + } + + // We always need to stop the world to make threads save the CPU registers + // in the stack and prepare themselves for thread_scanAll() thread_suspendAll(); gc.stats.world_stopped(); - gc.p_cache = null; - gc.size_cache = 0; + // If forking is enabled, we fork() and start a new mark phase in the + // child. The parent process will tell the caller that no memory could be + // recycled if eager allocation is used, allowing the mutator to keep going + // almost instantly (at the expense of more memory consumption because + // a new allocation will be triggered to fulfill the current request). If + // no eager allocation is used, the parent will wait for the mark phase to + // finish before returning control to the mutator, but other threads are + // restarted and may run in parallel with the mark phase (unless they + // allocate or use the GC themselves, in which case the global GC lock will + // stop them). + if (opts.options.fork) { + cstdio.fflush(null); // avoid duplicated FILE* output + os.pid_t child_pid = os.fork(); + assert (child_pid != -1); // don't accept errors in non-release mode + switch (child_pid) { + case -1: // if fork() fails, fall-back to stop-the-world + disable_fork(); + break; + case 0: // child process (i.e. the collectors mark phase) + mark(stackTop); + cstdlib.exit(0); + break; // bogus, will never reach here + default: // parent process (i.e. the mutator) + // start the world again and wait for the mark phase to finish + thread_resumeAll(); + gc.stats.world_started(); + if (opts.options.eager_alloc) { + gc.mark_proc_pid = child_pid; + return 0; + } + os.WRes r = os.wait_pid(child_pid); // block until it finishes + assert (r == os.WRes.DONE); + debug(COLLECT_PRINTF) printf("\t\tmark proc DONE (block)\n"); + if (r == os.WRes.DONE) + return sweep(); + debug(COLLECT_PRINTF) printf("\tmark() proc ERROR\n"); + // If there was some error, try to keep going without forking + disable_fork(); + // Re-suspend the threads to do the marking in this process + thread_suspendAll(); + gc.stats.world_stopped(); + } - gc.any_changes = false; - for (n = 0; n < gc.pools.length; n++) - { - pool = gc.pools[n]; - pool.mark.zero(); - pool.scan.zero(); - pool.freebits.zero(); } - // Mark each free entry, so it doesn't get scanned - for (n = 0; n < B_PAGE; n++) - { - for (List *list = gc.free_list[n]; list; list = list.next) - { - pool = findPool(list); - assert(pool); - pool.freebits.set(cast(size_t)(cast(byte*)list - pool.baseAddr) / 16); - } - } + // If we reach here, we are using the standard stop-the-world collection, + // either because fork was disabled in the first place, or because it was + // disabled because of some error. 
+ mark(stackTop); + thread_resumeAll(); + gc.stats.world_started(); - for (n = 0; n < gc.pools.length; n++) + return sweep(); +} + + +/** + * + */ +void mark(void *stackTop) +{ + debug(COLLECT_PRINTF) printf("\tmark()\n"); + + gc.any_changes = false; + + for (size_t n = 0; n < gc.pools.length; n++) { - pool = gc.pools[n]; + Pool* pool = gc.pools[n]; pool.mark.copy(&pool.freebits); + pool.scan.zero(); } - void mark_conservative_dg(void* pbot, void* ptop) + /// Marks a range of memory in conservative mode. + void mark_conservative_range(void* pbot, void* ptop) { - mark_conservative(pbot, ptop); + mark_range(pbot, ptop, PointerMap.init.bits.ptr); } - rt_scanStaticData(&mark_conservative_dg); + rt_scanStaticData(&mark_conservative_range); if (!gc.no_stack) { // Scan stacks and registers for each paused thread - thread_scanAll(&mark_conservative_dg, stackTop); + thread_scanAll(&mark_conservative_range, stackTop); } // Scan roots debug(COLLECT_PRINTF) printf("scan roots[]\n"); - mark_conservative(gc.roots.ptr, gc.roots.ptr + gc.roots.length); + mark_conservative_range(gc.roots.ptr, gc.roots.ptr + gc.roots.length); // Scan ranges debug(COLLECT_PRINTF) printf("scan ranges[]\n"); - for (n = 0; n < gc.ranges.length; n++) + for (size_t n = 0; n < gc.ranges.length; n++) { debug(COLLECT_PRINTF) printf("\t%x .. %x\n", gc.ranges[n].pbot, gc.ranges[n].ptop); - mark_conservative(gc.ranges[n].pbot, gc.ranges[n].ptop); + mark_conservative_range(gc.ranges[n].pbot, gc.ranges[n].ptop); } debug(COLLECT_PRINTF) printf("\tscan heap\n"); while (gc.any_changes) { gc.any_changes = false; - for (n = 0; n < gc.pools.length; n++) + for (size_t n = 0; n < gc.pools.length; n++) { uint *bbase; uint *b; uint *btop; - pool = gc.pools[n]; + Pool* pool = gc.pools[n]; bbase = pool.scan.base(); btop = bbase + pool.scan.nwords; @@ -879,12 +972,12 @@ size_t fullcollect(void *stackTop) bin = cast(Bins)pool.pagetable[pn]; if (bin < B_PAGE) { if (opts.options.conservative) - mark_conservative(o, o + binsize[bin]); + mark_conservative_range(o, o + binsize[bin]); else { auto end_of_blk = cast(size_t**)(o + binsize[bin] - size_t.sizeof); size_t* pm_bitmask = *end_of_blk; - mark(o, end_of_blk, pm_bitmask); + mark_range(o, end_of_blk, pm_bitmask); } } else if (bin == B_PAGE || bin == B_PAGEPLUS) @@ -901,29 +994,37 @@ size_t fullcollect(void *stackTop) size_t blk_size = u * PAGESIZE; if (opts.options.conservative) - mark_conservative(o, o + blk_size); + mark_conservative_range(o, o + blk_size); else { auto end_of_blk = cast(size_t**)(o + blk_size - size_t.sizeof); size_t* pm_bitmask = *end_of_blk; - mark(o, end_of_blk, pm_bitmask); + mark_range(o, end_of_blk, pm_bitmask); } } } } } } +} - thread_resumeAll(); - gc.stats.world_started(); +/** + * + */ +size_t sweep() +{ // Free up everything not marked - debug(COLLECT_PRINTF) printf("\tfree'ing\n"); + debug(COLLECT_PRINTF) printf("\tsweep\n"); + gc.p_cache = null; + gc.size_cache = 0; + gc.free_mem = 0; // will be recalculated size_t freedpages = 0; size_t freed = 0; - for (n = 0; n < gc.pools.length; n++) + for (size_t n = 0; n < gc.pools.length; n++) { - pool = gc.pools[n]; + Pool* pool = gc.pools[n]; + pool.clear_cache(); uint* bbase = pool.mark.base(); size_t pn; for (pn = 0; pn < pool.npages; pn++, bbase += PAGESIZE / (32 * 16)) @@ -947,16 +1048,14 @@ version(none) // BUG: doesn't work because freebits() must also be cleared { for (; p < ptop; p += size, bit_i += bit_stride) { - if (pool.finals.nbits && pool.finals.testClear(bit_i)) { + if (pool.finals.testClear(bit_i)) { if 
(opts.options.sentinel) - rt_finalize(cast(List *)sentinel_add(p), false/*gc.no_stack > 0*/); + rt_finalize(sentinel_add(p), false/*gc.no_stack > 0*/); else - rt_finalize(cast(List *)p, false/*gc.no_stack > 0*/); + rt_finalize(p, false/*gc.no_stack > 0*/); } clrAttr(pool, bit_i, BlkAttr.ALL_BITS); - List *list = cast(List *)p; - if (opts.options.mem_stomp) memset(p, 0xF3, size); } @@ -973,16 +1072,14 @@ version(none) // BUG: doesn't work because freebits() must also be cleared sentinel_Invariant(sentinel_add(p)); pool.freebits.set(bit_i); - if (pool.finals.nbits && pool.finals.testClear(bit_i)) { + if (pool.finals.testClear(bit_i)) { if (opts.options.sentinel) - rt_finalize(cast(List *)sentinel_add(p), false/*gc.no_stack > 0*/); + rt_finalize(sentinel_add(p), false/*gc.no_stack > 0*/); else - rt_finalize(cast(List *)p, false/*gc.no_stack > 0*/); + rt_finalize(p, false/*gc.no_stack > 0*/); } clrAttr(pool, bit_i, BlkAttr.ALL_BITS); - List *list = cast(List *)p; - if (opts.options.mem_stomp) memset(p, 0xF3, size); @@ -992,13 +1089,14 @@ version(none) // BUG: doesn't work because freebits() must also be cleared } else if (bin == B_PAGE) { - size_t bit_i = pn * (PAGESIZE / 16); + size_t bit_stride = PAGESIZE / 16; + size_t bit_i = pn * bit_stride; if (!pool.mark.test(bit_i)) { byte *p = pool.baseAddr + pn * PAGESIZE; if (opts.options.sentinel) sentinel_Invariant(sentinel_add(p)); - if (pool.finals.nbits && pool.finals.testClear(bit_i)) { + if (pool.finals.testClear(bit_i)) { if (opts.options.sentinel) rt_finalize(sentinel_add(p), false/*gc.no_stack > 0*/); else @@ -1006,16 +1104,21 @@ version(none) // BUG: doesn't work because freebits() must also be cleared } clrAttr(pool, bit_i, BlkAttr.ALL_BITS); - debug(COLLECT_PRINTF) printf("\tcollecting big %x\n", p); + debug(COLLECT_PRINTF) printf("\tcollecting big %p\n", p); pool.pagetable[pn] = B_FREE; + pool.freebits.set_group(bit_i, PAGESIZE / 16); freedpages++; + gc.free_mem += PAGESIZE; if (opts.options.mem_stomp) memset(p, 0xF3, PAGESIZE); while (pn + 1 < pool.npages && pool.pagetable[pn + 1] == B_PAGEPLUS) { pn++; pool.pagetable[pn] = B_FREE; + bit_i += bit_stride; + pool.freebits.set_group(bit_i, PAGESIZE / 16); freedpages++; + gc.free_mem += PAGESIZE; if (opts.options.mem_stomp) { @@ -1025,6 +1128,9 @@ version(none) // BUG: doesn't work because freebits() must also be cleared } } } + else if (bin == B_FREE) { + gc.free_mem += PAGESIZE; + } } } @@ -1034,9 +1140,9 @@ version(none) // BUG: doesn't work because freebits() must also be cleared // Free complete pages, rebuild free list debug(COLLECT_PRINTF) printf("\tfree complete pages\n"); size_t recoveredpages = 0; - for (n = 0; n < gc.pools.length; n++) + for (size_t n = 0; n < gc.pools.length; n++) { - pool = gc.pools[n]; + Pool* pool = gc.pools[n]; for (size_t pn = 0; pn < pool.npages; pn++) { Bins bin = cast(Bins)pool.pagetable[pn]; @@ -1058,7 +1164,9 @@ version(none) // BUG: doesn't work because freebits() must also be cleared goto Lnotfree; } pool.pagetable[pn] = B_FREE; + pool.freebits.set_group(bit_base, PAGESIZE / 16); recoveredpages++; + gc.free_mem += PAGESIZE; continue; Lnotfree: @@ -1068,11 +1176,16 @@ version(none) // BUG: doesn't work because freebits() must also be cleared bit_i = bit_base + u / 16; if (pool.freebits.test(bit_i)) { - List *list = cast(List *)(p + u); - // avoid unnecessary writes + assert ((p+u) >= pool.baseAddr); + assert ((p+u) < pool.topAddr); + List* list = cast(List*) (p + u); + // avoid unnecesary writes (it really saves time) if (list.next != 
gc.free_list[bin]) list.next = gc.free_list[bin]; + if (list.pool != pool) + list.pool = pool; gc.free_list[bin] = list; + gc.free_mem += binsize[bin]; } } } @@ -1097,14 +1210,11 @@ in body { uint attrs; - - if (pool.finals.nbits && - pool.finals.test(bit_i)) + if (pool.finals.test(bit_i)) attrs |= BlkAttr.FINALIZE; if (pool.noscan.test(bit_i)) attrs |= BlkAttr.NO_SCAN; -// if (pool.nomove.nbits && -// pool.nomove.test(bit_i)) +// if (pool.nomove.test(bit_i)) // attrs |= BlkAttr.NO_MOVE; return attrs; } @@ -1122,8 +1232,6 @@ body { if (mask & BlkAttr.FINALIZE) { - if (!pool.finals.nbits) - pool.finals.alloc(pool.mark.nbits); pool.finals.set(bit_i); } if (mask & BlkAttr.NO_SCAN) @@ -1149,7 +1257,7 @@ in } body { - if (mask & BlkAttr.FINALIZE && pool.finals.nbits) + if (mask & BlkAttr.FINALIZE) pool.finals.clear(bit_i); if (mask & BlkAttr.NO_SCAN) pool.noscan.clear(bit_i); @@ -1158,16 +1266,34 @@ body } +void disable_fork() +{ + // we have to disable both options, as eager_alloc assumes fork is enabled + opts.options.fork = false; + opts.options.eager_alloc = false; +} + void initialize() { int dummy; gc.stack_bottom = cast(char*)&dummy; opts.parse(cstdlib.getenv("D_GC_OPTS")); + // If we are going to fork, make sure we have the needed OS support + if (opts.options.fork) + opts.options.fork = os.HAVE_SHARED && os.HAVE_FORK; + // Eager allocation is only possible when forking + if (!opts.options.fork) + opts.options.eager_alloc = false; gc.lock = GCLock.classinfo; gc.inited = 1; setStackBottom(rt_stackBottom()); gc.stats = Stats(gc); + if (opts.options.prealloc_npools) { + size_t pages = round_up(opts.options.prealloc_psize, PAGESIZE); + for (size_t i = 0; i < opts.options.prealloc_npools; ++i) + newPool(pages); + } } @@ -1205,7 +1331,10 @@ private void *malloc(size_t size, uint attrs, size_t* pm_bitmask) lastbin = bin; } - size_t capacity; // to figure out where to store the bitmask + Pool* pool = void; + size_t bit_i = void; + size_t capacity = void; // to figure out where to store the bitmask + bool collected = false; if (bin < B_PAGE) { p = gc.free_list[bin]; @@ -1228,10 +1357,12 @@ private void *malloc(size_t size, uint attrs, size_t* pm_bitmask) { //newPool(1); } + collected = true; } if (!gc.free_list[bin] && !allocPage(bin)) { newPool(1); // allocate new pool to find a new page + // TODO: hint allocPage() to use the pool we just created int result = allocPage(bin); if (!result) onOutOfMemoryError(); @@ -1241,7 +1372,14 @@ private void *malloc(size_t size, uint attrs, size_t* pm_bitmask) capacity = binsize[bin]; // Return next item from free list - gc.free_list[bin] = (cast(List*)p).next; + List* list = cast(List*) p; + assert ((cast(byte*)list) >= list.pool.baseAddr); + assert ((cast(byte*)list) < list.pool.topAddr); + gc.free_list[bin] = list.next; + pool = list.pool; + bit_i = (p - pool.baseAddr) / 16; + assert (pool.freebits.test(bit_i)); + pool.freebits.clear(bit_i); if (!(attrs & BlkAttr.NO_SCAN)) memset(p + size, 0, capacity - size); if (opts.options.mem_stomp) @@ -1249,12 +1387,24 @@ private void *malloc(size_t size, uint attrs, size_t* pm_bitmask) } else { - p = bigAlloc(size); + size_t pn; + size_t npages = round_up(size, PAGESIZE); + p = bigAlloc(npages, pool, &pn, &collected); if (!p) onOutOfMemoryError(); - // Round the size up to the number of pages needed to store it - size_t npages = (size + PAGESIZE - 1) / PAGESIZE; + assert (pool !is null); + capacity = npages * PAGESIZE; + bit_i = pn * (PAGESIZE / 16); + pool.freebits.clear(bit_i); + pool.pagetable[pn] = B_PAGE; + if 
(npages > 1) + memset(&pool.pagetable[pn + 1], B_PAGEPLUS, npages - 1); + p = pool.baseAddr + pn * PAGESIZE; + memset(cast(char *)p + size, 0, npages * PAGESIZE - size); + if (opts.options.mem_stomp) + memset(p, 0xF1, size); + } // Store the bit mask AFTER SENTINEL_POST @@ -1271,13 +1421,26 @@ private void *malloc(size_t size, uint attrs, size_t* pm_bitmask) sentinel_init(p, size); } - if (attrs) - { - Pool *pool = findPool(p); - assert(pool); - - setAttr(pool, cast(size_t)(p - pool.baseAddr) / 16, attrs); + if (attrs) { + setAttr(pool, bit_i, attrs); + assert (bin >= B_PAGE || !pool.freebits.test(bit_i)); + } + + gc.free_mem -= capacity; + if (collected) { + // If there is not enough free memory, allocate a new pool big enough + // to have at least the min_free% of the total heap free. If there is + // too much free memory, try to free some empty pools. + double percent_free = gc.free_mem * 100.0 / gc.total_mem; + if (percent_free < opts.options.min_free) { + auto pool_size = gc.total_mem * 1.0 / opts.options.min_free + - gc.free_mem; + newPool(round_up(cast(size_t)pool_size, PAGESIZE)); + } + else + minimize(false); } + return p; } @@ -1301,132 +1464,125 @@ private void *calloc(size_t size, uint attrs, size_t* pm_bitmask) private void *realloc(void *p, size_t size, uint attrs, size_t* pm_bitmask) { - if (!size) - { + if (!size) { if (p) - { free(p); - p = null; - } + return null; } - else if (!p) - { - p = malloc(size, attrs, pm_bitmask); + + if (p is null) + return malloc(size, attrs, pm_bitmask); + + Pool* pool = findPool(p); + if (pool is null) + return null; + + // Set or retrieve attributes as appropriate + auto bit_i = cast(size_t)(p - pool.baseAddr) / 16; + if (attrs) { + clrAttr(pool, bit_i, BlkAttr.ALL_BITS); + setAttr(pool, bit_i, attrs); } else - { - Pool* pool = findPool(p); - if (pool is null) - return null; + attrs = getAttr(pool, bit_i); - // Set or retrieve attributes as appropriate - auto bit_i = cast(size_t)(p - pool.baseAddr) / 16; - if (attrs) { - clrAttr(pool, bit_i, BlkAttr.ALL_BITS); - setAttr(pool, bit_i, attrs); - } - else - attrs = getAttr(pool, bit_i); - - void* blk_base_addr = pool.findBase(p); - size_t blk_size = pool.findSize(p); - bool has_pm = has_pointermap(attrs); - size_t pm_bitmask_size = 0; - if (has_pm) { - pm_bitmask_size = size_t.sizeof; - // Retrieve pointer map bit mask if appropriate - if (pm_bitmask is null) { - auto end_of_blk = cast(size_t**)(blk_base_addr + - blk_size - size_t.sizeof); - pm_bitmask = *end_of_blk; - } + void* blk_base_addr = pool.findBase(p); + size_t blk_size = pool.findSize(p); + bool has_pm = has_pointermap(attrs); + size_t pm_bitmask_size = 0; + if (has_pm) { + pm_bitmask_size = size_t.sizeof; + // Retrieve pointer map bit mask if appropriate + if (pm_bitmask is null) { + auto end_of_blk = cast(size_t**)( + blk_base_addr + blk_size - size_t.sizeof); + pm_bitmask = *end_of_blk; } + } - if (opts.options.sentinel) - { - sentinel_Invariant(p); - size_t sentinel_stored_size = *sentinel_size(p); - if (sentinel_stored_size != size) - { - void* p2 = malloc(size, attrs, pm_bitmask); - if (sentinel_stored_size < size) - size = sentinel_stored_size; - cstring.memcpy(p2, p, size); - p = p2; + if (opts.options.sentinel) { + sentinel_Invariant(p); + size_t sentinel_stored_size = *sentinel_size(p); + if (sentinel_stored_size != size) { + void* p2 = malloc(size, attrs, pm_bitmask); + if (sentinel_stored_size < size) + size = sentinel_stored_size; + cstring.memcpy(p2, p, size); + p = p2; + } + return p; + } + + size += pm_bitmask_size; + 
if (blk_size >= PAGESIZE && size >= PAGESIZE) { + auto psz = blk_size / PAGESIZE; + auto newsz = round_up(size, PAGESIZE); + if (newsz == psz) + return p; + + auto pagenum = (p - pool.baseAddr) / PAGESIZE; + + if (newsz < psz) { + // Shrink in place + if (opts.options.mem_stomp) + memset(p + size - pm_bitmask_size, 0xF2, + blk_size - size - pm_bitmask_size); + pool.freePages(pagenum + newsz, psz - newsz); + auto new_blk_size = (PAGESIZE * newsz); + gc.free_mem += blk_size - new_blk_size; + // update the size cache, assuming that is very likely the + // size of this block will be queried in the near future + pool.update_cache(p, new_blk_size); + if (has_pm) { + auto end_of_blk = cast(size_t**)(blk_base_addr + + new_blk_size - pm_bitmask_size); + *end_of_blk = pm_bitmask; } + return p; } - else - { - size += pm_bitmask_size; - if (blk_size >= PAGESIZE && size >= PAGESIZE) - { - auto psz = blk_size / PAGESIZE; - auto newsz = (size + PAGESIZE - 1) / PAGESIZE; - if (newsz == psz) - return p; - auto pagenum = (p - pool.baseAddr) / PAGESIZE; - - if (newsz < psz) - { - // Shrink in place + if (pagenum + newsz <= pool.npages) { + // Attempt to expand in place + for (size_t i = pagenum + psz; 1;) { + if (i == pagenum + newsz) { if (opts.options.mem_stomp) - memset(p + size - pm_bitmask_size, 0xF2, - blk_size - size - pm_bitmask_size); - pool.freePages(pagenum + newsz, psz - newsz); + memset(p + blk_size - pm_bitmask_size, 0xF0, + size - blk_size - pm_bitmask_size); + memset(pool.pagetable + pagenum + psz, B_PAGEPLUS, + newsz - psz); + auto new_blk_size = (PAGESIZE * newsz); + gc.free_mem -= new_blk_size - blk_size; + // update the size cache, assuming that is very + // likely the size of this block will be queried in + // the near future + pool.update_cache(p, new_blk_size); if (has_pm) { auto end_of_blk = cast(size_t**)( - blk_base_addr + (PAGESIZE * newsz) - - pm_bitmask_size); + blk_base_addr + new_blk_size - pm_bitmask_size); *end_of_blk = pm_bitmask; } return p; } - else if (pagenum + newsz <= pool.npages) - { - // Attempt to expand in place - for (size_t i = pagenum + psz; 1;) - { - if (i == pagenum + newsz) - { - if (opts.options.mem_stomp) - memset(p + blk_size - pm_bitmask_size, - 0xF0, size - blk_size - - pm_bitmask_size); - memset(pool.pagetable + pagenum + - psz, B_PAGEPLUS, newsz - psz); - if (has_pm) { - auto end_of_blk = cast(size_t**)( - blk_base_addr + - (PAGESIZE * newsz) - - pm_bitmask_size); - *end_of_blk = pm_bitmask; - } - return p; - } - if (i == pool.npages) - { - break; - } - if (pool.pagetable[i] != B_FREE) - break; - i++; - } - } - } - // if new size is bigger or less than half - if (blk_size < size || blk_size > size * 2) - { - size -= pm_bitmask_size; - blk_size -= pm_bitmask_size; - void* p2 = malloc(size, attrs, pm_bitmask); - if (blk_size < size) - size = blk_size; - cstring.memcpy(p2, p, size); - p = p2; + if (i == pool.npages) + break; + if (pool.pagetable[i] != B_FREE) + break; + i++; } } } + + // if new size is bigger or less than half + if (blk_size < size || blk_size > size * 2) { + size -= pm_bitmask_size; + blk_size -= pm_bitmask_size; + void* p2 = malloc(size, attrs, pm_bitmask); + if (blk_size < size) + size = blk_size; + cstring.memcpy(p2, p, size); + p = p2; + } + return p; } @@ -1478,8 +1634,8 @@ body return 0; // cannot extend buckets auto psz = blk_size / PAGESIZE; - auto minsz = (minsize + PAGESIZE - 1) / PAGESIZE; - auto maxsz = (maxsize + PAGESIZE - 1) / PAGESIZE; + auto minsz = round_up(minsize, PAGESIZE); + auto maxsz = round_up(maxsize, PAGESIZE); 
auto pagenum = (p - pool.baseAddr) / PAGESIZE; @@ -1507,6 +1663,10 @@ body memset(pool.pagetable + pagenum + psz, B_PAGEPLUS, sz); gc.p_cache = null; gc.size_cache = 0; + gc.free_mem -= new_size - blk_size; + // update the size cache, assuming that is very likely the size of this + // block will be queried in the near future + pool.update_cache(p, new_size); if (has_pm) { new_size -= size_t.sizeof; @@ -1547,23 +1707,34 @@ private void free(void *p) // Free pages size_t npages = 1; size_t n = pagenum; + pool.freebits.set_group(bit_i, PAGESIZE / 16); while (++n < pool.npages && pool.pagetable[n] == B_PAGEPLUS) npages++; + size_t size = npages * PAGESIZE; if (opts.options.mem_stomp) - memset(p, 0xF2, npages * PAGESIZE); + memset(p, 0xF2, size); pool.freePages(pagenum, npages); + gc.free_mem += size; + // just in case we were caching this pointer + pool.clear_cache(p); } else { // Add to free list - List *list = cast(List*)p; + List* list = cast(List*) p; if (opts.options.mem_stomp) memset(p, 0xF2, binsize[bin]); list.next = gc.free_list[bin]; + list.pool = pool; gc.free_list[bin] = list; + pool.freebits.set(bit_i); + gc.free_mem += binsize[bin]; } + double percent_free = gc.free_mem * 100.0 / gc.total_mem; + if (percent_free > opts.options.min_free) + minimize(false); } @@ -1652,9 +1823,7 @@ private void checkNoSync(void *p) if (bin < B_PAGE) { // Check that p is not on a free list - List *list; - - for (list = gc.free_list[bin]; list; list = list.next) + for (List* list = gc.free_list[bin]; list; list = list.next) { assert(cast(void*)list != p); } @@ -1720,7 +1889,7 @@ private GCStats getStats() for (n = 0; n < B_PAGE; n++) { - for (List *list = gc.free_list[n]; list; list = list.next) + for (List* list = gc.free_list[n]; list; list = list.next) flsize += binsize[n]; } @@ -1827,12 +1996,29 @@ struct Pool size_t npages; ubyte* pagetable; + /// Cache for findSize() + size_t cached_size; + void* cached_ptr; + + void clear_cache(void* ptr = null) + { + if (ptr is null || ptr is this.cached_ptr) { + this.cached_ptr = null; + this.cached_size = 0; + } + } + + void update_cache(void* ptr, size_t size) + { + this.cached_ptr = ptr; + this.cached_size = size; + } void initialize(size_t npages) { size_t poolsize = npages * PAGESIZE; assert(poolsize >= POOLSIZE); - baseAddr = cast(byte *) alloc.os_mem_map(poolsize); + baseAddr = cast(byte *) os.alloc(poolsize); // Some of the code depends on page alignment of memory pools assert((cast(size_t)baseAddr & (PAGESIZE - 1)) == 0); @@ -1842,13 +2028,27 @@ struct Pool npages = 0; poolsize = 0; } - //assert(baseAddr); topAddr = baseAddr + poolsize; - mark.alloc(cast(size_t)poolsize / 16); - scan.alloc(cast(size_t)poolsize / 16); - freebits.alloc(cast(size_t)poolsize / 16); - noscan.alloc(cast(size_t)poolsize / 16); + size_t nbits = cast(size_t)poolsize / 16; + + // if the GC will run in parallel in a fork()ed process, we need to + // share the mark bits + os.Vis vis = os.Vis.PRIV; + if (opts.options.fork) + vis = os.Vis.SHARED; + mark.alloc(nbits, vis); // shared between mark and sweep + freebits.alloc(nbits); // not used by the mark phase + scan.alloc(nbits); // only used in the mark phase + finals.alloc(nbits); // not used by the mark phase + noscan.alloc(nbits); // mark phase *MUST* have a snapshot + + // all is free when we start + freebits.set_all(); + + // avoid accidental sweeping of new pools while using eager allocation + if (collect_in_progress()) + mark.set_all(); pagetable = cast(ubyte*) cstdlib.malloc(npages); if (!pagetable) @@ -1867,7 +2067,7 @@ 
struct Pool if (npages) { - result = alloc.os_mem_unmap(baseAddr, npages * PAGESIZE); + result = os.dealloc(baseAddr, npages * PAGESIZE); assert(result); npages = 0; } @@ -1879,9 +2079,12 @@ struct Pool if (pagetable) cstdlib.free(pagetable); - mark.Dtor(); - scan.Dtor(); + os.Vis vis = os.Vis.PRIV; + if (opts.options.fork) + vis = os.Vis.SHARED; + mark.Dtor(vis); freebits.Dtor(); + scan.Dtor(); finals.Dtor(); noscan.Dtor(); } @@ -1983,10 +2186,15 @@ struct Pool Bins bin = cast(Bins)this.pagetable[pagenum]; if (bin != B_PAGE) return binsize[bin]; - for (size_t i = pagenum + 1; i < this.npages; i++) + if (this.cached_ptr == p) + return this.cached_size; + size_t i = pagenum + 1; + for (; i < this.npages; i++) if (this.pagetable[i] != B_PAGEPLUS) - return (i - pagenum) * PAGESIZE; - return (this.npages - pagenum) * PAGESIZE; + break; + this.cached_ptr = p; + this.cached_size = (i - pagenum) * PAGESIZE; + return this.cached_size; } @@ -2026,8 +2234,9 @@ void sentinel_init(void *p, size_t size) void sentinel_Invariant(void *p) { - assert(*sentinel_pre(p) == SENTINEL_PRE); - assert(*sentinel_post(p) == SENTINEL_POST); + if (*sentinel_pre(p) != SENTINEL_PRE || + *sentinel_post(p) != SENTINEL_POST) + cstdlib.abort(); }
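A note on the round_up() helper introduced near the top of the patch: despite its name it is a ceiling division, returning how many to-sized units are needed to hold n rather than n rounded up to a multiple of to. Every call site in the patch uses it that way (the results are page counts fed to newPool(), allocPages() and the pagetable), so the behaviour is consistent even if the name is slightly misleading. A minimal illustration, assuming PAGESIZE keeps its usual value of 4096 (it is defined outside the hunks shown here):

    // round_up(n, to) == (n + to - 1) / to, i.e. ceil(n / to)
    assert (round_up(1, PAGESIZE) == 1);             // a single byte still needs one page
    assert (round_up(PAGESIZE, PAGESIZE) == 1);      // an exact multiple needs no extra page
    assert (round_up(PAGESIZE + 1, PAGESIZE) == 2);  // one byte over spills into a second page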
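The heart of the patch is the rewritten fullcollect(), which optionally fork()s the mark phase into a child process and, with eager allocation enabled, lets the mutator keep running while the child marks. The following condensed sketch of that control flow uses only names that appear in the patch (os.fork(), os.wait_pid(), gc.mark_proc_pid, mark(), sweep(), disable_fork()); statistics calls and the error-recovery re-suspension are omitted, so it is an approximation of the real function, not a drop-in replacement:

    size_t fullcollect(void* stackTop)
    {
        // With eager allocation, poll a mark process started by a previous call.
        if (opts.options.eager_alloc && collect_in_progress()) {
            os.WRes r = os.wait_pid(gc.mark_proc_pid, false); // non-blocking
            if (r == os.WRes.DONE) { gc.mark_proc_pid = 0; return sweep(); }
            if (r == os.WRes.RUNNING) return 0;    // caller just allocates more memory
            disable_fork();                        // ERROR: fall back to stop-the-world
        }

        thread_suspendAll();   // threads save their registers for thread_scanAll()

        if (opts.options.fork) {
            os.pid_t child = os.fork();
            if (child == 0) {                      // child: mark against a frozen heap
                mark(stackTop);
                cstdlib.exit(0);
            }
            if (child != -1) {                     // parent: restart the world right away
                thread_resumeAll();
                if (opts.options.eager_alloc) {
                    gc.mark_proc_pid = child;      // sweep happens on a later call
                    return 0;
                }
                os.wait_pid(child);                // blocking wait, then sweep in-process
                return sweep();
            }
            disable_fork();                        // fork() failed: take the serial path
        }

        // Serial stop-the-world path (fork disabled or unavailable).
        mark(stackTop);
        thread_resumeAll();
        return sweep();
    }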
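A smaller change that shows up throughout the patch is that every free-list node now records its owning pool (the new List.pool field) and that freebits is kept accurate at all times instead of being rebuilt during each collection. The net effect on the small-bin fast path of malloc() is roughly the following, again using only names from the patch:

    List* list = gc.free_list[bin];              // pop the head of this size class
    gc.free_list[bin] = list.next;
    Pool* pool = list.pool;                      // no findPool() lookup is needed
    size_t bit_i = (cast(byte*) list - pool.baseAddr) / 16;
    pool.freebits.clear(bit_i);                  // the chunk is live from now on
    gc.free_mem -= binsize[bin];                 // keep the global accounting in sync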