]> git.llucax.com Git - software/dgc/cdgc.git/blobdiff - rt/gc/cdgc/gc.d
Make bigAlloc() a little bit more readable
[software/dgc/cdgc.git] / rt / gc / cdgc / gc.d
index 1c9905524d5153926f3048045a55308ea027d85c..11944d8dde11ced157cb9404a0cdfd27f0e4aa4d 100644 (file)
@@ -30,11 +30,7 @@ module rt.gc.cdgc.gc;
 
 /************** Debugging ***************************/
 
 
 /************** Debugging ***************************/
 
-//debug = PRINTF;               // turn on printf's
 //debug = COLLECT_PRINTF;       // turn on printf's
 //debug = COLLECT_PRINTF;       // turn on printf's
-//debug = LOGGING;              // log allocations / frees
-//debug = MEMSTOMP;             // stomp on memory
-//debug = SENTINEL;             // add underrun/overrrun protection
 //debug = PTRCHECK;             // more pointer checking
 //debug = PTRCHECK2;            // thorough but slow pointer checking
 
 //debug = PTRCHECK;             // more pointer checking
 //debug = PTRCHECK2;            // thorough but slow pointer checking
 
@@ -46,11 +42,33 @@ version = STACKGROWSDOWN;       // growing the stack means subtracting from the
 
 /***************************************************/
 
 
 /***************************************************/
 
-import rt.gc.cdgc.bits;
-import rt.gc.cdgc.stats;
-import rt.gc.cdgc.alloc;
-import rt.gc.cdgc.libc;
-
+import rt.gc.cdgc.bits: GCBits;
+import rt.gc.cdgc.stats: GCStats, Stats;
+import dynarray = rt.gc.cdgc.dynarray;
+import os = rt.gc.cdgc.os;
+import opts = rt.gc.cdgc.opts;
+
+import cstdlib = tango.stdc.stdlib;
+import cstring = tango.stdc.string;
+import cstdio = tango.stdc.stdio;
+debug(COLLECT_PRINTF) alias cstdio.printf printf;
+
+/*
+ * This is a small optimization that proved it's usefulness. For small chunks
+ * or memory memset() seems to be slower (probably because of the call) that
+ * simply doing a simple loop to set the memory.
+ */
+void memset(void* dst, int c, size_t n)
+{
+    // This number (32) has been determined empirically
+    if (n > 32) {
+        cstring.memset(dst, c, n);
+        return;
+    }
+    auto p = cast(ubyte*)(dst);
+    while (n-- > 0)
+        *p++ = c;
+}
 
 version (GNU)
 {
 
 version (GNU)
 {
@@ -58,10 +76,9 @@ version (GNU)
     //      subdirectory is elsewhere.  Instead, perhaps the functions
     //      could be declared directly or some other resolution could
     //      be found.
     //      subdirectory is elsewhere.  Instead, perhaps the functions
     //      could be declared directly or some other resolution could
     //      be found.
-    import gcc.builtins; // for __builtin_unwind_init
+    static import gcc.builtins; // for __builtin_unwind_int
 }
 
 }
 
-
 struct BlkInfo
 {
     void*  base;
 struct BlkInfo
 {
     void*  base;
@@ -69,2708 +86,1845 @@ struct BlkInfo
     uint   attr;
 }
 
     uint   attr;
 }
 
-private
+package enum BlkAttr : uint
 {
 {
-    enum BlkAttr : uint
-    {
-        FINALIZE = 0b0000_0001,
-        NO_SCAN  = 0b0000_0010,
-        NO_MOVE  = 0b0000_0100,
-        ALL_BITS = 0b1111_1111
-    }
+    FINALIZE = 0b0000_0001,
+    NO_SCAN  = 0b0000_0010,
+    NO_MOVE  = 0b0000_0100,
+    ALL_BITS = 0b1111_1111
+}
 
 
-    extern (C) void* rt_stackBottom();
-    extern (C) void* rt_stackTop();
+package bool has_pointermap(uint attrs)
+{
+    return !opts.options.conservative && !(attrs & BlkAttr.NO_SCAN);
+}
 
 
-    extern (C) void rt_finalize( void* p, bool det = true );
+private size_t round_up(size_t n, size_t to)
+{
+    return (n + to - 1) / to;
+}
 
 
+private
+{
     alias void delegate(Object) DEvent;
     alias void delegate(Object) DEvent;
-    extern (C) void rt_attachDisposeEvent(Object h, DEvent e);
-    extern (C) bool rt_detachDisposeEvent(Object h, DEvent e);
-
-
     alias void delegate( void*, void* ) scanFn;
     alias void delegate( void*, void* ) scanFn;
+    enum { OPFAIL = ~cast(size_t)0 }
 
 
-    extern (C) void rt_scanStaticData( scanFn scan );
-
-    extern (C) bool thread_needLock();
-    extern (C) void thread_suspendAll();
-    extern (C) void thread_resumeAll();
+    extern (C)
+    {
+        version (DigitalMars) version(OSX)
+            oid _d_osx_image_init();
 
 
-    extern (C) void thread_scanAll( scanFn fn, void* curStackTop = null );
+        void* rt_stackBottom();
+        void* rt_stackTop();
+        void rt_finalize( void* p, bool det = true );
+        void rt_attachDisposeEvent(Object h, DEvent e);
+        bool rt_detachDisposeEvent(Object h, DEvent e);
+        void rt_scanStaticData( scanFn scan );
 
 
-    extern (C) void onOutOfMemoryError();
+        void thread_init();
+        bool thread_needLock();
+        void thread_suspendAll();
+        void thread_resumeAll();
+        void thread_scanAll( scanFn fn, void* curStackTop = null );
 
 
-    enum
-    {
-        OPFAIL = ~cast(size_t)0
+        void onOutOfMemoryError();
     }
 }
 
 
     }
 }
 
 
-alias GC gc_t;
+enum
+{
+    PAGESIZE =    4096,
+    POOLSIZE =   (4096*256),
+}
+
+
+enum
+{
+    B_16,
+    B_32,
+    B_64,
+    B_128,
+    B_256,
+    B_512,
+    B_1024,
+    B_2048,
+    B_PAGE,             // start of large alloc
+    B_PAGEPLUS,         // continuation of large alloc
+    B_FREE,             // free page
+    B_MAX
+}
 
 
 
 
-/* ======================= Leak Detector =========================== */
+alias ubyte Bins;
 
 
 
 
-debug (LOGGING)
+struct List
 {
 {
-    struct Log
-    {
-        void*  p;
-        size_t size;
-        size_t line;
-        char*  file;
-        void*  parent;
-
-        void print()
-        {
-            printf("    p = %x, size = %d, parent = %x ", p, size, parent);
-            if (file)
-            {
-                printf("%s(%u)", file, line);
-            }
-            printf("\n");
-        }
-    }
+    List* next;
+    Pool* pool;
+}
 
 
 
 
-    struct LogArray
+struct Range
+{
+    void *pbot;
+    void *ptop;
+    int opCmp(in Range other)
     {
     {
-        size_t dim;
-        size_t allocdim;
-        Log *data;
+        if (pbot < other.pbot)
+            return -1;
+        else
+        return cast(int)(pbot > other.pbot);
+    }
+}
 
 
-        void Dtor()
-        {
-            if (data)
-                .free(data);
-            data = null;
-        }
 
 
-        void reserve(size_t nentries)
-        {
-            assert(dim <= allocdim);
-            if (allocdim - dim < nentries)
-            {
-                allocdim = (dim + nentries) * 2;
-                assert(dim + nentries <= allocdim);
-                if (!data)
-                {
-                    data = cast(Log*) .malloc(allocdim * Log.sizeof);
-                    if (!data && allocdim)
-                        onOutOfMemoryError();
-                }
-                else
-                {   Log *newdata;
-
-                    newdata = cast(Log*) .malloc(allocdim * Log.sizeof);
-                    if (!newdata && allocdim)
-                        onOutOfMemoryError();
-                    .memcpy(newdata, data, dim * Log.sizeof);
-                    .free(data);
-                    data = newdata;
-                }
-            }
-        }
+const uint binsize[B_MAX] = [ 16,32,64,128,256,512,1024,2048,4096 ];
+const uint notbinsize[B_MAX] = [ ~(16u-1),~(32u-1),~(64u-1),~(128u-1),~(256u-1),
+                                ~(512u-1),~(1024u-1),~(2048u-1),~(4096u-1) ];
 
 
 
 
-        void push(Log log)
-        {
-            reserve(1);
-            data[dim++] = log;
-        }
+/* ============================ GC =============================== */
 
 
-        void remove(size_t i)
-        {
-            .memmove(data + i, data + i + 1, (dim - i) * Log.sizeof);
-            dim--;
-        }
 
 
+class GCLock {} // just a dummy so we can get a global lock
 
 
-        size_t find(void *p)
-        {
-            for (size_t i = 0; i < dim; i++)
-            {
-                if (data[i].p == p)
-                    return i;
-            }
-            return OPFAIL; // not found
-        }
 
 
+struct GC
+{
+    // global lock
+    ClassInfo lock;
 
 
-        void copy(LogArray *from)
-        {
-            reserve(from.dim - dim);
-            assert(from.dim <= allocdim);
-            .memcpy(data, from.data, from.dim * Log.sizeof);
-            dim = from.dim;
-        }
-    }
-}
+    void* p_cache;
+    size_t size_cache;
 
 
+    // !=0 means don't scan stack
+    uint no_stack;
+    bool any_changes;
+    void* stack_bottom;
+    uint inited;
+    /// Turn off collections if > 0
+    int disabled;
 
 
-/* ============================ GC =============================== */
+    // PID of the fork()ed process doing the mark() (0 if is not running)
+    int mark_proc_pid;
 
 
+    /// min(pool.baseAddr)
+    byte *min_addr;
+    /// max(pool.topAddr)
+    byte *max_addr;
 
 
-class GCLock { }                // just a dummy so we can get a global lock
+    /// Free list for each size
+    List*[B_MAX] free_list;
 
 
+    dynarray.DynArray!(void*) roots;
+    dynarray.DynArray!(Range) ranges;
+    dynarray.DynArray!(Pool*) pools;
 
 
-const uint GCVERSION = 1;       // increment every time we change interface
-                                // to GC.
+    Stats stats;
+}
 
 
-class GC
+// call locked if necessary
+private T locked(T, alias Code)()
 {
 {
-    // For passing to debug code
-    static size_t line;
-    static char*  file;
+    if (thread_needLock())
+        synchronized (gc.lock) return Code();
+    else
+       return Code();
+}
 
 
-    uint gcversion = GCVERSION;
+private GC* gc;
 
 
-    Gcx *gcx;                   // implementation
-    static ClassInfo gcLock;    // global lock
+bool Invariant()
+{
+    assert (gc !is null);
+    if (gc.inited) {
+        for (size_t i = 0; i < gc.pools.length; i++) {
+            Pool* pool = gc.pools[i];
+            pool.Invariant();
+            if (i == 0)
+                assert(gc.min_addr == pool.baseAddr);
+            if (i + 1 < gc.pools.length)
+                assert(*pool < *gc.pools[i + 1]);
+            else if (i + 1 == gc.pools.length)
+                assert(gc.max_addr == pool.topAddr);
+        }
+
+        gc.roots.Invariant();
+        gc.ranges.Invariant();
+
+        for (size_t i = 0; i < gc.ranges.length; i++) {
+            assert(gc.ranges[i].pbot);
+            assert(gc.ranges[i].ptop);
+            assert(gc.ranges[i].pbot <= gc.ranges[i].ptop);
+        }
+
+        for (size_t i = 0; i < B_PAGE; i++) {
+            for (List *list = gc.free_list[i]; list; list = list.next) {
+                auto pool = list.pool;
+                assert (pool !is null);
+                auto p = cast(byte*) list;
+                assert (p >= pool.baseAddr);
+                assert (p < pool.topAddr);
+                assert (pool.freebits.test((p - pool.baseAddr) / 16));
+            }
+        }
+    }
+    return true;
+}
 
 
 
 
-    void initialize()
-    {
-        gcLock = GCLock.classinfo;
-        gcx = cast(Gcx*) .calloc(1, Gcx.sizeof);
-        if (!gcx)
-            onOutOfMemoryError();
-        gcx.initialize();
-        setStackBottom(rt_stackBottom());
+/**
+ * Find Pool that pointer is in.
+ * Return null if not in a Pool.
+ * Assume pools is sorted.
+ */
+Pool* findPool(void* p)
+{
+    if (p < gc.min_addr || p >= gc.max_addr)
+        return null;
+    if (gc.pools.length == 0)
+        return null;
+    if (gc.pools.length == 1)
+        return gc.pools[0];
+    /// The pooltable[] is sorted by address, so do a binary search
+    size_t low = 0;
+    size_t high = gc.pools.length - 1;
+    while (low <= high) {
+        size_t mid = (low + high) / 2;
+        auto pool = gc.pools[mid];
+        if (p < pool.baseAddr)
+            high = mid - 1;
+        else if (p >= pool.topAddr)
+            low = mid + 1;
+        else
+            return pool;
     }
     }
+    // Not found
+    return null;
+}
 
 
 
 
-    void Dtor()
-    {
-        if (gcx)
-        {
-            gcx.Dtor();
-            .free(gcx);
-            gcx = null;
-        }
-    }
+/**
+ * Determine the base address of the block containing p.  If p is not a gc
+ * allocated pointer, return null.
+ */
+BlkInfo getInfo(void* p)
+{
+    assert (p !is null);
+    Pool* pool = findPool(p);
+    if (pool is null)
+        return BlkInfo.init;
+    BlkInfo info;
+    info.base = pool.findBase(p);
+    if (info.base is null)
+        return BlkInfo.init;
+    info.size = pool.findSize(info.base);
+    size_t bit_i = (info.base - pool.baseAddr) / 16;
+    info.attr = getAttr(pool, bit_i);
+    if (has_pointermap(info.attr)) {
+        info.size -= size_t.sizeof; // PointerMap bitmask
+        // Points to the PointerMap bitmask pointer, not user data
+        if (p >= (info.base + info.size)) {
+            return BlkInfo.init;
+        }
+    }
+    if (opts.options.sentinel) {
+        info.base = sentinel_add(info.base);
+        // points to sentinel data, not user data
+        if (p < info.base || p >= sentinel_post(info.base))
+            return BlkInfo.init;
+        info.size -= SENTINEL_EXTRA;
+    }
+    return info;
+}
 
 
 
 
-    /**
-     *
-     */
-    void enable()
+/**
+ * Compute bin for size.
+ */
+Bins findBin(size_t size)
+{
+    Bins bin;
+    if (size <= 256)
     {
     {
-        if (!thread_needLock())
+        if (size <= 64)
         {
         {
-            assert(gcx.disabled > 0);
-            gcx.disabled--;
+            if (size <= 16)
+                bin = B_16;
+            else if (size <= 32)
+                bin = B_32;
+            else
+                bin = B_64;
         }
         }
-        else synchronized (gcLock)
+        else
         {
         {
-            assert(gcx.disabled > 0);
-            gcx.disabled--;
+            if (size <= 128)
+                bin = B_128;
+            else
+                bin = B_256;
         }
     }
         }
     }
-
-
-    /**
-     *
-     */
-    void disable()
+    else
     {
     {
-        if (!thread_needLock())
+        if (size <= 1024)
         {
         {
-            gcx.disabled++;
+            if (size <= 512)
+                bin = B_512;
+            else
+                bin = B_1024;
         }
         }
-        else synchronized (gcLock)
+        else
         {
         {
-            gcx.disabled++;
+            if (size <= 2048)
+                bin = B_2048;
+            else
+                bin = B_PAGE;
         }
     }
         }
     }
+    return bin;
+}
 
 
 
 
-    /**
-     *
-     */
-    uint getAttr(void* p)
-    {
-        if (!p)
-        {
-            return 0;
-        }
+/**
+ * Allocate a new pool of at least size bytes.
+ * Sort it into pools.
+ * Mark all memory in the pool as B_FREE.
+ * Return the actual number of bytes reserved or 0 on error.
+ */
+size_t reserve(size_t size)
+{
+    assert(size != 0);
+    size_t npages = round_up(size, PAGESIZE);
+    Pool*  pool = newPool(npages);
 
 
-        uint go()
-        {
-            Pool* pool = gcx.findPool(p);
-            uint  oldb = 0;
+    if (!pool)
+        return 0;
+    return pool.npages * PAGESIZE;
+}
 
 
-            if (pool)
-            {
-                auto biti = cast(size_t)(p - pool.baseAddr) / 16;
 
 
-                oldb = gcx.getBits(pool, biti);
-            }
-            return oldb;
-        }
+/**
+ * Minimizes physical memory usage by returning free pools to the OS.
+ */
+void minimize()
+{
+    // Disabled if a parallel collection is in progress because the shared mark
+    // bits of the freed pool might be used by the mark process
+    if (gc.mark_proc_pid != 0)
+        return;
 
 
-        if (!thread_needLock())
-        {
-            return go();
-        }
-        else synchronized (gcLock)
+    size_t n;
+    size_t pn;
+    Pool* pool;
+
+    for (n = 0; n < gc.pools.length; n++)
+    {
+        pool = gc.pools[n];
+        for (pn = 0; pn < pool.npages; pn++)
         {
         {
-            return go();
+            if (cast(Bins)pool.pagetable[pn] != B_FREE)
+                break;
         }
         }
+        if (pn < pool.npages)
+            continue;
+        pool.Dtor();
+        cstdlib.free(pool);
+        gc.pools.remove_at(n);
+        n--;
     }
     }
+    gc.min_addr = gc.pools[0].baseAddr;
+    gc.max_addr = gc.pools[gc.pools.length - 1].topAddr;
+}
 
 
 
 
-    /**
-     *
-     */
-    uint setAttr(void* p, uint mask)
+/**
+ * Allocate a chunk of memory that is larger than a page.
+ * Return null if out of memory.
+ */
+void* bigAlloc(size_t npages, out Pool* pool, size_t* pn)
+{
+    // This code could use some refinement when repeatedly
+    // allocating very large arrays.
+
+    void* find_block()
     {
     {
-        if (!p)
+        for (size_t n = 0; n < gc.pools.length; n++)
         {
         {
-            return 0;
+            pool = gc.pools[n];
+            *pn = pool.allocPages(npages);
+            if (*pn != OPFAIL)
+                return pool.baseAddr + *pn * PAGESIZE;
         }
         }
+        return null;
+    }
 
 
-        uint go()
-        {
-            Pool* pool = gcx.findPool(p);
-            uint  oldb = 0;
+    void* alloc_more()
+    {
+        // Release empty pools to prevent bloat
+        minimize();
+        // Allocate new pool
+        pool = newPool(npages);
+        if (!pool)
+            return null; // let malloc handle the error
+        *pn = pool.allocPages(npages);
+        assert(*pn != OPFAIL);
+        return pool.baseAddr + *pn * PAGESIZE;
+    }
 
 
-            if (pool)
-            {
-                auto biti = cast(size_t)(p - pool.baseAddr) / 16;
+    if (void* p = find_block())
+        return p;
 
 
-                oldb = gcx.getBits(pool, biti);
-                gcx.setBits(pool, biti, mask);
-            }
-            return oldb;
-        }
+    if (gc.disabled)
+        return alloc_more();
 
 
-        if (!thread_needLock())
-        {
-            return go();
-        }
-        else synchronized (gcLock)
-        {
-            return go();
-        }
+    // Try collecting
+    size_t freedpages = fullcollectshell();
+    if (freedpages >= gc.pools.length * ((POOLSIZE / PAGESIZE) / 4)) {
+        if (void* p = find_block())
+            return p;
     }
 
     }
 
+    return alloc_more();
+}
 
 
-    /**
-     *
-     */
-    uint clrAttr(void* p, uint mask)
-    {
-        if (!p)
-        {
-            return 0;
-        }
 
 
-        uint go()
-        {
-            Pool* pool = gcx.findPool(p);
-            uint  oldb = 0;
+/**
+ * Allocate a new pool with at least npages in it.
+ * Sort it into pools.
+ * Return null if failed.
+ */
+Pool *newPool(size_t npages)
+{
+    // Minimum of POOLSIZE
+    if (npages < POOLSIZE/PAGESIZE)
+        npages = POOLSIZE/PAGESIZE;
+    else if (npages > POOLSIZE/PAGESIZE)
+    {
+        // Give us 150% of requested size, so there's room to extend
+        auto n = npages + (npages >> 1);
+        if (n < size_t.max/PAGESIZE)
+            npages = n;
+    }
 
 
-            if (pool)
-            {
-                auto biti = cast(size_t)(p - pool.baseAddr) / 16;
+    // Allocate successively larger pools up to 8 megs
+    if (gc.pools.length)
+    {
+        size_t n = gc.pools.length;
+        if (n > 8)
+            n = 8;                  // cap pool size at 8 megs
+        n *= (POOLSIZE / PAGESIZE);
+        if (npages < n)
+            npages = n;
+    }
 
 
-                oldb = gcx.getBits(pool, biti);
-                gcx.clrBits(pool, biti, mask);
-            }
-            return oldb;
-        }
+    auto pool = cast(Pool*) cstdlib.calloc(1, Pool.sizeof);
+    if (pool is null)
+        return null;
+    pool.initialize(npages);
+    if (!pool.baseAddr)
+    {
+        pool.Dtor();
+        return null;
+    }
 
 
-        if (!thread_needLock())
-        {
-            return go();
-        }
-        else synchronized (gcLock)
-        {
-            return go();
-        }
+    auto inserted_pool = *gc.pools.insert_sorted!("*a < *b")(pool);
+    if (inserted_pool is null) {
+        pool.Dtor();
+        return null;
     }
     }
+    assert (inserted_pool is pool);
+    gc.min_addr = gc.pools[0].baseAddr;
+    gc.max_addr = gc.pools[gc.pools.length - 1].topAddr;
+    return pool;
+}
 
 
 
 
-    /**
-     *
-     */
-    void *malloc(size_t size, uint bits = 0)
-    {
-        if (!size)
-        {
-            return null;
-        }
+/**
+ * Allocate a page of bin's.
+ * Returns:
+ *  0       failed
+ */
+int allocPage(Bins bin)
+{
+    Pool*  pool;
+    size_t pn;
 
 
-        if (!thread_needLock())
-        {
-            return mallocNoSync(size, bits);
-        }
-        else synchronized (gcLock)
-        {
-            return mallocNoSync(size, bits);
-        }
+    for (size_t n = 0; n < gc.pools.length; n++)
+    {
+        pool = gc.pools[n];
+        pn = pool.allocPages(1);
+        if (pn != OPFAIL)
+            goto L1;
     }
     }
+    return 0;               // failed
+
+  L1:
+    pool.pagetable[pn] = cast(ubyte)bin;
 
 
+    // Convert page to free list
+    size_t size = binsize[bin];
+    auto list_head = &gc.free_list[bin];
 
 
-    //
-    //
-    //
-    private void *mallocNoSync(size_t size, uint bits = 0)
+    byte* p = pool.baseAddr + pn * PAGESIZE;
+    byte*  ptop = p + PAGESIZE;
+    size_t bit_i = pn * (PAGESIZE / 16);
+    pool.freebits.set_group(bit_i, PAGESIZE / 16);
+    for (; p < ptop; p += size)
     {
     {
-        assert(size != 0);
+        List* l = cast(List *) p;
+        l.next = *list_head;
+        l.pool = pool;
+        *list_head = l;
+    }
+    return 1;
+}
 
 
-        void *p = null;
-        Bins bin;
 
 
-        //debug(PRINTF) printf("GC::malloc(size = %d, gcx = %p)\n", size, gcx);
-        assert(gcx);
+/**
+ * Search a range of memory values and mark any pointers into the GC pool using
+ * type information (bitmask of pointer locations).
+ */
+void mark_range(void *pbot, void *ptop, size_t* pm_bitmask)
+{
+    // TODO: make our own assert because assert uses the GC
+    assert (pbot <= ptop);
+
+    const BITS_PER_WORD = size_t.sizeof * 8;
+
+    void **p1 = cast(void **)pbot;
+    void **p2 = cast(void **)ptop;
+    size_t pcache = 0;
+    bool changes = false;
+
+    size_t type_size = pm_bitmask[0];
+    size_t* pm_bits = pm_bitmask + 1;
+    bool has_type_info = type_size != 1 || pm_bits[0] != 1 || pm_bits[1] != 0;
+
+    //printf("marking range: %p -> %p\n", pbot, ptop);
+    for (; p1 + type_size <= p2; p1 += type_size) {
+        for (size_t n = 0; n < type_size; n++) {
+            // scan bit set for this word
+            if (has_type_info &&
+                    !(pm_bits[n / BITS_PER_WORD] & (1 << (n % BITS_PER_WORD))))
+                continue;
 
 
-        size += SENTINEL_EXTRA;
+            void* p = *(p1 + n);
 
 
-        // Compute size bin
-        // Cache previous binsize lookup - Dave Fladebo.
-        static size_t lastsize = -1;
-        static Bins lastbin;
-        if (size == lastsize)
-            bin = lastbin;
-        else
-        {
-            bin = gcx.findBin(size);
-            lastsize = size;
-            lastbin = bin;
-        }
+            if (p < gc.min_addr || p >= gc.max_addr)
+                continue;
 
 
-        if (bin < B_PAGE)
-        {
-            p = gcx.bucket[bin];
-            if (p is null)
+            if ((cast(size_t)p & ~(PAGESIZE-1)) == pcache)
+                continue;
+
+            Pool* pool = findPool(p);
+            if (pool)
             {
             {
-                if (!gcx.allocPage(bin) && !gcx.disabled)   // try to find a new page
+                size_t offset = cast(size_t)(p - pool.baseAddr);
+                size_t bit_i = void;
+                size_t pn = offset / PAGESIZE;
+                Bins   bin = cast(Bins)pool.pagetable[pn];
+
+                // Cache B_PAGE, B_PAGEPLUS and B_FREE lookups
+                if (bin >= B_PAGE)
+                    pcache = cast(size_t)p & ~(PAGESIZE-1);
+
+                // Adjust bit to be at start of allocated memory block
+                if (bin <= B_PAGE)
+                    bit_i = (offset & notbinsize[bin]) / 16;
+                else if (bin == B_PAGEPLUS)
                 {
                 {
-                    if (!thread_needLock())
+                    do
                     {
                     {
-                        /* Then we haven't locked it yet. Be sure
-                         * and lock for a collection, since a finalizer
-                         * may start a new thread.
-                         */
-                        synchronized (gcLock)
-                        {
-                            gcx.fullcollectshell();
-                        }
-                    }
-                    else if (!gcx.fullcollectshell())       // collect to find a new page
-                    {
-                        //gcx.newPool(1);
+                        --pn;
                     }
                     }
+                    while (cast(Bins)pool.pagetable[pn] == B_PAGEPLUS);
+                    bit_i = pn * (PAGESIZE / 16);
                 }
                 }
-                if (!gcx.bucket[bin] && !gcx.allocPage(bin))
-                {   int result;
+                else // Don't mark bits in B_FREE pages
+                    continue;
 
 
-                    gcx.newPool(1);         // allocate new pool to find a new page
-                    result = gcx.allocPage(bin);
-                    if (!result)
-                        onOutOfMemoryError();
+                if (!pool.mark.test(bit_i))
+                {
+                    pool.mark.set(bit_i);
+                    if (!pool.noscan.test(bit_i))
+                    {
+                        pool.scan.set(bit_i);
+                        changes = true;
+                    }
                 }
                 }
-                p = gcx.bucket[bin];
             }
             }
-
-            // Return next item from free list
-            gcx.bucket[bin] = (cast(List*)p).next;
-            if( !(bits & BlkAttr.NO_SCAN) )
-                .memset(p + size, 0, binsize[bin] - size);
-            //debug(PRINTF) printf("\tmalloc => %x\n", p);
-            debug (MEMSTOMP) .memset(p, 0xF0, size);
-        }
-        else
-        {
-            p = gcx.bigAlloc(size);
-            if (!p)
-                onOutOfMemoryError();
-        }
-        size -= SENTINEL_EXTRA;
-        p = sentinel_add(p);
-        sentinel_init(p, size);
-        gcx.log_malloc(p, size);
-
-        if (bits)
-        {
-            Pool *pool = gcx.findPool(p);
-            assert(pool);
-
-            gcx.setBits(pool, cast(size_t)(p - pool.baseAddr) / 16, bits);
-        }
-        return p;
-    }
-
-
-    /**
-     *
-     */
-    void *calloc(size_t size, uint bits = 0)
-    {
-        if (!size)
-        {
-            return null;
-        }
-
-        if (!thread_needLock())
-        {
-            return callocNoSync(size, bits);
-        }
-        else synchronized (gcLock)
-        {
-            return callocNoSync(size, bits);
         }
     }
         }
     }
+    if (changes)
+        gc.any_changes = true;
+}
 
 
+/**
+ * Return number of full pages free'd.
+ */
+size_t fullcollectshell()
+{
+    gc.stats.collection_started();
+    scope (exit)
+        gc.stats.collection_finished();
 
 
-    //
-    //
-    //
-    private void *callocNoSync(size_t size, uint bits = 0)
+    // The purpose of the 'shell' is to ensure all the registers
+    // get put on the stack so they'll be scanned
+    void *sp;
+    size_t result;
+    version (GNU)
     {
     {
-        assert(size != 0);
-
-        //debug(PRINTF) printf("calloc: %x len %d\n", p, len);
-        void *p = mallocNoSync(size, bits);
-        .memset(p, 0, size);
-        return p;
+        gcc.builtins.__builtin_unwind_init();
+        sp = & sp;
     }
     }
-
-
-    /**
-     *
-     */
-    void *realloc(void *p, size_t size, uint bits = 0)
+    else version(LDC)
     {
     {
-        if (!thread_needLock())
+        version(X86)
         {
         {
-            return reallocNoSync(p, size, bits);
-        }
-        else synchronized (gcLock)
-        {
-            return reallocNoSync(p, size, bits);
-        }
-    }
-
-
-    //
-    //
-    //
-    private void *reallocNoSync(void *p, size_t size, uint bits = 0)
-    {
-        if (!size)
-        {   if (p)
-            {   freeNoSync(p);
-                p = null;
+            uint eax,ecx,edx,ebx,ebp,esi,edi;
+            asm
+            {
+                mov eax[EBP], EAX      ;
+                mov ecx[EBP], ECX      ;
+                mov edx[EBP], EDX      ;
+                mov ebx[EBP], EBX      ;
+                mov ebp[EBP], EBP      ;
+                mov esi[EBP], ESI      ;
+                mov edi[EBP], EDI      ;
+                mov  sp[EBP], ESP      ;
             }
         }
             }
         }
-        else if (!p)
+        else version (X86_64)
         {
         {
-            p = mallocNoSync(size, bits);
-        }
-        else
-        {   void *p2;
-            size_t psize;
-
-            //debug(PRINTF) printf("GC::realloc(p = %x, size = %u)\n", p, size);
-            version (SENTINEL)
-            {
-                sentinel_Invariant(p);
-                psize = *sentinel_size(p);
-                if (psize != size)
-                {
-                    if (psize)
-                    {
-                        Pool *pool = gcx.findPool(p);
-
-                        if (pool)
-                        {
-                            auto biti = cast(size_t)(p - pool.baseAddr) / 16;
-
-                            if (bits)
-                            {
-                                gcx.clrBits(pool, biti, BlkAttr.ALL_BITS);
-                                gcx.setBits(pool, biti, bits);
-                            }
-                            else
-                            {
-                                bits = gcx.getBits(pool, biti);
-                            }
-                        }
-                    }
-                    p2 = mallocNoSync(size, bits);
-                    if (psize < size)
-                        size = psize;
-                    //debug(PRINTF) printf("\tcopying %d bytes\n",size);
-                    .memcpy(p2, p, size);
-                    p = p2;
-                }
-            }
-            else
+            ulong rax,rbx,rcx,rdx,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15;
+            asm
             {
             {
-                psize = gcx.findSize(p);        // find allocated size
-                if (psize >= PAGESIZE && size >= PAGESIZE)
-                {
-                    auto psz = psize / PAGESIZE;
-                    auto newsz = (size + PAGESIZE - 1) / PAGESIZE;
-                    if (newsz == psz)
-                        return p;
-
-                    auto pool = gcx.findPool(p);
-                    auto pagenum = (p - pool.baseAddr) / PAGESIZE;
-
-                    if (newsz < psz)
-                    {   // Shrink in place
-                        synchronized (gcLock)
-                        {
-                            debug (MEMSTOMP) .memset(p + size, 0xF2, psize - size);
-                            pool.freePages(pagenum + newsz, psz - newsz);
-                        }
-                        return p;
-                    }
-                    else if (pagenum + newsz <= pool.npages)
-                    {
-                        // Attempt to expand in place
-                        synchronized (gcLock)
-                        {
-                            for (size_t i = pagenum + psz; 1;)
-                            {
-                                if (i == pagenum + newsz)
-                                {
-                                    debug (MEMSTOMP) .memset(p + psize, 0xF0, size - psize);
-                                    .memset(&pool.pagetable[pagenum + psz], B_PAGEPLUS, newsz - psz);
-                                    return p;
-                                }
-                                if (i == pool.npages)
-                                {
-                                    break;
-                                }
-                                if (pool.pagetable[i] != B_FREE)
-                                    break;
-                                i++;
-                            }
-                        }
-                    }
-                }
-                if (psize < size ||             // if new size is bigger
-                    psize > size * 2)           // or less than half
-                {
-                    if (psize)
-                    {
-                        Pool *pool = gcx.findPool(p);
-
-                        if (pool)
-                        {
-                            auto biti = cast(size_t)(p - pool.baseAddr) / 16;
-
-                            if (bits)
-                            {
-                                gcx.clrBits(pool, biti, BlkAttr.ALL_BITS);
-                                gcx.setBits(pool, biti, bits);
-                            }
-                            else
-                            {
-                                bits = gcx.getBits(pool, biti);
-                            }
-                        }
-                    }
-                    p2 = mallocNoSync(size, bits);
-                    if (psize < size)
-                        size = psize;
-                    //debug(PRINTF) printf("\tcopying %d bytes\n",size);
-                    .memcpy(p2, p, size);
-                    p = p2;
-                }
+                movq rax[RBP], RAX      ;
+                movq rbx[RBP], RBX      ;
+                movq rcx[RBP], RCX      ;
+                movq rdx[RBP], RDX      ;
+                movq rbp[RBP], RBP      ;
+                movq rsi[RBP], RSI      ;
+                movq rdi[RBP], RDI      ;
+                movq r8 [RBP], R8       ;
+                movq r9 [RBP], R9       ;
+                movq r10[RBP], R10      ;
+                movq r11[RBP], R11      ;
+                movq r12[RBP], R12      ;
+                movq r13[RBP], R13      ;
+                movq r14[RBP], R14      ;
+                movq r15[RBP], R15      ;
+                movq  sp[RBP], RSP      ;
             }
         }
             }
         }
-        return p;
-    }
-
-
-    /**
-     * Attempt to in-place enlarge the memory block pointed to by p by at least
-     * minbytes beyond its current capacity, up to a maximum of maxsize.  This
-     * does not attempt to move the memory block (like realloc() does).
-     *
-     * Returns:
-     *  0 if could not extend p,
-     *  total size of entire memory block if successful.
-     */
-    size_t extend(void* p, size_t minsize, size_t maxsize)
-    {
-        if (!thread_needLock())
-        {
-            return extendNoSync(p, minsize, maxsize);
-        }
-        else synchronized (gcLock)
+        else
         {
         {
-            return extendNoSync(p, minsize, maxsize);
+            static assert( false, "Architecture not supported." );
         }
     }
         }
     }
-
-
-    //
-    //
-    //
-    private size_t extendNoSync(void* p, size_t minsize, size_t maxsize)
-    in
+    else
     {
     {
-        assert( minsize <= maxsize );
-    }
-    body
+    asm
     {
     {
-        //debug(PRINTF) printf("GC::extend(p = %x, minsize = %u, maxsize = %u)\n", p, minsize, maxsize);
-        version (SENTINEL)
-        {
-            return 0;
-        }
-        auto psize = gcx.findSize(p);   // find allocated size
-        if (psize < PAGESIZE)
-            return 0;                   // cannot extend buckets
-
-        auto psz = psize / PAGESIZE;
-        auto minsz = (minsize + PAGESIZE - 1) / PAGESIZE;
-        auto maxsz = (maxsize + PAGESIZE - 1) / PAGESIZE;
-
-        auto pool = gcx.findPool(p);
-        auto pagenum = (p - pool.baseAddr) / PAGESIZE;
-
-        size_t sz;
-        for (sz = 0; sz < maxsz; sz++)
-        {
-            auto i = pagenum + psz + sz;
-            if (i == pool.npages)
-                break;
-            if (pool.pagetable[i] != B_FREE)
-            {   if (sz < minsz)
-                    return 0;
-                break;
-            }
-        }
-        if (sz < minsz)
-            return 0;
-        debug (MEMSTOMP) .memset(p + psize, 0xF0, (psz + sz) * PAGESIZE - psize);
-        .memset(pool.pagetable + pagenum + psz, B_PAGEPLUS, sz);
-        gcx.p_cache = null;
-        gcx.size_cache = 0;
-        return (psz + sz) * PAGESIZE;
+        pushad              ;
+        mov sp[EBP],ESP     ;
     }
     }
-
-
-    /**
-     *
-     */
-    size_t reserve(size_t size)
-    {
-        if (!size)
-        {
-            return 0;
-        }
-
-        if (!thread_needLock())
-        {
-            return reserveNoSync(size);
-        }
-        else synchronized (gcLock)
-        {
-            return reserveNoSync(size);
-        }
     }
     }
-
-
-    //
-    //
-    //
-    private size_t reserveNoSync(size_t size)
+    result = fullcollect(sp);
+    version (GNU)
     {
     {
-        assert(size != 0);
-        assert(gcx);
-
-        return gcx.reserve(size);
+        // nothing to do
     }
     }
-
-
-    /**
-     *
-     */
-    void free(void *p)
+    else version(LDC)
     {
     {
-        if (!p)
-        {
-            return;
-        }
-
-        if (!thread_needLock())
-        {
-            return freeNoSync(p);
-        }
-        else synchronized (gcLock)
-        {
-            return freeNoSync(p);
-        }
+        // nothing to do
     }
     }
-
-
-    //
-    //
-    //
-    private void freeNoSync(void *p)
+    else
     {
     {
-        assert (p);
-
-        Pool*  pool;
-        size_t pagenum;
-        Bins   bin;
-        size_t biti;
-
-        // Find which page it is in
-        pool = gcx.findPool(p);
-        if (!pool)                              // if not one of ours
-            return;                             // ignore
-        sentinel_Invariant(p);
-        p = sentinel_sub(p);
-        pagenum = cast(size_t)(p - pool.baseAddr) / PAGESIZE;
-        biti = cast(size_t)(p - pool.baseAddr) / 16;
-        gcx.clrBits(pool, biti, BlkAttr.ALL_BITS);
-
-        bin = cast(Bins)pool.pagetable[pagenum];
-        if (bin == B_PAGE)              // if large alloc
-        {   size_t npages;
-            size_t n;
-
-            // Free pages
-            npages = 1;
-            n = pagenum;
-            while (++n < pool.npages && pool.pagetable[n] == B_PAGEPLUS)
-                npages++;
-            debug (MEMSTOMP) .memset(p, 0xF2, npages * PAGESIZE);
-            pool.freePages(pagenum, npages);
-        }
-        else
-        {   // Add to free list
-            List *list = cast(List*)p;
-
-            debug (MEMSTOMP) .memset(p, 0xF2, binsize[bin]);
-
-            list.next = gcx.bucket[bin];
-            gcx.bucket[bin] = list;
-        }
-        gcx.log_free(sentinel_add(p));
-    }
-
-
-    /**
-     * Determine the base address of the block containing p.  If p is not a gc
-     * allocated pointer, return null.
-     */
-    void* addrOf(void *p)
+    asm
     {
     {
-        if (!p)
-        {
-            return null;
-        }
-
-        if (!thread_needLock())
-        {
-            return addrOfNoSync(p);
-        }
-        else synchronized (gcLock)
-        {
-            return addrOfNoSync(p);
-        }
+        popad               ;
     }
     }
+    }
+    return result;
+}
 
 
 
 
-    //
-    //
-    //
-    void* addrOfNoSync(void *p)
-    {
-        if (!p)
-        {
-            return null;
+/**
+ *
+ */
+size_t fullcollect(void *stackTop)
+{
+    debug(COLLECT_PRINTF) printf("Gcx.fullcollect()\n");
+
+    // If eager allocation is used, we need to check first if there is a mark
+    // process running. If there isn't, we start a new one (see the next code
+    // block). If there is, we check if it's still running or already finished.
+    // If it's still running, we tell the caller process no memory has been
+    // recovered (it will allocated more to fulfill the current request).  If
+    // the mark process is done, we lunch the sweep phase and hope enough
+    // memory is freed (if that not the case, the caller will allocate more
+    // memory and the next time it's exhausted it will run a new collection).
+    if (opts.options.eager_alloc) {
+        if (gc.mark_proc_pid != 0) { // there is a mark process in progress
+            os.WRes r = os.wait_pid(gc.mark_proc_pid, false); // don't block
+            assert (r != os.WRes.ERROR);
+            switch (r) {
+            case os.WRes.DONE:
+                debug(COLLECT_PRINTF) printf("\t\tmark proc DONE\n");
+                gc.mark_proc_pid = 0;
+                return sweep();
+            case os.WRes.RUNNING:
+                debug(COLLECT_PRINTF) printf("\t\tmark proc RUNNING\n");
+                return 0;
+            case os.WRes.ERROR:
+                debug(COLLECT_PRINTF) printf("\t\tmark proc ERROR\n");
+                disable_fork(); // Try to keep going without forking
+                break;
+            }
         }
         }
-
-        return gcx.findBase(p);
     }
 
     }
 
-
-    /**
-     * Determine the allocated size of pointer p.  If p is an interior pointer
-     * or not a gc allocated pointer, return 0.
-     */
-    size_t sizeOf(void *p)
-    {
-        if (!p)
-        {
-            return 0;
+    // We always need to stop the world to make threads save the CPU registers
+    // in the stack and prepare themselves for thread_scanAll()
+    thread_suspendAll();
+    gc.stats.world_stopped();
+
+    // If forking is enabled, we fork() and start a new mark phase in the
+    // child. The parent process will tell the caller that no memory could be
+    // recycled if eager allocation is used, allowing the mutator to keep going
+    // almost instantly (at the expense of more memory consumption because
+    // a new allocation will be triggered to fulfill the current request). If
+    // no eager allocation is used, the parent will wait for the mark phase to
+    // finish before returning control to the mutator, but other threads are
+    // restarted and may run in parallel with the mark phase (unless they
+    // allocate or use the GC themselves, in which case the global GC lock will
+    // stop them).
+    if (opts.options.fork) {
+        cstdio.fflush(null); // avoid duplicated FILE* output
+        os.pid_t child_pid = os.fork();
+        assert (child_pid != -1); // don't accept errors in non-release mode
+        switch (child_pid) {
+        case -1: // if fork() fails, fall-back to stop-the-world
+            disable_fork();
+            break;
+        case 0: // child process (i.e. the collectors mark phase)
+            mark(stackTop);
+            cstdlib.exit(0);
+            break; // bogus, will never reach here
+        default: // parent process (i.e. the mutator)
+            // start the world again and wait for the mark phase to finish
+            thread_resumeAll();
+            gc.stats.world_started();
+            if (opts.options.eager_alloc) {
+                gc.mark_proc_pid = child_pid;
+                return 0;
+            }
+            os.WRes r = os.wait_pid(child_pid); // block until it finishes
+            assert (r == os.WRes.DONE);
+            debug(COLLECT_PRINTF) printf("\t\tmark proc DONE (block)\n");
+            if (r == os.WRes.DONE)
+                return sweep();
+            debug(COLLECT_PRINTF) printf("\tmark() proc ERROR\n");
+            // If there was some error, try to keep going without forking
+            disable_fork();
+            // Re-suspend the threads to do the marking in this process
+            thread_suspendAll();
+            gc.stats.world_stopped();
         }
 
         }
 
-        if (!thread_needLock())
-        {
-            return sizeOfNoSync(p);
-        }
-        else synchronized (gcLock)
-        {
-            return sizeOfNoSync(p);
-        }
     }
 
     }
 
+    // If we reach here, we are using the standard stop-the-world collection,
+    // either because fork was disabled in the first place, or because it was
+    // disabled because of some error.
+    mark(stackTop);
+    thread_resumeAll();
+    gc.stats.world_started();
 
 
-    //
-    //
-    //
-    private size_t sizeOfNoSync(void *p)
-    {
-        assert (p);
+    return sweep();
+}
 
 
-        version (SENTINEL)
-        {
-            p = sentinel_sub(p);
-            size_t size = gcx.findSize(p);
-
-            // Check for interior pointer
-            // This depends on:
-            // 1) size is a power of 2 for less than PAGESIZE values
-            // 2) base of memory pool is aligned on PAGESIZE boundary
-            if (cast(size_t)p & (size - 1) & (PAGESIZE - 1))
-                size = 0;
-            return size ? size - SENTINEL_EXTRA : 0;
-        }
-        else
-        {
-            if (p == gcx.p_cache)
-                return gcx.size_cache;
 
 
-            size_t size = gcx.findSize(p);
+/**
+ *
+ */
+void mark(void *stackTop)
+{
+    debug(COLLECT_PRINTF) printf("\tmark()\n");
 
 
-            // Check for interior pointer
-            // This depends on:
-            // 1) size is a power of 2 for less than PAGESIZE values
-            // 2) base of memory pool is aligned on PAGESIZE boundary
-            if (cast(size_t)p & (size - 1) & (PAGESIZE - 1))
-                size = 0;
-            else
-            {
-                gcx.p_cache = p;
-                gcx.size_cache = size;
-            }
+    gc.any_changes = false;
 
 
-            return size;
-        }
+    for (size_t n = 0; n < gc.pools.length; n++)
+    {
+        Pool* pool = gc.pools[n];
+        pool.mark.copy(&pool.freebits);
+        pool.scan.zero();
     }
 
     }
 
-
-    /**
-     * Determine the base address of the block containing p.  If p is not a gc
-     * allocated pointer, return null.
-     */
-    BlkInfo query(void *p)
+    /// Marks a range of memory in conservative mode.
+    void mark_conservative_range(void* pbot, void* ptop)
     {
     {
-        if (!p)
-        {
-            BlkInfo i;
-            return  i;
-        }
-
-        if (!thread_needLock())
-        {
-            return queryNoSync(p);
-        }
-        else synchronized (gcLock)
-        {
-            return queryNoSync(p);
-        }
+        mark_range(pbot, ptop, PointerMap.init.bits.ptr);
     }
 
     }
 
+    rt_scanStaticData(&mark_conservative_range);
 
 
-    //
-    //
-    //
-    BlkInfo queryNoSync(void *p)
+    if (!gc.no_stack)
     {
     {
-        assert(p);
-
-        return gcx.getInfo(p);
+        // Scan stacks and registers for each paused thread
+        thread_scanAll(&mark_conservative_range, stackTop);
     }
 
     }
 
+    // Scan roots
+    debug(COLLECT_PRINTF) printf("scan roots[]\n");
+    mark_conservative_range(gc.roots.ptr, gc.roots.ptr + gc.roots.length);
 
 
-    /**
-     * Verify that pointer p:
-     *  1) belongs to this memory pool
-     *  2) points to the start of an allocated piece of memory
-     *  3) is not on a free list
-     */
-    void check(void *p)
+    // Scan ranges
+    debug(COLLECT_PRINTF) printf("scan ranges[]\n");
+    for (size_t n = 0; n < gc.ranges.length; n++)
     {
     {
-        if (!p)
-        {
-            return;
-        }
-
-        if (!thread_needLock())
-        {
-            checkNoSync(p);
-        }
-        else synchronized (gcLock)
-        {
-            checkNoSync(p);
-        }
+        debug(COLLECT_PRINTF) printf("\t%x .. %x\n", gc.ranges[n].pbot, gc.ranges[n].ptop);
+        mark_conservative_range(gc.ranges[n].pbot, gc.ranges[n].ptop);
     }
 
     }
 
-
-    //
-    //
-    //
-    private void checkNoSync(void *p)
+    debug(COLLECT_PRINTF) printf("\tscan heap\n");
+    while (gc.any_changes)
     {
     {
-        assert(p);
-
-        sentinel_Invariant(p);
-        debug (PTRCHECK)
+        gc.any_changes = false;
+        for (size_t n = 0; n < gc.pools.length; n++)
         {
         {
-            Pool*  pool;
-            size_t pagenum;
-            Bins   bin;
-            size_t size;
+            uint *bbase;
+            uint *b;
+            uint *btop;
 
 
-            p = sentinel_sub(p);
-            pool = gcx.findPool(p);
-            assert(pool);
-            pagenum = cast(size_t)(p - pool.baseAddr) / PAGESIZE;
-            bin = cast(Bins)pool.pagetable[pagenum];
-            assert(bin <= B_PAGE);
-            size = binsize[bin];
-            assert((cast(size_t)p & (size - 1)) == 0);
-
-            debug (PTRCHECK2)
+            Pool* pool = gc.pools[n];
+
+            bbase = pool.scan.base();
+            btop = bbase + pool.scan.nwords;
+            for (b = bbase; b < btop;)
             {
             {
-                if (bin < B_PAGE)
+                Bins   bin;
+                size_t pn;
+                size_t u;
+                size_t bitm;
+                byte*  o;
+
+                bitm = *b;
+                if (!bitm)
                 {
                 {
-                    // Check that p is not on a free list
-                    List *list;
+                    b++;
+                    continue;
+                }
+                *b = 0;
+
+                o = pool.baseAddr + (b - bbase) * 32 * 16;
+                if (!(bitm & 0xFFFF))
+                {
+                    bitm >>= 16;
+                    o += 16 * 16;
+                }
+                for (; bitm; o += 16, bitm >>= 1)
+                {
+                    if (!(bitm & 1))
+                        continue;
 
 
-                    for (list = gcx.bucket[bin]; list; list = list.next)
+                    pn = cast(size_t)(o - pool.baseAddr) / PAGESIZE;
+                    bin = cast(Bins)pool.pagetable[pn];
+                    if (bin < B_PAGE) {
+                        if (opts.options.conservative)
+                            mark_conservative_range(o, o + binsize[bin]);
+                        else {
+                            auto end_of_blk = cast(size_t**)(o +
+                                    binsize[bin] - size_t.sizeof);
+                            size_t* pm_bitmask = *end_of_blk;
+                            mark_range(o, end_of_blk, pm_bitmask);
+                        }
+                    }
+                    else if (bin == B_PAGE || bin == B_PAGEPLUS)
                     {
                     {
-                        assert(cast(void*)list != p);
+                        if (bin == B_PAGEPLUS)
+                        {
+                            while (pool.pagetable[pn - 1] != B_PAGE)
+                                pn--;
+                        }
+                        u = 1;
+                        while (pn + u < pool.npages &&
+                                pool.pagetable[pn + u] == B_PAGEPLUS)
+                            u++;
+
+                        size_t blk_size = u * PAGESIZE;
+                        if (opts.options.conservative)
+                            mark_conservative_range(o, o + blk_size);
+                        else {
+                            auto end_of_blk = cast(size_t**)(o + blk_size -
+                                    size_t.sizeof);
+                            size_t* pm_bitmask = *end_of_blk;
+                            mark_range(o, end_of_blk, pm_bitmask);
+                        }
                     }
                 }
             }
         }
     }
                     }
                 }
             }
         }
     }
+}
 
 
 
 
-    //
-    //
-    //
-    private void setStackBottom(void *p)
-    {
-        version (STACKGROWSDOWN)
-        {
-            //p = (void *)((uint *)p + 4);
-            if (p > gcx.stackBottom)
-            {
-                //debug(PRINTF) printf("setStackBottom(%x)\n", p);
-                gcx.stackBottom = p;
-            }
-        }
-        else
-        {
-            //p = (void *)((uint *)p - 4);
-            if (p < gcx.stackBottom)
-            {
-                //debug(PRINTF) printf("setStackBottom(%x)\n", p);
-                gcx.stackBottom = cast(char*)p;
-            }
-        }
-    }
-
-
-    /**
-     * add p to list of roots
-     */
-    void addRoot(void *p)
-    {
-        if (!p)
-        {
-            return;
-        }
-
-        if (!thread_needLock())
-        {
-            gcx.addRoot(p);
-        }
-        else synchronized (gcLock)
-        {
-            gcx.addRoot(p);
-        }
-    }
-
-
-    /**
-     * remove p from list of roots
-     */
-    void removeRoot(void *p)
-    {
-        if (!p)
-        {
-            return;
-        }
-
-        if (!thread_needLock())
-        {
-            gcx.removeRoot(p);
-        }
-        else synchronized (gcLock)
-        {
-            gcx.removeRoot(p);
-        }
-    }
-
-
-    /**
-     * add range to scan for roots
-     */
-    void addRange(void *p, size_t sz)
-    {
-        if (!p || !sz)
-        {
-            return;
-        }
-
-        //debug(PRINTF) printf("+GC.addRange(pbot = x%x, ptop = x%x)\n", pbot, ptop);
-        if (!thread_needLock())
-        {
-            gcx.addRange(p, p + sz);
-        }
-        else synchronized (gcLock)
-        {
-            gcx.addRange(p, p + sz);
-        }
-        //debug(PRINTF) printf("-GC.addRange()\n");
-    }
-
-
-    /**
-     * remove range
-     */
-    void removeRange(void *p)
-    {
-        if (!p)
-        {
-            return;
-        }
-
-        if (!thread_needLock())
-        {
-            gcx.removeRange(p);
-        }
-        else synchronized (gcLock)
-        {
-            gcx.removeRange(p);
-        }
-    }
-
-
-    /**
-     * do full garbage collection
-     */
-    void fullCollect()
-    {
-        debug(PRINTF) printf("GC.fullCollect()\n");
-
-        if (!thread_needLock())
-        {
-            gcx.fullcollectshell();
-        }
-        else synchronized (gcLock)
-        {
-            gcx.fullcollectshell();
-        }
-
-        version (none)
-        {
-            GCStats stats;
-
-            getStats(stats);
-            debug(PRINTF) printf("poolsize = %x, usedsize = %x, freelistsize = %x\n",
-                    stats.poolsize, stats.usedsize, stats.freelistsize);
-        }
-
-        gcx.log_collect();
-    }
-
-
-    /**
-     * do full garbage collection ignoring roots
-     */
-    void fullCollectNoStack()
-    {
-        if (!thread_needLock())
-        {
-            gcx.noStack++;
-            gcx.fullcollectshell();
-            gcx.noStack--;
-        }
-        else synchronized (gcLock)
-        {
-            gcx.noStack++;
-            gcx.fullcollectshell();
-            gcx.noStack--;
-        }
-    }
-
-
-    /**
-     * minimize free space usage
-     */
-    void minimize()
-    {
-        if (!thread_needLock())
-        {
-            gcx.minimize();
-        }
-        else synchronized (gcLock)
-        {
-            gcx.minimize();
-        }
-    }
-
-
-    /**
-     * Retrieve statistics about garbage collection.
-     * Useful for debugging and tuning.
-     */
-    void getStats(out GCStats stats)
-    {
-        if (!thread_needLock())
-        {
-            getStatsNoSync(stats);
-        }
-        else synchronized (gcLock)
-        {
-            getStatsNoSync(stats);
-        }
-    }
-
-
-    //
-    //
-    //
-    private void getStatsNoSync(out GCStats stats)
-    {
-        size_t psize = 0;
-        size_t usize = 0;
-        size_t flsize = 0;
-
-        size_t n;
-        size_t bsize = 0;
-
-        //debug(PRINTF) printf("getStats()\n");
-        .memset(&stats, 0, GCStats.sizeof);
-
-        for (n = 0; n < gcx.npools; n++)
-        {   Pool *pool = gcx.pooltable[n];
-
-            psize += pool.npages * PAGESIZE;
-            for (size_t j = 0; j < pool.npages; j++)
-            {
-                Bins bin = cast(Bins)pool.pagetable[j];
-                if (bin == B_FREE)
-                    stats.freeblocks++;
-                else if (bin == B_PAGE)
-                    stats.pageblocks++;
-                else if (bin < B_PAGE)
-                    bsize += PAGESIZE;
-            }
-        }
-
-        for (n = 0; n < B_PAGE; n++)
-        {
-            //debug(PRINTF) printf("bin %d\n", n);
-            for (List *list = gcx.bucket[n]; list; list = list.next)
-            {
-                //debug(PRINTF) printf("\tlist %x\n", list);
-                flsize += binsize[n];
-            }
-        }
-
-        usize = bsize - flsize;
-
-        stats.poolsize = psize;
-        stats.usedsize = bsize - flsize;
-        stats.freelistsize = flsize;
-    }
-
-    /******************* weak-reference support *********************/
-
-    // call locked if necessary
-    private T locked(T)(in T delegate() code)
-    {
-        if (thread_needLock)
-            synchronized(gcLock) return code();
-        else
-           return code();
-    }
-
-    private struct WeakPointer
-    {
-        Object reference;
-
-        void ondestroy(Object r)
-        {
-            assert(r is reference);
-            // lock for memory consistency (parallel readers)
-            // also ensures that weakpointerDestroy can be called while another
-            // thread is freeing the reference with "delete"
-            locked!(void)({ reference = null; });
-        }
-    }
-
-    /**
-     * Create a weak pointer to the given object.
-     * Returns a pointer to an opaque struct allocated in C memory.
-     */
-    void* weakpointerCreate( Object r )
-    {
-        if (r)
-       {
-            // must be allocated in C memory
-            // 1. to hide the reference from the GC
-            // 2. the GC doesn't scan delegates added by rt_attachDisposeEvent
-            //    for references
-            auto wp = cast(WeakPointer*)(libc.malloc(WeakPointer.sizeof));
-            if (!wp)
-                onOutOfMemoryError();
-            wp.reference = r;
-            rt_attachDisposeEvent(r, &wp.ondestroy);
-            return wp;
-        }
-        return null;
-    }
-
-    /**
-     * Destroy a weak pointer returned by weakpointerCreate().
-     * If null is passed, nothing happens.
-     */
-    void weakpointerDestroy( void* p )
-    {
-        if (p)
-       {
-            auto wp = cast(WeakPointer*)p;
-            // must be extra careful about the GC or parallel threads
-            // finalizing the reference at the same time
-            locked!(void)({
-                   if (wp.reference)
-                       rt_detachDisposeEvent(wp.reference, &wp.ondestroy);
-                  });
-            .free(wp);
-        }
-    }
-
-    /**
-     * Query a weak pointer and return either the object passed to
-     * weakpointerCreate, or null if it was free'd in the meantime.
-     * If null is passed, null is returned.
-     */
-    Object weakpointerGet( void* p )
-    {
-        if (p)
-       {
-            // NOTE: could avoid the lock by using Fawzi style GC counters but
-            // that'd require core.sync.Atomic and lots of care about memory
-            // consistency it's an optional optimization see
-            // http://dsource.org/projects/tango/browser/trunk/user/tango/core/Lifetime.d?rev=5100#L158
-            return locked!(Object)({
-                  return (cast(WeakPointer*)p).reference;
-                  });
-            }
-    }
-}
-
-
-/* ============================ Gcx =============================== */
-
-enum
-{   PAGESIZE =    4096,
-    POOLSIZE =   (4096*256),
-}
-
-
-enum
-{
-    B_16,
-    B_32,
-    B_64,
-    B_128,
-    B_256,
-    B_512,
-    B_1024,
-    B_2048,
-    B_PAGE,             // start of large alloc
-    B_PAGEPLUS,         // continuation of large alloc
-    B_FREE,             // free page
-    B_MAX
-}
-
-
-alias ubyte Bins;
-
-
-struct List
-{
-    List *next;
-}
-
-
-struct Range
-{
-    void *pbot;
-    void *ptop;
-}
-
-
-const uint binsize[B_MAX] = [ 16,32,64,128,256,512,1024,2048,4096 ];
-const uint notbinsize[B_MAX] = [ ~(16u-1),~(32u-1),~(64u-1),~(128u-1),~(256u-1),
-                                ~(512u-1),~(1024u-1),~(2048u-1),~(4096u-1) ];
-
-/* ============================ Gcx =============================== */
-
-
-struct Gcx
-{
-
-    void *p_cache;
-    size_t size_cache;
-
-    size_t nroots;
-    size_t rootdim;
-    void **roots;
-
-    size_t nranges;
-    size_t rangedim;
-    Range *ranges;
-
-    uint noStack;       // !=0 means don't scan stack
-    uint log;           // turn on logging
-    uint anychanges;
-    void *stackBottom;
-    uint inited;
-    int disabled;       // turn off collections if >0
-
-    byte *minAddr;      // min(baseAddr)
-    byte *maxAddr;      // max(topAddr)
-
-    size_t npools;
-    Pool **pooltable;
-
-    List *bucket[B_MAX];        // free list for each size
-
-
-    void initialize()
-    {   int dummy;
-
-        (cast(byte*)this)[0 .. Gcx.sizeof] = 0;
-        stackBottom = cast(char*)&dummy;
-        log_init();
-        //printf("gcx = %p, self = %x\n", this, self);
-        inited = 1;
-    }
-
-
-    void Dtor()
-    {
-        inited = 0;
-
-        for (size_t i = 0; i < npools; i++)
-        {   Pool *pool = pooltable[i];
-
-            pool.Dtor();
-            .free(pool);
-        }
-        if (pooltable)
-            .free(pooltable);
-
-        if (roots)
-            .free(roots);
-
-        if (ranges)
-            .free(ranges);
-    }
-
-
-    void Invariant() { }
-
-
-    invariant
-    {
-        if (inited)
-        {
-        //printf("Gcx.invariant(): this = %p\n", this);
-            size_t i;
-
-            for (i = 0; i < npools; i++)
-            {   Pool *pool = pooltable[i];
-
-                pool.Invariant();
-                if (i == 0)
-                {
-                    assert(minAddr == pool.baseAddr);
-                }
-                if (i + 1 < npools)
-                {
-                    assert(pool.opCmp(pooltable[i + 1]) < 0);
-                }
-                else if (i + 1 == npools)
-                {
-                    assert(maxAddr == pool.topAddr);
-                }
-            }
-
-            if (roots)
-            {
-                assert(rootdim != 0);
-                assert(nroots <= rootdim);
-            }
-
-            if (ranges)
-            {
-                assert(rangedim != 0);
-                assert(nranges <= rangedim);
-
-                for (i = 0; i < nranges; i++)
-                {
-                    assert(ranges[i].pbot);
-                    assert(ranges[i].ptop);
-                    assert(ranges[i].pbot <= ranges[i].ptop);
-                }
-            }
-
-            for (i = 0; i < B_PAGE; i++)
-            {
-                for (List *list = bucket[i]; list; list = list.next)
-                {
-                }
-            }
-        }
-    }
-
-
-    /**
-     *
-     */
-    void addRoot(void *p)
-    {
-        if (nroots == rootdim)
-        {
-            size_t newdim = rootdim * 2 + 16;
-            void** newroots;
-
-            newroots = cast(void**) .malloc(newdim * newroots[0].sizeof);
-            if (!newroots)
-                onOutOfMemoryError();
-            if (roots)
-            {   .memcpy(newroots, roots, nroots * newroots[0].sizeof);
-                .free(roots);
-            }
-            roots = newroots;
-            rootdim = newdim;
-        }
-        roots[nroots] = p;
-        nroots++;
-    }
-
-
-    /**
-     *
-     */
-    void removeRoot(void *p)
-    {
-        for (size_t i = nroots; i--;)
-        {
-            if (roots[i] == p)
-            {
-                nroots--;
-                .memmove(roots + i, roots + i + 1, (nroots - i) * roots[0].sizeof);
-                return;
-            }
-        }
-        assert(0);
-    }
-
-
-    /**
-     *
-     */
-    void addRange(void *pbot, void *ptop)
-    {
-        debug (PRINTF) printf("%x.Gcx::addRange(%x, %x), nranges = %d\n", this,
-                pbot, ptop, nranges);
-        if (nranges == rangedim)
-        {
-            size_t newdim = rangedim * 2 + 16;
-            Range *newranges;
-
-            newranges = cast(Range*) .malloc(newdim * newranges[0].sizeof);
-            if (!newranges)
-                onOutOfMemoryError();
-            if (ranges)
-            {   .memcpy(newranges, ranges, nranges * newranges[0].sizeof);
-                .free(ranges);
-            }
-            ranges = newranges;
-            rangedim = newdim;
-        }
-        ranges[nranges].pbot = pbot;
-        ranges[nranges].ptop = ptop;
-        nranges++;
-    }
-
-
-    /**
-     *
-     */
-    void removeRange(void *pbot)
-    {
-        debug (PRINTF) printf("%x.Gcx.removeRange(%x), nranges = %d\n", this,
-                pbot, nranges);
-        for (size_t i = nranges; i--;)
-        {
-            if (ranges[i].pbot == pbot)
-            {
-                nranges--;
-                .memmove(ranges + i, ranges + i + 1, (nranges - i) * ranges[0].sizeof);
-                return;
-            }
-        }
-        debug(PRINTF) printf("Wrong thread\n");
-
-        // This is a fatal error, but ignore it.
-        // The problem is that we can get a Close() call on a thread
-        // other than the one the range was allocated on.
-        //assert(zero);
-    }
-
-
-    /**
-     * Find Pool that pointer is in.
-     * Return null if not in a Pool.
-     * Assume pooltable[] is sorted.
-     */
-    Pool *findPool(void *p)
-    {
-        if (p >= minAddr && p < maxAddr)
-        {
-            if (npools == 1)
-            {
-                return pooltable[0];
-            }
-
-            for (size_t i = 0; i < npools; i++)
-            {   Pool *pool;
-
-                pool = pooltable[i];
-                if (p < pool.topAddr)
-                {   if (pool.baseAddr <= p)
-                        return pool;
-                    break;
-                }
-            }
-        }
-        return null;
-    }
-
-
-    /**
-     * Find base address of block containing pointer p.
-     * Returns null if not a gc'd pointer
-     */
-    void* findBase(void *p)
-    {
-        Pool *pool;
-
-        pool = findPool(p);
-        if (pool)
-        {
-            size_t offset = cast(size_t)(p - pool.baseAddr);
-            size_t pn = offset / PAGESIZE;
-            Bins   bin = cast(Bins)pool.pagetable[pn];
-
-            // Adjust bit to be at start of allocated memory block
-            if (bin <= B_PAGE)
-            {
-                return pool.baseAddr + (offset & notbinsize[bin]);
-            }
-            else if (bin == B_PAGEPLUS)
-            {
-                do
-                {   --pn, offset -= PAGESIZE;
-                } while (cast(Bins)pool.pagetable[pn] == B_PAGEPLUS);
-
-                return pool.baseAddr + (offset & (offset.max ^ (PAGESIZE-1)));
-            }
-            else
-            {
-                // we are in a B_FREE page
-                return null;
-            }
-        }
-        return null;
-    }
-
-
-    /**
-     * Find size of pointer p.
-     * Returns 0 if not a gc'd pointer
-     */
-    size_t findSize(void *p)
-    {
-        Pool*  pool;
-        size_t size = 0;
-
-        pool = findPool(p);
-        if (pool)
-        {
-            size_t pagenum;
-            Bins   bin;
-
-            pagenum = cast(size_t)(p - pool.baseAddr) / PAGESIZE;
-            bin = cast(Bins)pool.pagetable[pagenum];
-            size = binsize[bin];
-            if (bin == B_PAGE)
-            {
-                ubyte* pt;
-                size_t i;
-
-                pt = &pool.pagetable[0];
-                for (i = pagenum + 1; i < pool.npages; i++)
-                {
-                    if (pt[i] != B_PAGEPLUS)
-                        break;
-                }
-                size = (i - pagenum) * PAGESIZE;
-            }
-        }
-        return size;
-    }
-
-
-    /**
-     *
-     */
-    BlkInfo getInfo(void* p)
-    {
-        Pool*   pool;
-        BlkInfo info;
-
-        pool = findPool(p);
-        if (pool)
-        {
-            size_t offset = cast(size_t)(p - pool.baseAddr);
-            size_t pn = offset / PAGESIZE;
-            Bins   bin = cast(Bins)pool.pagetable[pn];
-
-            ////////////////////////////////////////////////////////////////////
-            // findAddr
-            ////////////////////////////////////////////////////////////////////
-
-            if (bin <= B_PAGE)
-            {
-                info.base = pool.baseAddr + (offset & notbinsize[bin]);
-            }
-            else if (bin == B_PAGEPLUS)
-            {
-                do
-                {   --pn, offset -= PAGESIZE;
-                } while (cast(Bins)pool.pagetable[pn] == B_PAGEPLUS);
-
-                info.base = pool.baseAddr + (offset & (offset.max ^ (PAGESIZE-1)));
-
-                // fix bin for use by size calc below
-                bin = cast(Bins)pool.pagetable[pn];
-            }
-
-            ////////////////////////////////////////////////////////////////////
-            // findSize
-            ////////////////////////////////////////////////////////////////////
-
-            info.size = binsize[bin];
-            if (bin == B_PAGE)
-            {
-                ubyte* pt;
-                size_t i;
-
-                pt = &pool.pagetable[0];
-                for (i = pn + 1; i < pool.npages; i++)
-                {
-                    if (pt[i] != B_PAGEPLUS)
-                        break;
-                }
-                info.size = (i - pn) * PAGESIZE;
-            }
-
-            ////////////////////////////////////////////////////////////////////
-            // getBits
-            ////////////////////////////////////////////////////////////////////
-
-            info.attr = getBits(pool, cast(size_t)(offset / 16));
-        }
-        return info;
-    }
-
-
-    /**
-     * Compute bin for size.
-     */
-    static Bins findBin(size_t size)
-    {   Bins bin;
-
-        if (size <= 256)
-        {
-            if (size <= 64)
-            {
-                if (size <= 16)
-                    bin = B_16;
-                else if (size <= 32)
-                    bin = B_32;
-                else
-                    bin = B_64;
-            }
-            else
-            {
-                if (size <= 128)
-                    bin = B_128;
-                else
-                    bin = B_256;
-            }
-        }
-        else
-        {
-            if (size <= 1024)
-            {
-                if (size <= 512)
-                    bin = B_512;
-                else
-                    bin = B_1024;
-            }
-            else
-            {
-                if (size <= 2048)
-                    bin = B_2048;
-                else
-                    bin = B_PAGE;
-            }
-        }
-        return bin;
-    }
-
-
-    /**
-     * Allocate a new pool of at least size bytes.
-     * Sort it into pooltable[].
-     * Mark all memory in the pool as B_FREE.
-     * Return the actual number of bytes reserved or 0 on error.
-     */
-    size_t reserve(size_t size)
-    {
-        size_t npages = (size + PAGESIZE - 1) / PAGESIZE;
-        Pool*  pool = newPool(npages);
-
-        if (!pool)
-            return 0;
-        return pool.npages * PAGESIZE;
-    }
-
-
-    /**
-     * Minimizes physical memory usage by returning free pools to the OS.
-     */
-    void minimize()
-    {
-        size_t n;
-        size_t pn;
-        Pool*  pool;
-
-        for (n = 0; n < npools; n++)
-        {
-            pool = pooltable[n];
-            for (pn = 0; pn < pool.npages; pn++)
-            {
-                if (cast(Bins)pool.pagetable[pn] != B_FREE)
-                    break;
-            }
-            if (pn < pool.npages)
-            {
-                n++;
-                continue;
-            }
-            pool.Dtor();
-            .free(pool);
-            .memmove(pooltable + n,
-                            pooltable + n + 1,
-                            (--npools - n) * (Pool*).sizeof);
-            minAddr = pooltable[0].baseAddr;
-            maxAddr = pooltable[npools - 1].topAddr;
-        }
-    }
-
-
-    /**
-     * Allocate a chunk of memory that is larger than a page.
-     * Return null if out of memory.
-     */
-    void *bigAlloc(size_t size)
-    {
-        Pool*  pool;
-        size_t npages;
-        size_t n;
-        size_t pn;
-        size_t freedpages;
-        void*  p;
-        int    state;
-
-        npages = (size + PAGESIZE - 1) / PAGESIZE;
-
-        for (state = 0; ; )
-        {
-            // This code could use some refinement when repeatedly
-            // allocating very large arrays.
-
-            for (n = 0; n < npools; n++)
-            {
-                pool = pooltable[n];
-                pn = pool.allocPages(npages);
-                if (pn != OPFAIL)
-                    goto L1;
-            }
-
-            // Failed
-            switch (state)
-            {
-            case 0:
-                if (disabled)
-                {   state = 1;
-                    continue;
-                }
-                // Try collecting
-                freedpages = fullcollectshell();
-                if (freedpages >= npools * ((POOLSIZE / PAGESIZE) / 4))
-                {   state = 1;
-                    continue;
-                }
-                // Release empty pools to prevent bloat
-                minimize();
-                // Allocate new pool
-                pool = newPool(npages);
-                if (!pool)
-                {   state = 2;
-                    continue;
-                }
-                pn = pool.allocPages(npages);
-                assert(pn != OPFAIL);
-                goto L1;
-            case 1:
-                // Release empty pools to prevent bloat
-                minimize();
-                // Allocate new pool
-                pool = newPool(npages);
-                if (!pool)
-                    goto Lnomemory;
-                pn = pool.allocPages(npages);
-                assert(pn != OPFAIL);
-                goto L1;
-            case 2:
-                goto Lnomemory;
-            default:
-                assert(false);
-            }
-        }
-
-      L1:
-        pool.pagetable[pn] = B_PAGE;
-        if (npages > 1)
-            .memset(&pool.pagetable[pn + 1], B_PAGEPLUS, npages - 1);
-        p = pool.baseAddr + pn * PAGESIZE;
-        .memset(cast(char *)p + size, 0, npages * PAGESIZE - size);
-        debug (MEMSTOMP) .memset(p, 0xF1, size);
-        //debug(PRINTF) printf("\tp = %x\n", p);
-        return p;
-
-      Lnomemory:
-        return null; // let mallocNoSync handle the error
-    }
-
-
-    /**
-     * Allocate a new pool with at least npages in it.
-     * Sort it into pooltable[].
-     * Return null if failed.
-     */
-    Pool *newPool(size_t npages)
-    {
-        Pool*  pool;
-        Pool** newpooltable;
-        size_t newnpools;
-        size_t i;
-
-        //debug(PRINTF) printf("************Gcx::newPool(npages = %d)****************\n", npages);
-
-        // Minimum of POOLSIZE
-        if (npages < POOLSIZE/PAGESIZE)
-            npages = POOLSIZE/PAGESIZE;
-        else if (npages > POOLSIZE/PAGESIZE)
-        {   // Give us 150% of requested size, so there's room to extend
-            auto n = npages + (npages >> 1);
-            if (n < size_t.max/PAGESIZE)
-                npages = n;
-        }
-
-        // Allocate successively larger pools up to 8 megs
-        if (npools)
-        {   size_t n;
-
-            n = npools;
-            if (n > 8)
-                n = 8;                  // cap pool size at 8 megs
-            n *= (POOLSIZE / PAGESIZE);
-            if (npages < n)
-                npages = n;
-        }
-
-        pool = cast(Pool *) .calloc(1, Pool.sizeof);
-        if (pool)
-        {
-            pool.initialize(npages);
-            if (!pool.baseAddr)
-                goto Lerr;
-
-            newnpools = npools + 1;
-            newpooltable = cast(Pool **) .realloc(pooltable, newnpools * (Pool *).sizeof);
-            if (!newpooltable)
-                goto Lerr;
-
-            // Sort pool into newpooltable[]
-            for (i = 0; i < npools; i++)
-            {
-                if (pool.opCmp(newpooltable[i]) < 0)
-                     break;
-            }
-            .memmove(newpooltable + i + 1, newpooltable + i, (npools - i) * (Pool *).sizeof);
-            newpooltable[i] = pool;
-
-            pooltable = newpooltable;
-            npools = newnpools;
-
-            minAddr = pooltable[0].baseAddr;
-            maxAddr = pooltable[npools - 1].topAddr;
-        }
-        return pool;
-
-      Lerr:
-        pool.Dtor();
-        .free(pool);
-        return null;
-    }
-
-
-    /**
-     * Allocate a page of bin's.
-     * Returns:
-     *  0       failed
-     */
-    int allocPage(Bins bin)
-    {
-        Pool*  pool;
-        size_t n;
-        size_t pn;
-        byte*  p;
-        byte*  ptop;
-
-        //debug(PRINTF) printf("Gcx::allocPage(bin = %d)\n", bin);
-        for (n = 0; n < npools; n++)
-        {
-            pool = pooltable[n];
-            pn = pool.allocPages(1);
-            if (pn != OPFAIL)
-                goto L1;
-        }
-        return 0;               // failed
-
-      L1:
-        pool.pagetable[pn] = cast(ubyte)bin;
-
-        // Convert page to free list
-        size_t size = binsize[bin];
-        List **b = &bucket[bin];
-
-        p = pool.baseAddr + pn * PAGESIZE;
-        ptop = p + PAGESIZE;
-        for (; p < ptop; p += size)
-        {
-            (cast(List *)p).next = *b;
-            *b = cast(List *)p;
-        }
-        return 1;
-    }
-
-
-    /**
-     * Search a range of memory values and mark any pointers into the GC pool.
-     */
-    void mark(void *pbot, void *ptop)
-    {
-        void **p1 = cast(void **)pbot;
-        void **p2 = cast(void **)ptop;
-        size_t pcache = 0;
-        uint changes = 0;
-
-        //printf("marking range: %p -> %p\n", pbot, ptop);
-        for (; p1 < p2; p1++)
+/**
+ *
+ */
+size_t sweep()
+{
+    // Free up everything not marked
+    debug(COLLECT_PRINTF) printf("\tsweep\n");
+    gc.p_cache = null;
+    gc.size_cache = 0;
+    size_t freedpages = 0;
+    size_t freed = 0;
+    for (size_t n = 0; n < gc.pools.length; n++)
+    {
+        Pool* pool = gc.pools[n];
+        pool.clear_cache();
+        uint*  bbase = pool.mark.base();
+        size_t pn;
+        for (pn = 0; pn < pool.npages; pn++, bbase += PAGESIZE / (32 * 16))
         {
         {
-            Pool *pool;
-            byte *p = cast(byte *)(*p1);
+            Bins bin = cast(Bins)pool.pagetable[pn];
 
 
-            //if (log) debug(PRINTF) printf("\tmark %x\n", p);
-            if (p >= minAddr && p < maxAddr)
+            if (bin < B_PAGE)
             {
             {
-                if ((cast(size_t)p & ~(PAGESIZE-1)) == pcache)
-                   continue;
+                auto size = binsize[bin];
+                byte* p = pool.baseAddr + pn * PAGESIZE;
+                byte* ptop = p + PAGESIZE;
+                size_t bit_i = pn * (PAGESIZE/16);
+                size_t bit_stride = size / 16;
 
 
-                pool = findPool(p);
-                if (pool)
+version(none) // BUG: doesn't work because freebits() must also be cleared
+{
+                // If free'd entire page
+                if (bbase[0] == 0 && bbase[1] == 0 && bbase[2] == 0 &&
+                        bbase[3] == 0 && bbase[4] == 0 && bbase[5] == 0 &&
+                        bbase[6] == 0 && bbase[7] == 0)
                 {
                 {
-                    size_t offset = cast(size_t)(p - pool.baseAddr);
-                    size_t biti;
-                    size_t pn = offset / PAGESIZE;
-                    Bins   bin = cast(Bins)pool.pagetable[pn];
-
-                    //debug(PRINTF) printf("\t\tfound pool %x, base=%x, pn = %d, bin = %d, biti = x%x\n", pool, pool.baseAddr, pn, bin, biti);
-
-                    // Adjust bit to be at start of allocated memory block
-                    if (bin <= B_PAGE)
+                    for (; p < ptop; p += size, bit_i += bit_stride)
                     {
                     {
-                        biti = (offset & notbinsize[bin]) >> 4;
-                        //debug(PRINTF) printf("\t\tbiti = x%x\n", biti);
+                        if (pool.finals.testClear(bit_i)) {
+                            if (opts.options.sentinel)
+                                rt_finalize(sentinel_add(p), false/*gc.no_stack > 0*/);
+                            else
+                                rt_finalize(p, false/*gc.no_stack > 0*/);
+                        }
+                        clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
+
+                        if (opts.options.mem_stomp)
+                            memset(p, 0xF3, size);
                     }
                     }
-                    else if (bin == B_PAGEPLUS)
+                    pool.pagetable[pn] = B_FREE;
+                    freed += PAGESIZE;
+                    continue;
+                }
+}
+                for (; p < ptop; p += size, bit_i += bit_stride)
+                {
+                    if (!pool.mark.test(bit_i))
                     {
                     {
-                        do
-                        {   --pn;
-                        } while (cast(Bins)pool.pagetable[pn] == B_PAGEPLUS);
-                        biti = pn * (PAGESIZE / 16);
+                        if (opts.options.sentinel)
+                            sentinel_Invariant(sentinel_add(p));
+
+                        pool.freebits.set(bit_i);
+                        if (pool.finals.testClear(bit_i)) {
+                            if (opts.options.sentinel)
+                                rt_finalize(sentinel_add(p), false/*gc.no_stack > 0*/);
+                            else
+                                rt_finalize(p, false/*gc.no_stack > 0*/);
+                        }
+                        clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
+
+                        if (opts.options.mem_stomp)
+                            memset(p, 0xF3, size);
+
+                        freed += size;
                     }
                     }
-                    else
-                    {
-                        // Don't mark bits in B_FREE pages
-                        continue;
+                }
+            }
+            else if (bin == B_PAGE)
+            {
+                size_t bit_stride = PAGESIZE / 16;
+                size_t bit_i = pn * bit_stride;
+                if (!pool.mark.test(bit_i))
+                {
+                    byte *p = pool.baseAddr + pn * PAGESIZE;
+                    if (opts.options.sentinel)
+                        sentinel_Invariant(sentinel_add(p));
+                    if (pool.finals.testClear(bit_i)) {
+                        if (opts.options.sentinel)
+                            rt_finalize(sentinel_add(p), false/*gc.no_stack > 0*/);
+                        else
+                            rt_finalize(p, false/*gc.no_stack > 0*/);
                     }
                     }
+                    clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
 
 
-                    if (bin >= B_PAGE) // Cache B_PAGE and B_PAGEPLUS lookups
-                        pcache = cast(size_t)p & ~(PAGESIZE-1);
-
-                    //debug(PRINTF) printf("\t\tmark(x%x) = %d\n", biti, pool.mark.test(biti));
-                    if (!pool.mark.test(biti))
+                    debug(COLLECT_PRINTF) printf("\tcollecting big %p\n", p);
+                    pool.pagetable[pn] = B_FREE;
+                    pool.freebits.set_group(bit_i, PAGESIZE / 16);
+                    freedpages++;
+                    if (opts.options.mem_stomp)
+                        memset(p, 0xF3, PAGESIZE);
+                    while (pn + 1 < pool.npages && pool.pagetable[pn + 1] == B_PAGEPLUS)
                     {
                     {
-                        //if (log) debug(PRINTF) printf("\t\tmarking %x\n", p);
-                        pool.mark.set(biti);
-                        if (!pool.noscan.test(biti))
+                        pn++;
+                        pool.pagetable[pn] = B_FREE;
+                        bit_i += bit_stride;
+                        pool.freebits.set_group(bit_i, PAGESIZE / 16);
+                        freedpages++;
+
+                        if (opts.options.mem_stomp)
                         {
                         {
-                            pool.scan.set(biti);
-                            changes = 1;
+                            p += PAGESIZE;
+                            memset(p, 0xF3, PAGESIZE);
                         }
                         }
-                        log_parent(sentinel_add(pool.baseAddr + biti * 16), sentinel_add(pbot));
                     }
                 }
             }
         }
                     }
                 }
             }
         }
-        anychanges |= changes;
     }
 
     }
 
+    // Zero buckets
+    gc.free_list[] = null;
 
 
-    /**
-     * Return number of full pages free'd.
-     */
-    size_t fullcollectshell()
+    // Free complete pages, rebuild free list
+    debug(COLLECT_PRINTF) printf("\tfree complete pages\n");
+    size_t recoveredpages = 0;
+    for (size_t n = 0; n < gc.pools.length; n++)
     {
     {
-        // The purpose of the 'shell' is to ensure all the registers
-        // get put on the stack so they'll be scanned
-        void *sp;
-        size_t result;
-        version (GNU)
+        Pool* pool = gc.pools[n];
+        for (size_t pn = 0; pn < pool.npages; pn++)
         {
         {
-            __builtin_unwind_init();
-            sp = & sp;
-        }
-        else version(LDC)
-        {
-            version(X86)
+            Bins   bin = cast(Bins)pool.pagetable[pn];
+            size_t bit_i;
+            size_t u;
+
+            if (bin < B_PAGE)
             {
             {
-                uint eax,ecx,edx,ebx,ebp,esi,edi;
-                asm
+                size_t size = binsize[bin];
+                size_t bit_stride = size / 16;
+                size_t bit_base = pn * (PAGESIZE / 16);
+                size_t bit_top = bit_base + (PAGESIZE / 16);
+                byte*  p;
+
+                bit_i = bit_base;
+                for (; bit_i < bit_top; bit_i += bit_stride)
                 {
                 {
-                    mov eax[EBP], EAX      ;
-                    mov ecx[EBP], ECX      ;
-                    mov edx[EBP], EDX      ;
-                    mov ebx[EBP], EBX      ;
-                    mov ebp[EBP], EBP      ;
-                    mov esi[EBP], ESI      ;
-                    mov edi[EBP], EDI      ;
-                    mov  sp[EBP], ESP      ;
+                    if (!pool.freebits.test(bit_i))
+                        goto Lnotfree;
                 }
                 }
-            }
-            else version (X86_64)
-            {
-                ulong rax,rbx,rcx,rdx,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15;
-                asm
+                pool.pagetable[pn] = B_FREE;
+                pool.freebits.set_group(bit_base, PAGESIZE / 16);
+                recoveredpages++;
+                continue;
+
+             Lnotfree:
+                p = pool.baseAddr + pn * PAGESIZE;
+                for (u = 0; u < PAGESIZE; u += size)
                 {
                 {
-                    movq rax[RBP], RAX      ;
-                    movq rbx[RBP], RBX      ;
-                    movq rcx[RBP], RCX      ;
-                    movq rdx[RBP], RDX      ;
-                    movq rbp[RBP], RBP      ;
-                    movq rsi[RBP], RSI      ;
-                    movq rdi[RBP], RDI      ;
-                    movq r8 [RBP], R8       ;
-                    movq r9 [RBP], R9       ;
-                    movq r10[RBP], R10      ;
-                    movq r11[RBP], R11      ;
-                    movq r12[RBP], R12      ;
-                    movq r13[RBP], R13      ;
-                    movq r14[RBP], R14      ;
-                    movq r15[RBP], R15      ;
-                    movq  sp[RBP], RSP      ;
+                    bit_i = bit_base + u / 16;
+                    if (pool.freebits.test(bit_i))
+                    {
+                        assert ((p+u) >= pool.baseAddr);
+                        assert ((p+u) < pool.topAddr);
+                        List* list = cast(List*) (p + u);
+                        // avoid unnecesary writes (it really saves time)
+                        if (list.next != gc.free_list[bin])
+                            list.next = gc.free_list[bin];
+                        if (list.pool != pool)
+                            list.pool = pool;
+                        gc.free_list[bin] = list;
+                    }
                 }
             }
                 }
             }
-            else
-            {
-                static assert( false, "Architecture not supported." );
-            }
-        }
-        else
-        {
-        asm
-        {
-            pushad              ;
-            mov sp[EBP],ESP     ;
-        }
-        }
-        result = fullcollect(sp);
-        version (GNU)
-        {
-            // nothing to do
-        }
-        else version(LDC)
-        {
-            // nothing to do
-        }
-        else
-        {
-        asm
-        {
-            popad               ;
         }
         }
-        }
-        return result;
     }
 
     }
 
+    debug(COLLECT_PRINTF) printf("recovered pages = %d\n", recoveredpages);
+    debug(COLLECT_PRINTF) printf("\tfree'd %u bytes, %u pages from %u pools\n", freed, freedpages, gc.pools.length);
 
 
-    /**
-     *
-     */
-    size_t fullcollect(void *stackTop)
+    return freedpages + recoveredpages;
+}
+
+
+/**
+ *
+ */
+uint getAttr(Pool* pool, size_t bit_i)
+in
+{
+    assert( pool );
+}
+body
+{
+    uint attrs;
+    if (pool.finals.test(bit_i))
+        attrs |= BlkAttr.FINALIZE;
+    if (pool.noscan.test(bit_i))
+        attrs |= BlkAttr.NO_SCAN;
+//        if (pool.nomove.test(bit_i))
+//            attrs |= BlkAttr.NO_MOVE;
+    return attrs;
+}
+
+
+/**
+ *
+ */
+void setAttr(Pool* pool, size_t bit_i, uint mask)
+in
+{
+    assert( pool );
+}
+body
+{
+    if (mask & BlkAttr.FINALIZE)
     {
     {
-        size_t n;
-        Pool*  pool;
+        pool.finals.set(bit_i);
+    }
+    if (mask & BlkAttr.NO_SCAN)
+    {
+        pool.noscan.set(bit_i);
+    }
+//        if (mask & BlkAttr.NO_MOVE)
+//        {
+//            if (!pool.nomove.nbits)
+//                pool.nomove.alloc(pool.mark.nbits);
+//            pool.nomove.set(bit_i);
+//        }
+}
 
 
-        debug(COLLECT_PRINTF) printf("Gcx.fullcollect()\n");
 
 
-        thread_suspendAll();
+/**
+ *
+ */
+void clrAttr(Pool* pool, size_t bit_i, uint mask)
+in
+{
+    assert( pool );
+}
+body
+{
+    if (mask & BlkAttr.FINALIZE)
+        pool.finals.clear(bit_i);
+    if (mask & BlkAttr.NO_SCAN)
+        pool.noscan.clear(bit_i);
+//        if (mask & BlkAttr.NO_MOVE && pool.nomove.nbits)
+//            pool.nomove.clear(bit_i);
+}
 
 
-        p_cache = null;
-        size_cache = 0;
 
 
-        anychanges = 0;
-        for (n = 0; n < npools; n++)
-        {
-            pool = pooltable[n];
-            pool.mark.zero();
-            pool.scan.zero();
-            pool.freebits.zero();
-        }
+void disable_fork()
+{
+    // we have to disable both options, as eager_alloc assumes fork is enabled
+    opts.options.fork = false;
+    opts.options.eager_alloc = false;
+}
 
 
-        // Mark each free entry, so it doesn't get scanned
-        for (n = 0; n < B_PAGE; n++)
-        {
-            for (List *list = bucket[n]; list; list = list.next)
-            {
-                pool = findPool(list);
-                assert(pool);
-                pool.freebits.set(cast(size_t)(cast(byte*)list - pool.baseAddr) / 16);
-            }
-        }
 
 
-        for (n = 0; n < npools; n++)
-        {
-            pool = pooltable[n];
-            pool.mark.copy(&pool.freebits);
-        }
+void initialize()
+{
+    int dummy;
+    gc.stack_bottom = cast(char*)&dummy;
+    opts.parse(cstdlib.getenv("D_GC_OPTS"));
+    // If we are going to fork, make sure we have the needed OS support
+    if (opts.options.fork)
+        opts.options.fork = os.HAVE_SHARED && os.HAVE_FORK;
+    // Eager allocation is only possible when forking
+    if (!opts.options.fork)
+        opts.options.eager_alloc = false;
+    gc.lock = GCLock.classinfo;
+    gc.inited = 1;
+    setStackBottom(rt_stackBottom());
+    gc.stats = Stats(gc);
+    if (opts.options.prealloc_npools) {
+        size_t pages = round_up(opts.options.prealloc_psize, PAGESIZE);
+        for (size_t i = 0; i < opts.options.prealloc_npools; ++i)
+            newPool(pages);
+    }
+}
 
 
-        rt_scanStaticData( &mark );
 
 
-        if (!noStack)
-        {
-            // Scan stacks and registers for each paused thread
-            thread_scanAll( &mark, stackTop );
-        }
+//
+//
+//
+private void *malloc(size_t size, uint attrs, size_t* pm_bitmask)
+{
+    assert(size != 0);
 
 
-        // Scan roots[]
-        debug(COLLECT_PRINTF) printf("scan roots[]\n");
-        mark(roots, roots + nroots);
+    gc.stats.malloc_started(size, attrs, pm_bitmask);
+    scope (exit)
+        gc.stats.malloc_finished(p);
 
 
-        // Scan ranges[]
-        debug(COLLECT_PRINTF) printf("scan ranges[]\n");
-        //log++;
-        for (n = 0; n < nranges; n++)
-        {
-            debug(COLLECT_PRINTF) printf("\t%x .. %x\n", ranges[n].pbot, ranges[n].ptop);
-            mark(ranges[n].pbot, ranges[n].ptop);
-        }
-        //log--;
+    void *p = null;
+    Bins bin;
+
+    if (opts.options.sentinel)
+        size += SENTINEL_EXTRA;
+
+    bool has_pm = has_pointermap(attrs);
+    if (has_pm)
+        size += size_t.sizeof;
+
+    // Compute size bin
+    // Cache previous binsize lookup - Dave Fladebo.
+    static size_t lastsize = -1;
+    static Bins lastbin;
+    if (size == lastsize)
+        bin = lastbin;
+    else
+    {
+        bin = findBin(size);
+        lastsize = size;
+        lastbin = bin;
+    }
 
 
-        debug(COLLECT_PRINTF) printf("\tscan heap\n");
-        while (anychanges)
+    Pool* pool = void;
+    size_t bit_i = void;
+    size_t capacity = void; // to figure out where to store the bitmask
+    if (bin < B_PAGE)
+    {
+        p = gc.free_list[bin];
+        if (p is null)
         {
         {
-            anychanges = 0;
-            for (n = 0; n < npools; n++)
+            if (!allocPage(bin) && !gc.disabled)   // try to find a new page
             {
             {
-                uint *bbase;
-                uint *b;
-                uint *btop;
-
-                pool = pooltable[n];
-
-                bbase = pool.scan.base();
-                btop = bbase + pool.scan.nwords;
-                for (b = bbase; b < btop;)
-                {   Bins   bin;
-                    size_t pn;
-                    size_t u;
-                    size_t bitm;
-                    byte*  o;
-
-                    bitm = *b;
-                    if (!bitm)
-                    {   b++;
-                        continue;
-                    }
-                    *b = 0;
-
-                    o = pool.baseAddr + (b - bbase) * 32 * 16;
-                    if (!(bitm & 0xFFFF))
-                    {
-                        bitm >>= 16;
-                        o += 16 * 16;
-                    }
-                    for (; bitm; o += 16, bitm >>= 1)
+                if (!thread_needLock())
+                {
+                    /* Then we haven't locked it yet. Be sure
+                     * and gc.lock for a collection, since a finalizer
+                     * may start a new thread.
+                     */
+                    synchronized (gc.lock)
                     {
                     {
-                        if (!(bitm & 1))
-                            continue;
-
-                        pn = cast(size_t)(o - pool.baseAddr) / PAGESIZE;
-                        bin = cast(Bins)pool.pagetable[pn];
-                        if (bin < B_PAGE)
-                        {
-                            mark(o, o + binsize[bin]);
-                        }
-                        else if (bin == B_PAGE || bin == B_PAGEPLUS)
-                        {
-                            if (bin == B_PAGEPLUS)
-                            {
-                                while (pool.pagetable[pn - 1] != B_PAGE)
-                                    pn--;
-                            }
-                            u = 1;
-                            while (pn + u < pool.npages && pool.pagetable[pn + u] == B_PAGEPLUS)
-                                u++;
-                            mark(o, o + u * PAGESIZE);
-                        }
+                        fullcollectshell();
                     }
                 }
                     }
                 }
+                else if (!fullcollectshell())       // collect to find a new page
+                {
+                    //newPool(1);
+                }
             }
             }
-        }
-
-        thread_resumeAll();
-
-        // Free up everything not marked
-        debug(COLLECT_PRINTF) printf("\tfree'ing\n");
-        size_t freedpages = 0;
-        size_t freed = 0;
-        for (n = 0; n < npools; n++)
-        {   size_t pn;
-            uint*  bbase;
-
-            pool = pooltable[n];
-            bbase = pool.mark.base();
-            for (pn = 0; pn < pool.npages; pn++, bbase += PAGESIZE / (32 * 16))
+            if (!gc.free_list[bin] && !allocPage(bin))
             {
             {
-                Bins bin = cast(Bins)pool.pagetable[pn];
-
-                if (bin < B_PAGE)
-                {   byte* p;
-                    byte* ptop;
-                    size_t biti;
-                    size_t bitstride;
-                    auto   size = binsize[bin];
+                newPool(1);         // allocate new pool to find a new page
+                // TODO: hint allocPage() to use the pool we just created
+                int result = allocPage(bin);
+                if (!result)
+                    onOutOfMemoryError();
+            }
+            p = gc.free_list[bin];
+        }
+        capacity = binsize[bin];
+
+        // Return next item from free list
+        List* list = cast(List*) p;
+        assert ((cast(byte*)list) >= list.pool.baseAddr);
+        assert ((cast(byte*)list) < list.pool.topAddr);
+        gc.free_list[bin] = list.next;
+        pool = list.pool;
+        bit_i = (p - pool.baseAddr) / 16;
+        assert (pool.freebits.test(bit_i));
+        pool.freebits.clear(bit_i);
+        if (!(attrs & BlkAttr.NO_SCAN))
+            memset(p + size, 0, capacity - size);
+        if (opts.options.mem_stomp)
+            memset(p, 0xF0, size);
+    }
+    else
+    {
+        size_t pn;
+        size_t npages = round_up(size, PAGESIZE);
+        p = bigAlloc(npages, pool, &pn);
+        if (!p)
+            onOutOfMemoryError();
+        assert (pool !is null);
 
 
-                    p = pool.baseAddr + pn * PAGESIZE;
-                    ptop = p + PAGESIZE;
-                    biti = pn * (PAGESIZE/16);
-                    bitstride = size / 16;
+        capacity = npages * PAGESIZE;
+        bit_i = pn * (PAGESIZE / 16);
+        pool.freebits.clear(bit_i);
+        pool.pagetable[pn] = B_PAGE;
+        if (npages > 1)
+            memset(&pool.pagetable[pn + 1], B_PAGEPLUS, npages - 1);
+        p = pool.baseAddr + pn * PAGESIZE;
+        memset(cast(char *)p + size, 0, npages * PAGESIZE - size);
+        if (opts.options.mem_stomp)
+            memset(p, 0xF1, size);
 
 
-    version(none) // BUG: doesn't work because freebits() must also be cleared
-    {
-                    // If free'd entire page
-                    if (bbase[0] == 0 && bbase[1] == 0 && bbase[2] == 0 && bbase[3] == 0 &&
-                        bbase[4] == 0 && bbase[5] == 0 && bbase[6] == 0 && bbase[7] == 0)
-                    {
-                        for (; p < ptop; p += size, biti += bitstride)
-                        {
-                            if (pool.finals.nbits && pool.finals.testClear(biti))
-                                rt_finalize(cast(List *)sentinel_add(p), false/*noStack > 0*/);
-                            gcx.clrBits(pool, biti, BlkAttr.ALL_BITS);
+    }
 
 
-                            List *list = cast(List *)p;
-                            //debug(PRINTF) printf("\tcollecting %x\n", list);
-                            log_free(sentinel_add(list));
+    // Store the bit mask AFTER SENTINEL_POST
+    // TODO: store it BEFORE, so the bitmask is protected too
+    if (has_pm) {
+        auto end_of_blk = cast(size_t**)(p + capacity - size_t.sizeof);
+        *end_of_blk = pm_bitmask;
+        size -= size_t.sizeof;
+    }
 
 
-                            debug (MEMSTOMP) .memset(p, 0xF3, size);
-                        }
-                        pool.pagetable[pn] = B_FREE;
-                        freed += PAGESIZE;
-                        //debug(PRINTF) printf("freeing entire page %d\n", pn);
-                        continue;
-                    }
+    if (opts.options.sentinel) {
+        size -= SENTINEL_EXTRA;
+        p = sentinel_add(p);
+        sentinel_init(p, size);
     }
     }
-                    for (; p < ptop; p += size, biti += bitstride)
-                    {
-                        if (!pool.mark.test(biti))
-                        {
-                            sentinel_Invariant(sentinel_add(p));
 
 
-                            pool.freebits.set(biti);
-                            if (pool.finals.nbits && pool.finals.testClear(biti))
-                                rt_finalize(cast(List *)sentinel_add(p), false/*noStack > 0*/);
-                            clrBits(pool, biti, BlkAttr.ALL_BITS);
+    if (attrs) {
+        setAttr(pool, bit_i, attrs);
+        assert (bin >= B_PAGE || !pool.freebits.test(bit_i));
+    }
 
 
-                            List *list = cast(List *)p;
-                            debug(PRINTF) printf("\tcollecting %x\n", list);
-                            log_free(sentinel_add(list));
+    return p;
+}
 
 
-                            debug (MEMSTOMP) .memset(p, 0xF3, size);
 
 
-                            freed += size;
-                        }
-                    }
-                }
-                else if (bin == B_PAGE)
-                {   size_t biti = pn * (PAGESIZE / 16);
+//
+//
+//
+private void *calloc(size_t size, uint attrs, size_t* pm_bitmask)
+{
+    assert(size != 0);
 
 
-                    if (!pool.mark.test(biti))
-                    {   byte *p = pool.baseAddr + pn * PAGESIZE;
+    void *p = malloc(size, attrs, pm_bitmask);
+    memset(p, 0, size);
+    return p;
+}
 
 
-                        sentinel_Invariant(sentinel_add(p));
-                        if (pool.finals.nbits && pool.finals.testClear(biti))
-                            rt_finalize(sentinel_add(p), false/*noStack > 0*/);
-                        clrBits(pool, biti, BlkAttr.ALL_BITS);
 
 
-                        debug(COLLECT_PRINTF) printf("\tcollecting big %x\n", p);
-                        log_free(sentinel_add(p));
-                        pool.pagetable[pn] = B_FREE;
-                        freedpages++;
-                        debug (MEMSTOMP) .memset(p, 0xF3, PAGESIZE);
-                        while (pn + 1 < pool.npages && pool.pagetable[pn + 1] == B_PAGEPLUS)
-                        {
-                            pn++;
-                            pool.pagetable[pn] = B_FREE;
-                            freedpages++;
+//
+//
+//
+private void *realloc(void *p, size_t size, uint attrs,
+        size_t* pm_bitmask)
+{
+    if (!size)
+    {
+        if (p)
+        {
+            free(p);
+            p = null;
+        }
+    }
+    else if (!p)
+    {
+        p = malloc(size, attrs, pm_bitmask);
+    }
+    else
+    {
+        Pool* pool = findPool(p);
+        if (pool is null)
+            return null;
 
 
-                            debug (MEMSTOMP)
-                            {   p += PAGESIZE;
-                                .memset(p, 0xF3, PAGESIZE);
-                            }
-                        }
-                    }
-                }
+        // Set or retrieve attributes as appropriate
+        auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
+        if (attrs) {
+            clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
+            setAttr(pool, bit_i, attrs);
+        }
+        else
+            attrs = getAttr(pool, bit_i);
+
+        void* blk_base_addr = pool.findBase(p);
+        size_t blk_size = pool.findSize(p);
+        bool has_pm = has_pointermap(attrs);
+        size_t pm_bitmask_size = 0;
+        if (has_pm) {
+            pm_bitmask_size = size_t.sizeof;
+            // Retrieve pointer map bit mask if appropriate
+            if (pm_bitmask is null) {
+                auto end_of_blk = cast(size_t**)(blk_base_addr +
+                        blk_size - size_t.sizeof);
+                pm_bitmask = *end_of_blk;
             }
         }
 
             }
         }
 
-        // Zero buckets
-        bucket[] = null;
-
-        // Free complete pages, rebuild free list
-        debug(COLLECT_PRINTF) printf("\tfree complete pages\n");
-        size_t recoveredpages = 0;
-        for (n = 0; n < npools; n++)
-        {   size_t pn;
-
-            pool = pooltable[n];
-            for (pn = 0; pn < pool.npages; pn++)
+        if (opts.options.sentinel)
+        {
+            sentinel_Invariant(p);
+            size_t sentinel_stored_size = *sentinel_size(p);
+            if (sentinel_stored_size != size)
             {
             {
-                Bins   bin = cast(Bins)pool.pagetable[pn];
-                size_t biti;
-                size_t u;
+                void* p2 = malloc(size, attrs, pm_bitmask);
+                if (sentinel_stored_size < size)
+                    size = sentinel_stored_size;
+                cstring.memcpy(p2, p, size);
+                p = p2;
+            }
+        }
+        else
+        {
+            size += pm_bitmask_size;
+            if (blk_size >= PAGESIZE && size >= PAGESIZE)
+            {
+                auto psz = blk_size / PAGESIZE;
+                auto newsz = round_up(size, PAGESIZE);
+                if (newsz == psz)
+                    return p;
+
+                auto pagenum = (p - pool.baseAddr) / PAGESIZE;
 
 
-                if (bin < B_PAGE)
+                if (newsz < psz)
                 {
                 {
-                    size_t size = binsize[bin];
-                    size_t bitstride = size / 16;
-                    size_t bitbase = pn * (PAGESIZE / 16);
-                    size_t bittop = bitbase + (PAGESIZE / 16);
-                    byte*  p;
-
-                    biti = bitbase;
-                    for (biti = bitbase; biti < bittop; biti += bitstride)
-                    {   if (!pool.freebits.test(biti))
-                            goto Lnotfree;
+                    // Shrink in place
+                    if (opts.options.mem_stomp)
+                        memset(p + size - pm_bitmask_size, 0xF2,
+                                blk_size - size - pm_bitmask_size);
+                    pool.freePages(pagenum + newsz, psz - newsz);
+                    auto new_blk_size = (PAGESIZE * newsz);
+                    // update the size cache, assuming that is very likely the
+                    // size of this block will be queried in the near future
+                    pool.update_cache(p, new_blk_size);
+                    if (has_pm) {
+                        auto end_of_blk = cast(size_t**)(blk_base_addr +
+                                new_blk_size - pm_bitmask_size);
+                        *end_of_blk = pm_bitmask;
                     }
                     }
-                    pool.pagetable[pn] = B_FREE;
-                    recoveredpages++;
-                    continue;
-
-                 Lnotfree:
-                    p = pool.baseAddr + pn * PAGESIZE;
-                    for (u = 0; u < PAGESIZE; u += size)
-                    {   biti = bitbase + u / 16;
-                        if (pool.freebits.test(biti))
-                        {   List *list;
-
-                            list = cast(List *)(p + u);
-                            if (list.next != bucket[bin])       // avoid unnecessary writes
-                                list.next = bucket[bin];
-                            bucket[bin] = list;
+                    return p;
+                }
+                else if (pagenum + newsz <= pool.npages)
+                {
+                    // Attempt to expand in place
+                    for (size_t i = pagenum + psz; 1;)
+                    {
+                        if (i == pagenum + newsz)
+                        {
+                            if (opts.options.mem_stomp)
+                                memset(p + blk_size - pm_bitmask_size,
+                                        0xF0, size - blk_size
+                                        - pm_bitmask_size);
+                            memset(pool.pagetable + pagenum +
+                                    psz, B_PAGEPLUS, newsz - psz);
+                            auto new_blk_size = (PAGESIZE * newsz);
+                            // update the size cache, assuming that is very
+                            // likely the size of this block will be queried in
+                            // the near future
+                            pool.update_cache(p, new_blk_size);
+                            if (has_pm) {
+                                auto end_of_blk = cast(size_t**)(
+                                        blk_base_addr + new_blk_size -
+                                        pm_bitmask_size);
+                                *end_of_blk = pm_bitmask;
+                            }
+                            return p;
+                        }
+                        if (i == pool.npages)
+                        {
+                            break;
                         }
                         }
+                        if (pool.pagetable[i] != B_FREE)
+                            break;
+                        i++;
                     }
                 }
             }
                     }
                 }
             }
+            // if new size is bigger or less than half
+            if (blk_size < size || blk_size > size * 2)
+            {
+                size -= pm_bitmask_size;
+                blk_size -= pm_bitmask_size;
+                void* p2 = malloc(size, attrs, pm_bitmask);
+                if (blk_size < size)
+                    size = blk_size;
+                cstring.memcpy(p2, p, size);
+                p = p2;
+            }
         }
         }
+    }
+    return p;
+}
 
 
-        debug(COLLECT_PRINTF) printf("recovered pages = %d\n", recoveredpages);
-        debug(COLLECT_PRINTF) printf("\tfree'd %u bytes, %u pages from %u pools\n", freed, freedpages, npools);
 
 
-        return freedpages + recoveredpages;
-    }
+/**
+ * Attempt to in-place enlarge the memory block pointed to by p by at least
+ * min_size beyond its current capacity, up to a maximum of max_size.  This
+ * does not attempt to move the memory block (like realloc() does).
+ *
+ * Returns:
+ *  0 if could not extend p,
+ *  total size of entire memory block if successful.
+ */
+private size_t extend(void* p, size_t minsize, size_t maxsize)
+in
+{
+    assert( minsize <= maxsize );
+}
+body
+{
+    if (opts.options.sentinel)
+        return 0;
 
 
+    Pool* pool = findPool(p);
+    if (pool is null)
+        return 0;
 
 
-    /**
-     *
-     */
-    uint getBits(Pool* pool, size_t biti)
-    in
-    {
-        assert( pool );
-    }
-    body
-    {
-        uint bits;
+    // Retrieve attributes
+    auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
+    uint attrs = getAttr(pool, bit_i);
 
 
-        if (pool.finals.nbits &&
-            pool.finals.test(biti))
-            bits |= BlkAttr.FINALIZE;
-        if (pool.noscan.test(biti))
-            bits |= BlkAttr.NO_SCAN;
-//        if (pool.nomove.nbits &&
-//            pool.nomove.test(biti))
-//            bits |= BlkAttr.NO_MOVE;
-        return bits;
+    void* blk_base_addr = pool.findBase(p);
+    size_t blk_size = pool.findSize(p);
+    bool has_pm = has_pointermap(attrs);
+    size_t* pm_bitmask = null;
+    size_t pm_bitmask_size = 0;
+    if (has_pm) {
+        pm_bitmask_size = size_t.sizeof;
+        // Retrieve pointer map bit mask
+        auto end_of_blk = cast(size_t**)(blk_base_addr +
+                blk_size - size_t.sizeof);
+        pm_bitmask = *end_of_blk;
+
+        minsize += size_t.sizeof;
+        maxsize += size_t.sizeof;
     }
 
     }
 
+    if (blk_size < PAGESIZE)
+        return 0; // cannot extend buckets
 
 
-    /**
-     *
-     */
-    void setBits(Pool* pool, size_t biti, uint mask)
-    in
-    {
-        assert( pool );
-    }
-    body
+    auto psz = blk_size / PAGESIZE;
+    auto minsz = round_up(minsize, PAGESIZE);
+    auto maxsz = round_up(maxsize, PAGESIZE);
+
+    auto pagenum = (p - pool.baseAddr) / PAGESIZE;
+
+    size_t sz;
+    for (sz = 0; sz < maxsz; sz++)
     {
     {
-        if (mask & BlkAttr.FINALIZE)
-        {
-            if (!pool.finals.nbits)
-                pool.finals.alloc(pool.mark.nbits);
-            pool.finals.set(biti);
-        }
-        if (mask & BlkAttr.NO_SCAN)
+        auto i = pagenum + psz + sz;
+        if (i == pool.npages)
+            break;
+        if (pool.pagetable[i] != B_FREE)
         {
         {
-            pool.noscan.set(biti);
+            if (sz < minsz)
+                return 0;
+            break;
         }
         }
-//        if (mask & BlkAttr.NO_MOVE)
-//        {
-//            if (!pool.nomove.nbits)
-//                pool.nomove.alloc(pool.mark.nbits);
-//            pool.nomove.set(biti);
-//        }
     }
     }
+    if (sz < minsz)
+        return 0;
+
+    size_t new_size = (psz + sz) * PAGESIZE;
+
+    if (opts.options.mem_stomp)
+        memset(p + blk_size - pm_bitmask_size, 0xF0,
+                new_size - blk_size - pm_bitmask_size);
+    memset(pool.pagetable + pagenum + psz, B_PAGEPLUS, sz);
+    gc.p_cache = null;
+    gc.size_cache = 0;
+    // update the size cache, assuming that is very likely the size of this
+    // block will be queried in the near future
+    pool.update_cache(p, new_size);
+
+    if (has_pm) {
+        new_size -= size_t.sizeof;
+        auto end_of_blk = cast(size_t**)(blk_base_addr + new_size);
+        *end_of_blk = pm_bitmask;
+    }
+    return new_size;
+}
 
 
 
 
-    /**
-     *
-     */
-    void clrBits(Pool* pool, size_t biti, uint mask)
-    in
-    {
-        assert( pool );
+//
+//
+//
+private void free(void *p)
+{
+    assert (p);
+
+    Pool*  pool;
+    size_t pagenum;
+    Bins   bin;
+    size_t bit_i;
+
+    // Find which page it is in
+    pool = findPool(p);
+    if (!pool)                              // if not one of ours
+        return;                             // ignore
+    if (opts.options.sentinel) {
+        sentinel_Invariant(p);
+        p = sentinel_sub(p);
+    }
+    pagenum = cast(size_t)(p - pool.baseAddr) / PAGESIZE;
+    bit_i = cast(size_t)(p - pool.baseAddr) / 16;
+    clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
+
+    bin = cast(Bins)pool.pagetable[pagenum];
+    if (bin == B_PAGE)              // if large alloc
+    {
+        // Free pages
+        size_t npages = 1;
+        size_t n = pagenum;
+        pool.freebits.set_group(bit_i, PAGESIZE / 16);
+        while (++n < pool.npages && pool.pagetable[n] == B_PAGEPLUS)
+            npages++;
+        if (opts.options.mem_stomp)
+            memset(p, 0xF2, npages * PAGESIZE);
+        pool.freePages(pagenum, npages);
+        // just in case we were caching this pointer
+        pool.clear_cache(p);
     }
     }
-    body
+    else
     {
     {
-        if (mask & BlkAttr.FINALIZE && pool.finals.nbits)
-            pool.finals.clear(biti);
-        if (mask & BlkAttr.NO_SCAN)
-            pool.noscan.clear(biti);
-//        if (mask & BlkAttr.NO_MOVE && pool.nomove.nbits)
-//            pool.nomove.clear(biti);
+        // Add to free list
+        List* list = cast(List*) p;
+
+        if (opts.options.mem_stomp)
+            memset(p, 0xF2, binsize[bin]);
+
+        list.next = gc.free_list[bin];
+        list.pool = pool;
+        gc.free_list[bin] = list;
+        pool.freebits.set(bit_i);
     }
     }
+}
+
 
 
+/**
+ * Determine the allocated size of pointer p.  If p is an interior pointer
+ * or not a gc allocated pointer, return 0.
+ */
+private size_t sizeOf(void *p)
+{
+    assert (p);
 
 
-    /***** Leak Detector ******/
+    if (opts.options.sentinel)
+        p = sentinel_sub(p);
 
 
+    Pool* pool = findPool(p);
+    if (pool is null)
+        return 0;
 
 
-    debug (LOGGING)
-    {
-        LogArray current;
-        LogArray prev;
+    auto biti = cast(size_t)(p - pool.baseAddr) / 16;
+    uint attrs = getAttr(pool, biti);
 
 
+    size_t size = pool.findSize(p);
+    size_t pm_bitmask_size = 0;
+    if (has_pointermap(attrs))
+        pm_bitmask_size = size_t.sizeof;
 
 
-        void log_init()
-        {
-            //debug(PRINTF) printf("+log_init()\n");
-            current.reserve(1000);
-            prev.reserve(1000);
-            //debug(PRINTF) printf("-log_init()\n");
-        }
+    if (opts.options.sentinel) {
+        // Check for interior pointer
+        // This depends on:
+        // 1) size is a power of 2 for less than PAGESIZE values
+        // 2) base of memory pool is aligned on PAGESIZE boundary
+        if (cast(size_t)p & (size - 1) & (PAGESIZE - 1))
+            return 0;
+        return size - SENTINEL_EXTRA - pm_bitmask_size;
+    }
+    else {
+        if (p == gc.p_cache)
+            return gc.size_cache;
 
 
+        // Check for interior pointer
+        // This depends on:
+        // 1) size is a power of 2 for less than PAGESIZE values
+        // 2) base of memory pool is aligned on PAGESIZE boundary
+        if (cast(size_t)p & (size - 1) & (PAGESIZE - 1))
+            return 0;
 
 
-        void log_malloc(void *p, size_t size)
-        {
-            //debug(PRINTF) printf("+log_malloc(p = %x, size = %d)\n", p, size);
-            Log log;
+        gc.p_cache = p;
+        gc.size_cache = size - pm_bitmask_size;
 
 
-            log.p = p;
-            log.size = size;
-            log.line = GC.line;
-            log.file = GC.file;
-            log.parent = null;
+        return gc.size_cache;
+    }
+}
 
 
-            GC.line = 0;
-            GC.file = null;
 
 
-            current.push(log);
-            //debug(PRINTF) printf("-log_malloc()\n");
-        }
+/**
+ * Verify that pointer p:
+ *  1) belongs to this memory pool
+ *  2) points to the start of an allocated piece of memory
+ *  3) is not on a free list
+ */
+private void checkNoSync(void *p)
+{
+    assert(p);
 
 
+    if (opts.options.sentinel)
+        sentinel_Invariant(p);
+    debug (PTRCHECK)
+    {
+        Pool*  pool;
+        size_t pagenum;
+        Bins   bin;
+        size_t size;
 
 
-        void log_free(void *p)
-        {
-            //debug(PRINTF) printf("+log_free(%x)\n", p);
-            size_t i;
+        if (opts.options.sentinel)
+            p = sentinel_sub(p);
+        pool = findPool(p);
+        assert(pool);
+        pagenum = cast(size_t)(p - pool.baseAddr) / PAGESIZE;
+        bin = cast(Bins)pool.pagetable[pagenum];
+        assert(bin <= B_PAGE);
+        size = binsize[bin];
+        assert((cast(size_t)p & (size - 1)) == 0);
 
 
-            i = current.find(p);
-            if (i == OPFAIL)
+        debug (PTRCHECK2)
+        {
+            if (bin < B_PAGE)
             {
             {
-                debug(PRINTF) printf("free'ing unallocated memory %x\n", p);
+                // Check that p is not on a free list
+                for (List* list = gc.free_list[bin]; list; list = list.next)
+                {
+                    assert(cast(void*)list != p);
+                }
             }
             }
-            else
-                current.remove(i);
-            //debug(PRINTF) printf("-log_free()\n");
         }
         }
+    }
+}
 
 
 
 
-        void log_collect()
+//
+//
+//
+private void setStackBottom(void *p)
+{
+    version (STACKGROWSDOWN)
+    {
+        //p = (void *)((uint *)p + 4);
+        if (p > gc.stack_bottom)
+        {
+            gc.stack_bottom = p;
+        }
+    }
+    else
+    {
+        //p = (void *)((uint *)p - 4);
+        if (p < gc.stack_bottom)
         {
         {
-            //debug(PRINTF) printf("+log_collect()\n");
-            // Print everything in current that is not in prev
+            gc.stack_bottom = cast(char*)p;
+        }
+    }
+}
 
 
-            debug(PRINTF) printf("New pointers this cycle: --------------------------------\n");
-            size_t used = 0;
-            for (size_t i = 0; i < current.dim; i++)
-            {
-                size_t j;
 
 
-                j = prev.find(current.data[i].p);
-                if (j == OPFAIL)
-                    current.data[i].print();
-                else
-                    used++;
-            }
+/**
+ * Retrieve statistics about garbage collection.
+ * Useful for debugging and tuning.
+ */
+private GCStats getStats()
+{
+    GCStats stats;
+    size_t psize = 0;
+    size_t usize = 0;
+    size_t flsize = 0;
 
 
-            debug(PRINTF) printf("All roots this cycle: --------------------------------\n");
-            for (size_t i = 0; i < current.dim; i++)
-            {
-                void *p;
-                size_t j;
+    size_t n;
+    size_t bsize = 0;
 
 
-                p = current.data[i].p;
-                if (!findPool(current.data[i].parent))
-                {
-                    j = prev.find(current.data[i].p);
-                    if (j == OPFAIL)
-                        debug(PRINTF) printf("N");
-                    else
-                        debug(PRINTF) printf(" ");;
-                    current.data[i].print();
-                }
-            }
+    for (n = 0; n < gc.pools.length; n++)
+    {
+        Pool* pool = gc.pools[n];
+        psize += pool.npages * PAGESIZE;
+        for (size_t j = 0; j < pool.npages; j++)
+        {
+            Bins bin = cast(Bins)pool.pagetable[j];
+            if (bin == B_FREE)
+                stats.freeblocks++;
+            else if (bin == B_PAGE)
+                stats.pageblocks++;
+            else if (bin < B_PAGE)
+                bsize += PAGESIZE;
+        }
+    }
 
 
-            debug(PRINTF) printf("Used = %d-------------------------------------------------\n", used);
-            prev.copy(&current);
+    for (n = 0; n < B_PAGE; n++)
+    {
+        for (List* list = gc.free_list[n]; list; list = list.next)
+            flsize += binsize[n];
+    }
 
 
-            debug(PRINTF) printf("-log_collect()\n");
-        }
+    usize = bsize - flsize;
 
 
+    stats.poolsize = psize;
+    stats.usedsize = bsize - flsize;
+    stats.freelistsize = flsize;
+    return stats;
+}
 
 
-        void log_parent(void *p, void *parent)
-        {
-            //debug(PRINTF) printf("+log_parent()\n");
-            size_t i;
+/******************* weak-reference support *********************/
 
 
-            i = current.find(p);
-            if (i == OPFAIL)
-            {
-                debug(PRINTF) printf("parent'ing unallocated memory %x, parent = %x\n", p, parent);
-                Pool *pool;
-                pool = findPool(p);
-                assert(pool);
-                size_t offset = cast(size_t)(p - pool.baseAddr);
-                size_t biti;
-                size_t pn = offset / PAGESIZE;
-                Bins bin = cast(Bins)pool.pagetable[pn];
-                biti = (offset & notbinsize[bin]);
-                debug(PRINTF) printf("\tbin = %d, offset = x%x, biti = x%x\n", bin, offset, biti);
-            }
-            else
-            {
-                current.data[i].parent = parent;
-            }
-            //debug(PRINTF) printf("-log_parent()\n");
-        }
+private struct WeakPointer
+{
+    Object reference;
+
+    void ondestroy(Object r)
+    {
+        assert(r is reference);
+        // lock for memory consistency (parallel readers)
+        // also ensures that weakpointerDestroy can be called while another
+        // thread is freeing the reference with "delete"
+        return locked!(void, () {
+            reference = null;
+        })();
+    }
+}
 
 
+/**
+ * Create a weak pointer to the given object.
+ * Returns a pointer to an opaque struct allocated in C memory.
+ */
+void* weakpointerCreate( Object r )
+{
+    if (r)
+    {
+        // must be allocated in C memory
+        // 1. to hide the reference from the GC
+        // 2. the GC doesn't scan delegates added by rt_attachDisposeEvent
+        //    for references
+        auto wp = cast(WeakPointer*)(cstdlib.malloc(WeakPointer.sizeof));
+        if (!wp)
+            onOutOfMemoryError();
+        wp.reference = r;
+        rt_attachDisposeEvent(r, &wp.ondestroy);
+        return wp;
     }
     }
-    else
+    return null;
+}
+
+/**
+ * Destroy a weak pointer returned by weakpointerCreate().
+ * If null is passed, nothing happens.
+ */
+void weakpointerDestroy( void* p )
+{
+    if (p)
     {
     {
-        void log_init() { }
-        void log_malloc(void *p, size_t size) { }
-        void log_free(void *p) { }
-        void log_collect() { }
-        void log_parent(void *p, void *parent) { }
+        auto wp = cast(WeakPointer*)p;
+        // must be extra careful about the GC or parallel threads
+        // finalizing the reference at the same time
+        return locked!(void, () {
+            if (wp.reference)
+                rt_detachDisposeEvent(wp.reference, &wp.ondestroy);
+        })();
+        cstdlib.free(wp);
     }
 }
 
     }
 }
 
+/**
+ * Query a weak pointer and return either the object passed to
+ * weakpointerCreate, or null if it was free'd in the meantime.
+ * If null is passed, null is returned.
+ */
+Object weakpointerGet( void* p )
+{
+    if (p)
+    {
+        // NOTE: could avoid the lock by using Fawzi style GC counters but
+        // that'd require core.sync.Atomic and lots of care about memory
+        // consistency it's an optional optimization see
+        // http://dsource.org/projects/tango/browser/trunk/user/tango/core/Lifetime.d?rev=5100#L158
+        return locked!(Object, () {
+            return (cast(WeakPointer*)p).reference;
+        })();
+        }
+}
+
 
 /* ============================ Pool  =============================== */
 
 
 /* ============================ Pool  =============================== */
 
@@ -2779,48 +1933,73 @@ struct Pool
 {
     byte* baseAddr;
     byte* topAddr;
 {
     byte* baseAddr;
     byte* topAddr;
-    GCBits mark;        // entries already scanned, or should not be scanned
-    GCBits scan;        // entries that need to be scanned
-    GCBits freebits;    // entries that are on the free list
-    GCBits finals;      // entries that need finalizer run on them
-    GCBits noscan;      // entries that should not be scanned
+    GCBits mark;     // entries already scanned, or should not be scanned
+    GCBits scan;     // entries that need to be scanned
+    GCBits freebits; // entries that are on the free list
+    GCBits finals;   // entries that need finalizer run on them
+    GCBits noscan;   // entries that should not be scanned
 
     size_t npages;
     ubyte* pagetable;
 
 
     size_t npages;
     ubyte* pagetable;
 
+    /// Cache for findSize()
+    size_t cached_size;
+    void* cached_ptr;
 
 
-    void initialize(size_t npages)
+    void clear_cache(void* ptr = null)
+    {
+        if (ptr is null || ptr is this.cached_ptr) {
+            this.cached_ptr = null;
+            this.cached_size = 0;
+        }
+    }
+
+    void update_cache(void* ptr, size_t size)
     {
     {
-        size_t poolsize;
+        this.cached_ptr = ptr;
+        this.cached_size = size;
+    }
 
 
-        //debug(PRINTF) printf("Pool::Pool(%u)\n", npages);
-        poolsize = npages * PAGESIZE;
+    void initialize(size_t npages)
+    {
+        size_t poolsize = npages * PAGESIZE;
         assert(poolsize >= POOLSIZE);
         assert(poolsize >= POOLSIZE);
-        baseAddr = cast(byte *)os_mem_map(poolsize);
+        baseAddr = cast(byte *) os.alloc(poolsize);
 
         // Some of the code depends on page alignment of memory pools
         assert((cast(size_t)baseAddr & (PAGESIZE - 1)) == 0);
 
         if (!baseAddr)
         {
 
         // Some of the code depends on page alignment of memory pools
         assert((cast(size_t)baseAddr & (PAGESIZE - 1)) == 0);
 
         if (!baseAddr)
         {
-            //debug(PRINTF) printf("GC fail: poolsize = x%x, errno = %d\n", poolsize, errno);
-            //debug(PRINTF) printf("message = '%s'\n", sys_errlist[errno]);
-
             npages = 0;
             poolsize = 0;
         }
             npages = 0;
             poolsize = 0;
         }
-        //assert(baseAddr);
         topAddr = baseAddr + poolsize;
 
         topAddr = baseAddr + poolsize;
 
-        mark.alloc(cast(size_t)poolsize / 16);
-        scan.alloc(cast(size_t)poolsize / 16);
-        freebits.alloc(cast(size_t)poolsize / 16);
-        noscan.alloc(cast(size_t)poolsize / 16);
+        size_t nbits = cast(size_t)poolsize / 16;
+
+        // if the GC will run in parallel in a fork()ed process, we need to
+        // share the mark bits
+        os.Vis vis = os.Vis.PRIV;
+        if (opts.options.fork)
+            vis = os.Vis.SHARED;
+        mark.alloc(nbits, vis); // shared between mark and sweep
+        freebits.alloc(nbits); // not used by the mark phase
+        scan.alloc(nbits); // only used in the mark phase
+        finals.alloc(nbits); // not used by the mark phase
+        noscan.alloc(nbits); // mark phase *MUST* have a snapshot
+
+        // all is free when we start
+        freebits.set_all();
+
+        // avoid accidental sweeping of new pools while using eager allocation
+        if (gc.mark_proc_pid)
+            mark.set_all();
 
 
-        pagetable = cast(ubyte*) .malloc(npages);
+        pagetable = cast(ubyte*) cstdlib.malloc(npages);
         if (!pagetable)
             onOutOfMemoryError();
         if (!pagetable)
             onOutOfMemoryError();
-       .memset(pagetable, B_FREE, npages);
+        memset(pagetable, B_FREE, npages);
 
         this.npages = npages;
     }
 
         this.npages = npages;
     }
@@ -2834,7 +2013,7 @@ struct Pool
 
             if (npages)
             {
 
             if (npages)
             {
-                result = os_mem_unmap(baseAddr, npages * PAGESIZE);
+                result = os.dealloc(baseAddr, npages * PAGESIZE);
                 assert(result);
                 npages = 0;
             }
                 assert(result);
                 npages = 0;
             }
@@ -2842,18 +2021,25 @@ struct Pool
             baseAddr = null;
             topAddr = null;
         }
             baseAddr = null;
             topAddr = null;
         }
+        // See Gcx.Dtor() for the rationale of the null check.
         if (pagetable)
         if (pagetable)
-            .free(pagetable);
+            cstdlib.free(pagetable);
 
 
-        mark.Dtor();
-        scan.Dtor();
+        os.Vis vis = os.Vis.PRIV;
+        if (opts.options.fork)
+            vis = os.Vis.SHARED;
+        mark.Dtor(vis);
         freebits.Dtor();
         freebits.Dtor();
+        scan.Dtor();
         finals.Dtor();
         noscan.Dtor();
     }
 
 
         finals.Dtor();
         noscan.Dtor();
     }
 
 
-    void Invariant() { }
+    bool Invariant()
+    {
+        return true;
+    }
 
 
     invariant
 
 
     invariant
@@ -2872,8 +2058,8 @@ struct Pool
         }
 
         for (size_t i = 0; i < npages; i++)
         }
 
         for (size_t i = 0; i < npages; i++)
-        {   Bins bin = cast(Bins)pagetable[i];
-
+        {
+            Bins bin = cast(Bins)pagetable[i];
             assert(bin < B_MAX);
         }
     }
             assert(bin < B_MAX);
         }
     }
@@ -2888,16 +2074,13 @@ struct Pool
         size_t i;
         size_t n2;
 
         size_t i;
         size_t n2;
 
-        //debug(PRINTF) printf("Pool::allocPages(n = %d)\n", n);
         n2 = n;
         for (i = 0; i < npages; i++)
         {
             if (pagetable[i] == B_FREE)
             {
                 if (--n2 == 0)
         n2 = n;
         for (i = 0; i < npages; i++)
         {
             if (pagetable[i] == B_FREE)
             {
                 if (--n2 == 0)
-                {   //debug(PRINTF) printf("\texisting pn = %d\n", i - n + 1);
                     return i - n + 1;
                     return i - n + 1;
-                }
             }
             else
                 n2 = n;
             }
             else
                 n2 = n;
@@ -2911,19 +2094,65 @@ struct Pool
      */
     void freePages(size_t pagenum, size_t npages)
     {
      */
     void freePages(size_t pagenum, size_t npages)
     {
-        .memset(&pagetable[pagenum], B_FREE, npages);
+        memset(&pagetable[pagenum], B_FREE, npages);
+    }
+
+
+    /**
+     * Find base address of block containing pointer p.
+     * Returns null if the pointer doesn't belong to this pool
+     */
+    void* findBase(void *p)
+    {
+        size_t offset = cast(size_t)(p - this.baseAddr);
+        size_t pagenum = offset / PAGESIZE;
+        Bins bin = cast(Bins)this.pagetable[pagenum];
+        // Adjust bit to be at start of allocated memory block
+        if (bin <= B_PAGE)
+            return this.baseAddr + (offset & notbinsize[bin]);
+        if (bin == B_PAGEPLUS) {
+            do {
+                --pagenum, offset -= PAGESIZE;
+            } while (cast(Bins)this.pagetable[pagenum] == B_PAGEPLUS);
+            return this.baseAddr + (offset & (offset.max ^ (PAGESIZE-1)));
+        }
+        // we are in a B_FREE page
+        return null;
+    }
+
+
+    /**
+     * Find size of pointer p.
+     * Returns 0 if p doesn't belong to this pool if if it's block size is less
+     * than a PAGE.
+     */
+    size_t findSize(void *p)
+    {
+        size_t pagenum = cast(size_t)(p - this.baseAddr) / PAGESIZE;
+        Bins bin = cast(Bins)this.pagetable[pagenum];
+        if (bin != B_PAGE)
+            return binsize[bin];
+        if (this.cached_ptr == p)
+            return this.cached_size;
+        size_t i = pagenum + 1;
+        for (; i < this.npages; i++)
+            if (this.pagetable[i] != B_PAGEPLUS)
+                break;
+        this.cached_ptr = p;
+        this.cached_size = (i - pagenum) * PAGESIZE;
+        return this.cached_size;
     }
 
 
     /**
     }
 
 
     /**
-     * Used for sorting pooltable[]
+     * Used for sorting pools
      */
      */
-    int opCmp(Pool *p2)
+    int opCmp(in Pool other)
     {
     {
-        if (baseAddr < p2.baseAddr)
+        if (baseAddr < other.baseAddr)
             return -1;
         else
             return -1;
         else
-        return cast(int)(baseAddr > p2.baseAddr);
+        return cast(int)(baseAddr > other.baseAddr);
     }
 }
 
     }
 }
 
@@ -2931,68 +2160,359 @@ struct Pool
 /* ============================ SENTINEL =============================== */
 
 
 /* ============================ SENTINEL =============================== */
 
 
-version (SENTINEL)
+const size_t SENTINEL_PRE = cast(size_t) 0xF4F4F4F4F4F4F4F4UL; // 32 or 64 bits
+const ubyte SENTINEL_POST = 0xF5;           // 8 bits
+const uint SENTINEL_EXTRA = 2 * size_t.sizeof + 1;
+
+
+size_t* sentinel_size(void *p)  { return &(cast(size_t *)p)[-2]; }
+size_t* sentinel_pre(void *p)   { return &(cast(size_t *)p)[-1]; }
+ubyte* sentinel_post(void *p) { return &(cast(ubyte *)p)[*sentinel_size(p)]; }
+
+
+void sentinel_init(void *p, size_t size)
 {
 {
-    const size_t SENTINEL_PRE = cast(size_t) 0xF4F4F4F4F4F4F4F4UL; // 32 or 64 bits
-    const ubyte SENTINEL_POST = 0xF5;           // 8 bits
-    const uint SENTINEL_EXTRA = 2 * size_t.sizeof + 1;
+    *sentinel_size(p) = size;
+    *sentinel_pre(p) = SENTINEL_PRE;
+    *sentinel_post(p) = SENTINEL_POST;
+}
 
 
 
 
-    size_t* sentinel_size(void *p)  { return &(cast(size_t *)p)[-2]; }
-    size_t* sentinel_pre(void *p)   { return &(cast(size_t *)p)[-1]; }
-    ubyte* sentinel_post(void *p) { return &(cast(ubyte *)p)[*sentinel_size(p)]; }
+void sentinel_Invariant(void *p)
+{
+    if (*sentinel_pre(p) != SENTINEL_PRE ||
+            *sentinel_post(p) != SENTINEL_POST)
+        cstdlib.abort();
+}
 
 
 
 
-    void sentinel_init(void *p, size_t size)
-    {
-        *sentinel_size(p) = size;
-        *sentinel_pre(p) = SENTINEL_PRE;
-        *sentinel_post(p) = SENTINEL_POST;
-    }
+void *sentinel_add(void *p)
+{
+    return p + 2 * size_t.sizeof;
+}
 
 
 
 
-    void sentinel_Invariant(void *p)
-    {
-        assert(*sentinel_pre(p) == SENTINEL_PRE);
-        assert(*sentinel_post(p) == SENTINEL_POST);
-    }
+void *sentinel_sub(void *p)
+{
+    return p - 2 * size_t.sizeof;
+}
 
 
 
 
-    void *sentinel_add(void *p)
-    {
-        return p + 2 * size_t.sizeof;
-    }
 
 
+/* ============================ C Public Interface ======================== */
 
 
-    void *sentinel_sub(void *p)
-    {
-        return p - 2 * size_t.sizeof;
-    }
+
+private int _termCleanupLevel=1;
+
+extern (C):
+
+/// sets the cleanup level done by gc
+/// 0: none
+/// 1: fullCollect
+/// 2: fullCollect ignoring stack roots (might crash daemonThreads)
+/// result !=0 if the value was invalid
+int gc_setTermCleanupLevel(int cLevel)
+{
+    if (cLevel<0 || cLevel>2) return cLevel;
+    _termCleanupLevel=cLevel;
+    return 0;
 }
 }
-else
+
+/// returns the cleanup level done by gc
+int gc_getTermCleanupLevel()
 {
 {
-    const uint SENTINEL_EXTRA = 0;
+    return _termCleanupLevel;
+}
 
 
+void gc_init()
+{
+    scope (exit) assert (Invariant());
+    gc = cast(GC*) cstdlib.calloc(1, GC.sizeof);
+    *gc = GC.init;
+    initialize();
+    version (DigitalMars) version(OSX) {
+        _d_osx_image_init();
+    }
+    // NOTE: The GC must initialize the thread library
+    //       before its first collection.
+    thread_init();
+}
 
 
-    void sentinel_init(void *p, size_t size)
-    {
+void gc_term()
+{
+    assert (Invariant());
+    if (_termCleanupLevel<1) {
+        // no cleanup
+    } else if (_termCleanupLevel==2){
+        // a more complete cleanup
+        // NOTE: There may be daemons threads still running when this routine is
+        //       called.  If so, cleaning memory out from under then is a good
+        //       way to make them crash horribly.
+        //       Often this probably doesn't matter much since the app is
+        //       supposed to be shutting down anyway, but for example tests might
+        //       crash (and be considerd failed even if the test was ok).
+        //       thus this is not the default and should be enabled by
+        //       I'm disabling cleanup for now until I can think about it some
+        //       more.
+        //
+        // not really a 'collect all' -- still scans static data area, roots,
+        // and ranges.
+        return locked!(void, () {
+            gc.no_stack++;
+            fullcollectshell();
+            gc.no_stack--;
+        })();
+    } else {
+        // default (safe) clenup
+        return locked!(void, () {
+            fullcollectshell();
+        })();
     }
     }
+}
 
 
+void gc_enable()
+{
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        assert (gc.disabled > 0);
+        gc.disabled--;
+    })();
+}
 
 
-    void sentinel_Invariant(void *p)
-    {
-    }
+void gc_disable()
+{
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        gc.disabled++;
+    })();
+}
 
 
+void gc_collect()
+{
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        fullcollectshell();
+    })();
+}
 
 
-    void *sentinel_add(void *p)
-    {
-        return p;
-    }
 
 
+void gc_minimize()
+{
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        minimize();
+    })();
+}
 
 
-    void *sentinel_sub(void *p)
-    {
-        return p;
-    }
+uint gc_getAttr(void* p)
+{
+    if (p is null)
+        return 0;
+    return locked!(uint, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        Pool* pool = findPool(p);
+        if (pool is null)
+            return 0u;
+        auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
+        return getAttr(pool, bit_i);
+    })();
 }
 
 }
 
+uint gc_setAttr(void* p, uint attrs)
+{
+    if (p is null)
+        return 0;
+    return locked!(uint, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        Pool* pool = findPool(p);
+        if (pool is null)
+            return 0u;
+        auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
+        uint old_attrs = getAttr(pool, bit_i);
+        setAttr(pool, bit_i, attrs);
+        return old_attrs;
+    })();
+}
+
+uint gc_clrAttr(void* p, uint attrs)
+{
+    if (p is null)
+        return 0;
+    return locked!(uint, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        Pool* pool = findPool(p);
+        if (pool is null)
+            return 0u;
+        auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
+        uint old_attrs = getAttr(pool, bit_i);
+        clrAttr(pool, bit_i, attrs);
+        return old_attrs;
+    })();
+}
+
+void* gc_malloc(size_t size, uint attrs = 0,
+        PointerMap ptrmap = PointerMap.init)
+{
+    if (size == 0)
+        return null;
+    return locked!(void*, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return malloc(size, attrs, ptrmap.bits.ptr);
+    })();
+}
+
+void* gc_calloc(size_t size, uint attrs = 0,
+        PointerMap ptrmap = PointerMap.init)
+{
+    if (size == 0)
+        return null;
+    return locked!(void*, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return calloc(size, attrs, ptrmap.bits.ptr);
+    })();
+}
+
+void* gc_realloc(void* p, size_t size, uint attrs = 0,
+        PointerMap ptrmap = PointerMap.init)
+{
+    return locked!(void*, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return realloc(p, size, attrs, ptrmap.bits.ptr);
+    })();
+}
+
+size_t gc_extend(void* p, size_t min_size, size_t max_size)
+{
+    return locked!(size_t, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return extend(p, min_size, max_size);
+    })();
+}
+
+size_t gc_reserve(size_t size)
+{
+    if (size == 0)
+        return 0;
+    return locked!(size_t, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return reserve(size);
+    })();
+}
+
+void gc_free(void* p)
+{
+    if (p is null)
+        return;
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        free(p);
+    })();
+}
+
+void* gc_addrOf(void* p)
+{
+    if (p is null)
+        return null;
+    return locked!(void*, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        Pool* pool = findPool(p);
+        if (pool is null)
+            return null;
+        return pool.findBase(p);
+    })();
+}
+
+size_t gc_sizeOf(void* p)
+{
+    if (p is null)
+        return 0;
+    return locked!(size_t, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return sizeOf(p);
+    })();
+}
+
+BlkInfo gc_query(void* p)
+{
+    if (p is null)
+        return BlkInfo.init;
+    return locked!(BlkInfo, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return getInfo(p);
+    })();
+}
+
+// NOTE: This routine is experimental.  The stats or function name may change
+//       before it is made officially available.
+GCStats gc_stats()
+{
+    return locked!(GCStats, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return getStats();
+    })();
+}
+
+void gc_addRoot(void* p)
+{
+    if (p is null)
+        return;
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        if (gc.roots.append(p) is null)
+            onOutOfMemoryError();
+    })();
+}
+
+void gc_addRange(void* p, size_t size)
+{
+    if (p is null || size == 0)
+        return;
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        if (gc.ranges.append(Range(p, p + size)) is null)
+            onOutOfMemoryError();
+    })();
+}
+
+void gc_removeRoot(void* p)
+{
+    if (p is null)
+        return;
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        bool r = gc.roots.remove(p);
+        assert (r);
+    })();
+}
+
+void gc_removeRange(void* p)
+{
+    if (p is null)
+        return;
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        bool r = gc.ranges.remove(Range(p, null));
+        assert (r);
+    })();
+}
+
+void* gc_weakpointerCreate(Object r)
+{
+    // weakpointers do their own locking
+    return weakpointerCreate(r);
+}
+
+void gc_weakpointerDestroy(void* wp)
+{
+    // weakpointers do their own locking
+    weakpointerDestroy(wp);
+}
+
+Object gc_weakpointerGet(void* wp)
+{
+    // weakpointers do their own locking
+    return weakpointerGet(wp);
+}
+
+
+// vim: set et sw=4 sts=4 :