git.llucax.com Git - software/dgc/cdgc.git/blobdiff - rt/gc/cdgc/gc.d
Sync the pool block size cache properly
[software/dgc/cdgc.git] / rt / gc / cdgc / gc.d
index 05ef480ee801911f5b35539fd90f69602ad90db8..e53d5d13b20e35ccc2c6eb1fb7c989d03a1a44c0 100644 (file)
@@ -45,7 +45,7 @@ version = STACKGROWSDOWN;       // growing the stack means subtracting from the
 import rt.gc.cdgc.bits: GCBits;
 import rt.gc.cdgc.stats: GCStats, Stats;
 import dynarray = rt.gc.cdgc.dynarray;
-import alloc = rt.gc.cdgc.alloc;
+import os = rt.gc.cdgc.os;
 import opts = rt.gc.cdgc.opts;
 
 import cstdlib = tango.stdc.stdlib;
@@ -99,32 +99,29 @@ package bool has_pointermap(uint attrs)
 
 private
 {
-
-    extern (C) void* rt_stackBottom();
-    extern (C) void* rt_stackTop();
-
-    extern (C) void rt_finalize( void* p, bool det = true );
-
     alias void delegate(Object) DEvent;
-    extern (C) void rt_attachDisposeEvent(Object h, DEvent e);
-    extern (C) bool rt_detachDisposeEvent(Object h, DEvent e);
-
-
     alias void delegate( void*, void* ) scanFn;
+    enum { OPFAIL = ~cast(size_t)0 }
 
-    extern (C) void rt_scanStaticData( scanFn scan );
-
-    extern (C) bool thread_needLock();
-    extern (C) void thread_suspendAll();
-    extern (C) void thread_resumeAll();
+    extern (C)
+    {
+        version (DigitalMars) version(OSX)
+            void _d_osx_image_init();
 
-    extern (C) void thread_scanAll( scanFn fn, void* curStackTop = null );
+        void* rt_stackBottom();
+        void* rt_stackTop();
+        void rt_finalize( void* p, bool det = true );
+        void rt_attachDisposeEvent(Object h, DEvent e);
+        bool rt_detachDisposeEvent(Object h, DEvent e);
+        void rt_scanStaticData( scanFn scan );
 
-    extern (C) void onOutOfMemoryError();
+        void thread_init();
+        bool thread_needLock();
+        void thread_suspendAll();
+        void thread_resumeAll();
+        void thread_scanAll( scanFn fn, void* curStackTop = null );
 
-    enum
-    {
-        OPFAIL = ~cast(size_t)0
+        void onOutOfMemoryError();
     }
 }
 
@@ -158,7 +155,8 @@ alias ubyte Bins;
 
 struct List
 {
-    List *next;
+    List* next;
+    Pool* pool;
 }
 
 
@@ -184,2281 +182,1701 @@ const uint notbinsize[B_MAX] = [ ~(16u-1),~(32u-1),~(64u-1),~(128u-1),~(256u-1),
 /* ============================ GC =============================== */
 
 
-class GCLock { }                // just a dummy so we can get a global lock
+class GCLock {} // just a dummy so we can get a global lock
 
 
 struct GC
 {
-    ClassInfo lock;    // global lock
+    // global lock
+    ClassInfo lock;
 
-    void *p_cache;
+    void* p_cache;
     size_t size_cache;
 
-    uint noStack;       // !=0 means don't scan stack
-    uint anychanges;
-    void *stackBottom;
+    // !=0 means don't scan stack
+    uint no_stack;
+    bool any_changes;
+    void* stack_bottom;
     uint inited;
-    int disabled;       // turn off collections if >0
+    /// Turn off collections if > 0
+    int disabled;
 
-    byte *minAddr;      // min(baseAddr)
-    byte *maxAddr;      // max(topAddr)
+    /// min(pool.baseAddr)
+    byte *min_addr;
+    /// max(pool.topAddr)
+    byte *max_addr;
 
-    List *bucket[B_MAX];        // free list for each size
+    /// Free list for each size
+    List*[B_MAX] free_list;
 
     dynarray.DynArray!(void*) roots;
     dynarray.DynArray!(Range) ranges;
-    dynarray.DynArray!(Pool) pools;
+    dynarray.DynArray!(Pool*) pools;
 
     Stats stats;
+}
 
+// call locked if necessary
+private T locked(T, alias Code)()
+{
+    if (thread_needLock())
+        synchronized (gc.lock) return Code();
+    else
+       return Code();
+}
 
-    invariant
-    {
-        if (inited)
-        {
-        //printf("Gcx.invariant(): this = %p\n", this);
-            size_t i;
+private GC* gc;
 
-            for (i = 0; i < pools.length; i++)
-            {
-                Pool* pool = pools[i];
-                pool.Invariant();
-                if (i == 0)
-                {
-                    assert(minAddr == pool.baseAddr);
-                }
-                if (i + 1 < pools.length)
-                {
-                    assert(*pool < pools[i + 1]);
-                }
-                else if (i + 1 == pools.length)
-                {
-                    assert(maxAddr == pool.topAddr);
-                }
+bool Invariant()
+{
+    assert (gc !is null);
+    if (gc.inited) {
+        for (size_t i = 0; i < gc.pools.length; i++) {
+            Pool* pool = gc.pools[i];
+            pool.Invariant();
+            if (i == 0)
+                assert(gc.min_addr == pool.baseAddr);
+            if (i + 1 < gc.pools.length)
+                assert(*pool < *gc.pools[i + 1]);
+            else if (i + 1 == gc.pools.length)
+                assert(gc.max_addr == pool.topAddr);
+        }
+
+        gc.roots.Invariant();
+        gc.ranges.Invariant();
+
+        for (size_t i = 0; i < gc.ranges.length; i++) {
+            assert(gc.ranges[i].pbot);
+            assert(gc.ranges[i].ptop);
+            assert(gc.ranges[i].pbot <= gc.ranges[i].ptop);
+        }
+
+        for (size_t i = 0; i < B_PAGE; i++) {
+            for (List *list = gc.free_list[i]; list; list = list.next) {
+                assert (list.pool !is null);
+                auto p = cast(byte*) list;
+                assert (p >= list.pool.baseAddr);
+                assert (p < list.pool.topAddr);
             }
+        }
+    }
+    return true;
+}
 
-            roots.Invariant();
-            ranges.Invariant();
 
-            for (i = 0; i < ranges.length; i++)
-            {
-                assert(ranges[i].pbot);
-                assert(ranges[i].ptop);
-                assert(ranges[i].pbot <= ranges[i].ptop);
-            }
+/**
+ * Find Pool that pointer is in.
+ * Return null if not in a Pool.
+ * Assume pools is sorted.
+ */
+Pool* findPool(void* p)
+{
+    if (p < gc.min_addr || p >= gc.max_addr)
+        return null;
+    if (gc.pools.length == 0)
+        return null;
+    if (gc.pools.length == 1)
+        return gc.pools[0];
+    /// The pooltable[] is sorted by address, so do a binary search
+    size_t low = 0;
+    size_t high = gc.pools.length - 1;
+    while (low <= high) {
+        size_t mid = (low + high) / 2;
+        auto pool = gc.pools[mid];
+        if (p < pool.baseAddr)
+            high = mid - 1;
+        else if (p >= pool.topAddr)
+            low = mid + 1;
+        else
+            return pool;
+    }
+    // Not found
+    return null;
+}
 
-            for (i = 0; i < B_PAGE; i++)
-            {
-                for (List *list = bucket[i]; list; list = list.next)
-                {
-                }
-            }
+
+/**
+ * Determine the base address of the block containing p.  If p is not a gc
+ * allocated pointer, return null.
+ */
+BlkInfo getInfo(void* p)
+{
+    assert (p !is null);
+    Pool* pool = findPool(p);
+    if (pool is null)
+        return BlkInfo.init;
+    BlkInfo info;
+    info.base = pool.findBase(p);
+    info.size = pool.findSize(info.base);
+    info.attr = getAttr(pool, cast(size_t)(info.base - pool.baseAddr) / 16u);
+    if (has_pointermap(info.attr)) {
+        info.size -= size_t.sizeof; // PointerMap bitmask
+        // Points to the PointerMap bitmask pointer, not user data
+        if (p >= (info.base + info.size)) {
+            return BlkInfo.init;
+        }
+    }
+    if (opts.options.sentinel) {
+        info.base = sentinel_add(info.base);
+        // points to sentinel data, not user data
+        if (p < info.base || p >= sentinel_post(info.base))
+            return BlkInfo.init;
+        info.size -= SENTINEL_EXTRA;
+    }
+    return info;
+}
+
+
+/**
+ * Compute bin for size.
+ */
+Bins findBin(size_t size)
+{
+    Bins bin;
+    if (size <= 256)
+    {
+        if (size <= 64)
+        {
+            if (size <= 16)
+                bin = B_16;
+            else if (size <= 32)
+                bin = B_32;
+            else
+                bin = B_64;
+        }
+        else
+        {
+            if (size <= 128)
+                bin = B_128;
+            else
+                bin = B_256;
+        }
+    }
+    else
+    {
+        if (size <= 1024)
+        {
+            if (size <= 512)
+                bin = B_512;
+            else
+                bin = B_1024;
+        }
+        else
+        {
+            if (size <= 2048)
+                bin = B_2048;
+            else
+                bin = B_PAGE;
         }
     }
+    return bin;
+}
 
 
-    /**
-     * Find Pool that pointer is in.
-     * Return null if not in a Pool.
-     * Assume pools is sorted.
-     */
-    Pool *findPool(void *p)
+/**
+ * Allocate a new pool of at least size bytes.
+ * Sort it into pools.
+ * Mark all memory in the pool as B_FREE.
+ * Return the actual number of bytes reserved or 0 on error.
+ */
+size_t reserve(size_t size)
+{
+    assert(size != 0);
+    size_t npages = (size + PAGESIZE - 1) / PAGESIZE;
+    Pool*  pool = newPool(npages);
+
+    if (!pool)
+        return 0;
+    return pool.npages * PAGESIZE;
+}
+
+
+/**
+ * Minimizes physical memory usage by returning free pools to the OS.
+ */
+void minimize()
+{
+    size_t n;
+    size_t pn;
+    Pool* pool;
+
+    for (n = 0; n < gc.pools.length; n++)
     {
-        if (p >= minAddr && p < maxAddr)
+        pool = gc.pools[n];
+        for (pn = 0; pn < pool.npages; pn++)
         {
-            if (pools.length == 1)
-            {
-                return pools[0];
-            }
-
-            for (size_t i = 0; i < pools.length; i++)
-            {
-                Pool* pool = pools[i];
-                if (p < pool.topAddr)
-                {
-                    if (pool.baseAddr <= p)
-                        return pool;
-                    break;
-                }
-            }
+            if (cast(Bins)pool.pagetable[pn] != B_FREE)
+                break;
         }
-        return null;
+        if (pn < pool.npages)
+            continue;
+        pool.Dtor();
+        cstdlib.free(pool);
+        gc.pools.remove_at(n);
+        n--;
     }
+    gc.min_addr = gc.pools[0].baseAddr;
+    gc.max_addr = gc.pools[gc.pools.length - 1].topAddr;
+}
 
 
-    /**
-     * Find base address of block containing pointer p.
-     * Returns null if not a gc'd pointer
-     */
-    void* findBase(void *p)
+/**
+ * Allocate a chunk of memory that is larger than a page.
+ * Return null if out of memory.
+ */
+void* bigAlloc(size_t size, out Pool* pool)
+{
+    size_t npages;
+    size_t n;
+    size_t pn;
+    size_t freedpages;
+    void*  p;
+    int    state;
+
+    npages = (size + PAGESIZE - 1) / PAGESIZE;
+
+    for (state = 0; ; )
     {
-        Pool *pool;
+        // This code could use some refinement when repeatedly
+        // allocating very large arrays.
 
-        pool = findPool(p);
-        if (pool)
+        for (n = 0; n < gc.pools.length; n++)
         {
-            size_t offset = cast(size_t)(p - pool.baseAddr);
-            size_t pn = offset / PAGESIZE;
-            Bins   bin = cast(Bins)pool.pagetable[pn];
+            pool = gc.pools[n];
+            pn = pool.allocPages(npages);
+            if (pn != OPFAIL)
+                goto L1;
+        }
 
-            // Adjust bit to be at start of allocated memory block
-            if (bin <= B_PAGE)
+        // Failed
+        switch (state)
+        {
+        case 0:
+            if (gc.disabled)
             {
-                return pool.baseAddr + (offset & notbinsize[bin]);
+                state = 1;
+                continue;
             }
-            else if (bin == B_PAGEPLUS)
+            // Try collecting
+            freedpages = fullcollectshell();
+            if (freedpages >= gc.pools.length * ((POOLSIZE / PAGESIZE) / 4))
             {
-                do
-                {
-                    --pn, offset -= PAGESIZE;
-                } while (cast(Bins)pool.pagetable[pn] == B_PAGEPLUS);
-
-                return pool.baseAddr + (offset & (offset.max ^ (PAGESIZE-1)));
+                state = 1;
+                continue;
             }
-            else
+            // Release empty pools to prevent bloat
+            minimize();
+            // Allocate new pool
+            pool = newPool(npages);
+            if (!pool)
             {
-                // we are in a B_FREE page
-                return null;
+                state = 2;
+                continue;
             }
-        }
-        return null;
-    }
+            pn = pool.allocPages(npages);
+            assert(pn != OPFAIL);
+            goto L1;
+        case 1:
+            // Release empty pools to prevent bloat
+            minimize();
+            // Allocate new pool
+            pool = newPool(npages);
+            if (!pool)
+                goto Lnomemory;
+            pn = pool.allocPages(npages);
+            assert(pn != OPFAIL);
+            goto L1;
+        case 2:
+            goto Lnomemory;
+        default:
+            assert(false);
+        }
+    }
+
+  L1:
+    pool.pagetable[pn] = B_PAGE;
+    if (npages > 1)
+        memset(&pool.pagetable[pn + 1], B_PAGEPLUS, npages - 1);
+    p = pool.baseAddr + pn * PAGESIZE;
+    memset(cast(char *)p + size, 0, npages * PAGESIZE - size);
+    if (opts.options.mem_stomp)
+        memset(p, 0xF1, size);
+    return p;
+
+  Lnomemory:
+    return null; // let mallocNoSync handle the error
+}
 
 
-    /**
-     * Find size of pointer p.
-     * Returns 0 if not a gc'd pointer
-     */
-    size_t findSize(void *p)
+/**
+ * Allocate a new pool with at least npages in it.
+ * Sort it into pools.
+ * Return null if failed.
+ */
+Pool *newPool(size_t npages)
+{
+    // Minimum of POOLSIZE
+    if (npages < POOLSIZE/PAGESIZE)
+        npages = POOLSIZE/PAGESIZE;
+    else if (npages > POOLSIZE/PAGESIZE)
     {
-        Pool*  pool;
-        size_t size = 0;
+        // Give us 150% of requested size, so there's room to extend
+        auto n = npages + (npages >> 1);
+        if (n < size_t.max/PAGESIZE)
+            npages = n;
+    }
 
-        pool = findPool(p);
-        if (pool)
-        {
-            size_t pagenum;
-            Bins   bin;
+    // Allocate successively larger pools up to 8 megs
+    if (gc.pools.length)
+    {
+        size_t n = gc.pools.length;
+        if (n > 8)
+            n = 8;                  // cap pool size at 8 megs
+        n *= (POOLSIZE / PAGESIZE);
+        if (npages < n)
+            npages = n;
+    }
 
-            pagenum = cast(size_t)(p - pool.baseAddr) / PAGESIZE;
-            bin = cast(Bins)pool.pagetable[pagenum];
-            size = binsize[bin];
-            if (bin == B_PAGE)
-            {
-                ubyte* pt;
-                size_t i;
+    auto pool = cast(Pool*) cstdlib.calloc(1, Pool.sizeof);
+    if (pool is null)
+        return null;
+    pool.initialize(npages);
+    if (!pool.baseAddr)
+    {
+        pool.Dtor();
+        return null;
+    }
 
-                pt = &pool.pagetable[0];
-                for (i = pagenum + 1; i < pool.npages; i++)
-                {
-                    if (pt[i] != B_PAGEPLUS)
-                        break;
-                }
-                size = (i - pagenum) * PAGESIZE;
-            }
-        }
-        return size;
+    auto inserted_pool = *gc.pools.insert_sorted!("*a < *b")(pool);
+    if (inserted_pool is null) {
+        pool.Dtor();
+        return null;
     }
+    assert (inserted_pool is pool);
+    gc.min_addr = gc.pools[0].baseAddr;
+    gc.max_addr = gc.pools[gc.pools.length - 1].topAddr;
+    return pool;
+}
 
 
-    /**
-     *
-     */
-    BlkInfo getInfo(void* p)
+/**
+ * Allocate a page of bin's.
+ * Returns:
+ *  0       failed
+ */
+int allocPage(Bins bin)
+{
+    Pool*  pool;
+    size_t n;
+    size_t pn;
+    byte*  p;
+    byte*  ptop;
+
+    for (n = 0; n < gc.pools.length; n++)
     {
-        Pool*   pool;
-        BlkInfo info;
+        pool = gc.pools[n];
+        pn = pool.allocPages(1);
+        if (pn != OPFAIL)
+            goto L1;
+    }
+    return 0;               // failed
 
-        pool = findPool(p);
-        if (pool)
-        {
-            size_t offset = cast(size_t)(p - pool.baseAddr);
-            size_t pn = offset / PAGESIZE;
-            Bins   bin = cast(Bins)pool.pagetable[pn];
+  L1:
+    pool.pagetable[pn] = cast(ubyte)bin;
 
-            ////////////////////////////////////////////////////////////////////
-            // findAddr
-            ////////////////////////////////////////////////////////////////////
+    // Convert page to free list
+    size_t size = binsize[bin];
+    auto list_head = &gc.free_list[bin];
 
-            if (bin <= B_PAGE)
-            {
-                info.base = pool.baseAddr + (offset & notbinsize[bin]);
-            }
-            else if (bin == B_PAGEPLUS)
-            {
-                do
-                {
-                    --pn, offset -= PAGESIZE;
-                }
-                while (cast(Bins)pool.pagetable[pn] == B_PAGEPLUS);
+    p = pool.baseAddr + pn * PAGESIZE;
+    ptop = p + PAGESIZE;
+    for (; p < ptop; p += size)
+    {
+        List* l = cast(List *) p;
+        l.next = *list_head;
+        l.pool = pool;
+        *list_head = l;
+    }
+    return 1;
+}
 
-                info.base = pool.baseAddr + (offset & (offset.max ^ (PAGESIZE-1)));
 
-                // fix bin for use by size calc below
-                bin = cast(Bins)pool.pagetable[pn];
-            }
+/**
+ * Search a range of memory values and mark any pointers into the GC pool using
+ * type information (bitmask of pointer locations).
+ */
+void mark_range(void *pbot, void *ptop, size_t* pm_bitmask)
+{
+    // TODO: make our own assert because assert uses the GC
+    assert (pbot <= ptop);
+
+    const BITS_PER_WORD = size_t.sizeof * 8;
+
+    void **p1 = cast(void **)pbot;
+    void **p2 = cast(void **)ptop;
+    size_t pcache = 0;
+    bool changes = false;
+
+    size_t type_size = pm_bitmask[0];
+    size_t* pm_bits = pm_bitmask + 1;
+    bool has_type_info = type_size != 1 || pm_bits[0] != 1 || pm_bits[1] != 0;
+
+    //printf("marking range: %p -> %p\n", pbot, ptop);
+    for (; p1 + type_size <= p2; p1 += type_size) {
+        for (size_t n = 0; n < type_size; n++) {
+            // scan bit set for this word
+            if (has_type_info &&
+                    !(pm_bits[n / BITS_PER_WORD] & (1 << (n % BITS_PER_WORD))))
+                continue;
+
+            void* p = *(p1 + n);
 
-            ////////////////////////////////////////////////////////////////////
-            // findSize
-            ////////////////////////////////////////////////////////////////////
+            if (p < gc.min_addr || p >= gc.max_addr)
+                continue;
+
+            if ((cast(size_t)p & ~(PAGESIZE-1)) == pcache)
+                continue;
 
-            info.size = binsize[bin];
-            if (bin == B_PAGE)
+            Pool* pool = findPool(p);
+            if (pool)
             {
-                ubyte* pt;
-                size_t i;
+                size_t offset = cast(size_t)(p - pool.baseAddr);
+                size_t bit_i = void;
+                size_t pn = offset / PAGESIZE;
+                Bins   bin = cast(Bins)pool.pagetable[pn];
+
+                // Cache B_PAGE, B_PAGEPLUS and B_FREE lookups
+                if (bin >= B_PAGE)
+                    pcache = cast(size_t)p & ~(PAGESIZE-1);
 
-                pt = &pool.pagetable[0];
-                for (i = pn + 1; i < pool.npages; i++)
+                // Adjust bit to be at start of allocated memory block
+                if (bin <= B_PAGE)
+                    bit_i = (offset & notbinsize[bin]) / 16;
+                else if (bin == B_PAGEPLUS)
                 {
-                    if (pt[i] != B_PAGEPLUS)
-                        break;
+                    do
+                    {
+                        --pn;
+                    }
+                    while (cast(Bins)pool.pagetable[pn] == B_PAGEPLUS);
+                    bit_i = pn * (PAGESIZE / 16);
                 }
-                info.size = (i - pn) * PAGESIZE;
-            }
-
-            ////////////////////////////////////////////////////////////////////
-            // getAttr
-            ////////////////////////////////////////////////////////////////////
+                else // Don't mark bits in B_FREE pages
+                    continue;
 
-            info.attr = getAttr(pool, cast(size_t)(offset / 16));
-            if (!(info.attr & BlkAttr.NO_SCAN))
-                info.size -= (size_t*).sizeof;  // bitmask
+                if (!pool.mark.test(bit_i))
+                {
+                    pool.mark.set(bit_i);
+                    if (!pool.noscan.test(bit_i))
+                    {
+                        pool.scan.set(bit_i);
+                        changes = true;
+                    }
+                }
+            }
         }
-        return info;
     }
+    if (changes)
+        gc.any_changes = true;
+}
 
+/**
+ * Return number of full pages free'd.
+ */
+size_t fullcollectshell()
+{
+    gc.stats.collection_started();
+    scope (exit)
+        gc.stats.collection_finished();
 
-    /**
-     * Compute bin for size.
-     */
-    static Bins findBin(size_t size)
+    // The purpose of the 'shell' is to ensure all the registers
+    // get put on the stack so they'll be scanned
+    void *sp;
+    size_t result;
+    version (GNU)
+    {
+        gcc.builtins.__builtin_unwind_init();
+        sp = & sp;
+    }
+    else version(LDC)
     {
-        Bins bin;
-        if (size <= 256)
+        version(X86)
         {
-            if (size <= 64)
+            uint eax,ecx,edx,ebx,ebp,esi,edi;
+            asm
             {
-                if (size <= 16)
-                    bin = B_16;
-                else if (size <= 32)
-                    bin = B_32;
-                else
-                    bin = B_64;
+                mov eax[EBP], EAX      ;
+                mov ecx[EBP], ECX      ;
+                mov edx[EBP], EDX      ;
+                mov ebx[EBP], EBX      ;
+                mov ebp[EBP], EBP      ;
+                mov esi[EBP], ESI      ;
+                mov edi[EBP], EDI      ;
+                mov  sp[EBP], ESP      ;
             }
-            else
+        }
+        else version (X86_64)
+        {
+            ulong rax,rbx,rcx,rdx,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15;
+            asm
             {
-                if (size <= 128)
-                    bin = B_128;
-                else
-                    bin = B_256;
+                movq rax[RBP], RAX      ;
+                movq rbx[RBP], RBX      ;
+                movq rcx[RBP], RCX      ;
+                movq rdx[RBP], RDX      ;
+                movq rbp[RBP], RBP      ;
+                movq rsi[RBP], RSI      ;
+                movq rdi[RBP], RDI      ;
+                movq r8 [RBP], R8       ;
+                movq r9 [RBP], R9       ;
+                movq r10[RBP], R10      ;
+                movq r11[RBP], R11      ;
+                movq r12[RBP], R12      ;
+                movq r13[RBP], R13      ;
+                movq r14[RBP], R14      ;
+                movq r15[RBP], R15      ;
+                movq  sp[RBP], RSP      ;
             }
         }
         else
         {
-            if (size <= 1024)
-            {
-                if (size <= 512)
-                    bin = B_512;
-                else
-                    bin = B_1024;
-            }
-            else
-            {
-                if (size <= 2048)
-                    bin = B_2048;
-                else
-                    bin = B_PAGE;
-            }
+            static assert( false, "Architecture not supported." );
         }
-        return bin;
     }
+    else
+    {
+    asm
+    {
+        pushad              ;
+        mov sp[EBP],ESP     ;
+    }
+    }
+    result = fullcollect(sp);
+    version (GNU)
+    {
+        // nothing to do
+    }
+    else version(LDC)
+    {
+        // nothing to do
+    }
+    else
+    {
+    asm
+    {
+        popad               ;
+    }
+    }
+    return result;
+}
 
 
-    /**
-     * Allocate a new pool of at least size bytes.
-     * Sort it into pools.
-     * Mark all memory in the pool as B_FREE.
-     * Return the actual number of bytes reserved or 0 on error.
-     */
-    size_t reserveNoSync(size_t size)
-    {
-        assert(size != 0);
-        size_t npages = (size + PAGESIZE - 1) / PAGESIZE;
-        Pool*  pool = newPool(npages);
+/**
+ *
+ */
+size_t fullcollect(void *stackTop)
+{
+    debug(COLLECT_PRINTF) printf("Gcx.fullcollect()\n");
+
+    // we always need to stop the world to make threads save the CPU registers
+    // in the stack and prepare themselves for thread_scanAll()
+    thread_suspendAll();
+    gc.stats.world_stopped();
+
+    if (opts.options.fork) {
+        os.pid_t child_pid = os.fork();
+        assert (child_pid != -1); // don't accept errors in non-release mode
+        switch (child_pid) {
+        case -1: // if fork() fails, fallback to stop-the-world
+            opts.options.fork = false;
+            break;
+        case 0: // child process (i.e. the collectors mark phase)
+            mark(stackTop);
+            cstdlib.exit(0);
+            break; // bogus, will never reach here
+        default: // parent process (i.e. the mutator)
+            // start the world again and wait for the mark phase to finish
+            thread_resumeAll();
+            gc.stats.world_started();
+            int status = void;
+            os.pid_t wait_pid = os.waitpid(child_pid, &status, 0);
+            assert (wait_pid == child_pid);
+            return sweep();
+        }
+
+    }
+
+    // if we reach here, we are using the standard stop-the-world collection
+    mark(stackTop);
+    thread_resumeAll();
+    gc.stats.world_started();
+
+    return sweep();
+}
 
-        if (!pool)
-            return 0;
-        return pool.npages * PAGESIZE;
-    }
 
+/**
+ *
+ */
+void mark(void *stackTop)
+{
+    debug(COLLECT_PRINTF) printf("\tmark()\n");
+
+    gc.p_cache = null;
+    gc.size_cache = 0;
 
-    /**
-     * Minimizes physical memory usage by returning free pools to the OS.
-     */
-    void minimizeNoSync()
+    gc.any_changes = false;
+    for (size_t n = 0; n < gc.pools.length; n++)
     {
-        size_t n;
-        size_t pn;
-        Pool*  pool;
+        Pool* pool = gc.pools[n];
+        pool.mark.zero();
+        pool.scan.zero();
+        pool.freebits.zero();
+    }
 
-        for (n = 0; n < pools.length; n++)
+    // Mark each free entry, so it doesn't get scanned
+    for (size_t n = 0; n < B_PAGE; n++)
+    {
+        for (List *list = gc.free_list[n]; list; list = list.next)
         {
-            pool = pools[n];
-            for (pn = 0; pn < pool.npages; pn++)
-            {
-                if (cast(Bins)pool.pagetable[pn] != B_FREE)
-                    break;
-            }
-            if (pn < pool.npages)
-                continue;
-            pool.Dtor();
-            pools.remove_at(n);
-            n--;
+            Pool* pool = list.pool;
+            auto ptr = cast(byte*) list;
+            assert (pool);
+            assert (pool.baseAddr <= ptr);
+            assert (ptr < pool.topAddr);
+            size_t bit_i = cast(size_t)(ptr - pool.baseAddr) / 16;
+            pool.freebits.set(bit_i);
         }
-        minAddr = pools[0].baseAddr;
-        maxAddr = pools[pools.length - 1].topAddr;
     }
 
+    for (size_t n = 0; n < gc.pools.length; n++)
+    {
+        Pool* pool = gc.pools[n];
+        pool.mark.copy(&pool.freebits);
+    }
 
-    /**
-     * Allocate a chunk of memory that is larger than a page.
-     * Return null if out of memory.
-     */
-    void *bigAlloc(size_t size)
+    /// Marks a range of memory in conservative mode.
+    void mark_conservative_range(void* pbot, void* ptop)
     {
-        Pool*  pool;
-        size_t npages;
-        size_t n;
-        size_t pn;
-        size_t freedpages;
-        void*  p;
-        int    state;
+        mark_range(pbot, ptop, PointerMap.init.bits.ptr);
+    }
+
+    rt_scanStaticData(&mark_conservative_range);
+
+    if (!gc.no_stack)
+    {
+        // Scan stacks and registers for each paused thread
+        thread_scanAll(&mark_conservative_range, stackTop);
+    }
+
+    // Scan roots
+    debug(COLLECT_PRINTF) printf("scan roots[]\n");
+    mark_conservative_range(gc.roots.ptr, gc.roots.ptr + gc.roots.length);
 
-        npages = (size + PAGESIZE - 1) / PAGESIZE;
+    // Scan ranges
+    debug(COLLECT_PRINTF) printf("scan ranges[]\n");
+    for (size_t n = 0; n < gc.ranges.length; n++)
+    {
+        debug(COLLECT_PRINTF) printf("\t%x .. %x\n", gc.ranges[n].pbot, gc.ranges[n].ptop);
+        mark_conservative_range(gc.ranges[n].pbot, gc.ranges[n].ptop);
+    }
 
-        for (state = 0; ; )
+    debug(COLLECT_PRINTF) printf("\tscan heap\n");
+    while (gc.any_changes)
+    {
+        gc.any_changes = false;
+        for (size_t n = 0; n < gc.pools.length; n++)
         {
-            // This code could use some refinement when repeatedly
-            // allocating very large arrays.
+            uint *bbase;
+            uint *b;
+            uint *btop;
 
-            for (n = 0; n < pools.length; n++)
-            {
-                pool = pools[n];
-                pn = pool.allocPages(npages);
-                if (pn != OPFAIL)
-                    goto L1;
-            }
+            Pool* pool = gc.pools[n];
 
-            // Failed
-            switch (state)
+            bbase = pool.scan.base();
+            btop = bbase + pool.scan.nwords;
+            for (b = bbase; b < btop;)
             {
-            case 0:
-                if (disabled)
+                Bins   bin;
+                size_t pn;
+                size_t u;
+                size_t bitm;
+                byte*  o;
+
+                bitm = *b;
+                if (!bitm)
                 {
-                    state = 1;
+                    b++;
                     continue;
                 }
-                // Try collecting
-                freedpages = fullcollectshell();
-                if (freedpages >= pools.length * ((POOLSIZE / PAGESIZE) / 4))
+                *b = 0;
+
+                o = pool.baseAddr + (b - bbase) * 32 * 16;
+                if (!(bitm & 0xFFFF))
                 {
-                    state = 1;
-                    continue;
+                    bitm >>= 16;
+                    o += 16 * 16;
                 }
-                // Release empty pools to prevent bloat
-                minimize();
-                // Allocate new pool
-                pool = newPool(npages);
-                if (!pool)
+                for (; bitm; o += 16, bitm >>= 1)
                 {
-                    state = 2;
-                    continue;
+                    if (!(bitm & 1))
+                        continue;
+
+                    pn = cast(size_t)(o - pool.baseAddr) / PAGESIZE;
+                    bin = cast(Bins)pool.pagetable[pn];
+                    if (bin < B_PAGE) {
+                        if (opts.options.conservative)
+                            mark_conservative_range(o, o + binsize[bin]);
+                        else {
+                            auto end_of_blk = cast(size_t**)(o +
+                                    binsize[bin] - size_t.sizeof);
+                            size_t* pm_bitmask = *end_of_blk;
+                            mark_range(o, end_of_blk, pm_bitmask);
+                        }
+                    }
+                    else if (bin == B_PAGE || bin == B_PAGEPLUS)
+                    {
+                        if (bin == B_PAGEPLUS)
+                        {
+                            while (pool.pagetable[pn - 1] != B_PAGE)
+                                pn--;
+                        }
+                        u = 1;
+                        while (pn + u < pool.npages &&
+                                pool.pagetable[pn + u] == B_PAGEPLUS)
+                            u++;
+
+                        size_t blk_size = u * PAGESIZE;
+                        if (opts.options.conservative)
+                            mark_conservative_range(o, o + blk_size);
+                        else {
+                            auto end_of_blk = cast(size_t**)(o + blk_size -
+                                    size_t.sizeof);
+                            size_t* pm_bitmask = *end_of_blk;
+                            mark_range(o, end_of_blk, pm_bitmask);
+                        }
+                    }
                 }
-                pn = pool.allocPages(npages);
-                assert(pn != OPFAIL);
-                goto L1;
-            case 1:
-                // Release empty pools to prevent bloat
-                minimize();
-                // Allocate new pool
-                pool = newPool(npages);
-                if (!pool)
-                    goto Lnomemory;
-                pn = pool.allocPages(npages);
-                assert(pn != OPFAIL);
-                goto L1;
-            case 2:
-                goto Lnomemory;
-            default:
-                assert(false);
             }
         }
-
-      L1:
-        pool.pagetable[pn] = B_PAGE;
-        if (npages > 1)
-            memset(&pool.pagetable[pn + 1], B_PAGEPLUS, npages - 1);
-        p = pool.baseAddr + pn * PAGESIZE;
-        memset(cast(char *)p + size, 0, npages * PAGESIZE - size);
-        if (opts.options.mem_stomp)
-            memset(p, 0xF1, size);
-        return p;
-
-      Lnomemory:
-        return null; // let mallocNoSync handle the error
     }
+}
 
 
-    /**
-     * Allocate a new pool with at least npages in it.
-     * Sort it into pools.
-     * Return null if failed.
-     */
-    Pool *newPool(size_t npages)
-    {
-        // Minimum of POOLSIZE
-        if (npages < POOLSIZE/PAGESIZE)
-            npages = POOLSIZE/PAGESIZE;
-        else if (npages > POOLSIZE/PAGESIZE)
+/**
+ * Sweep phase: release every block that is not marked and rebuild the
+ * small-block free lists.
+ *
+ * First pass over all pools and pages:
+ *  - page of small blocks (bin < B_PAGE): every unmarked block gets its
+ *    freebits bit set, is passed to rt_finalize() if its finals bit was set,
+ *    and has all its attributes cleared;
+ *  - B_PAGE page whose first bit is unmarked: finalized the same way, then
+ *    that page and the B_PAGEPLUS pages following it are set to B_FREE.
+ *
+ * gc.free_list is then emptied, and a second pass turns pages of small
+ * blocks that are entirely free into B_FREE pages, pushing the free blocks
+ * of the remaining pages onto gc.free_list[bin].
+ *
+ * Returns: pages freed from big blocks plus pages of small blocks recovered.
+ */
+size_t sweep()
+{
+    // Free up everything not marked
+    debug(COLLECT_PRINTF) printf("\tsweep\n");
+    size_t freedpages = 0;
+    size_t freed = 0; // bytes released; only reported by the debug printf below
+    for (size_t n = 0; n < gc.pools.length; n++)
+    {
+        Pool* pool = gc.pools[n];
+        pool.clear_cache();
+        uint*  bbase = pool.mark.base();
+        size_t pn;
+        for (pn = 0; pn < pool.npages; pn++, bbase += PAGESIZE / (32 * 16)) // bbase steps over one page of mark bits (1 bit per 16 bytes, 32 bits per uint)
         {
-            // Give us 150% of requested size, so there's room to extend
-            auto n = npages + (npages >> 1);
-            if (n < size_t.max/PAGESIZE)
-                npages = n;
-        }
+            Bins bin = cast(Bins)pool.pagetable[pn];
 
-        // Allocate successively larger pools up to 8 megs
-        if (pools.length)
-        {
-            size_t n = pools.length;
-            if (n > 8)
-                n = 8;                  // cap pool size at 8 megs
-            n *= (POOLSIZE / PAGESIZE);
-            if (npages < n)
-                npages = n;
-        }
+            if (bin < B_PAGE)
+            {
+                auto size = binsize[bin];
+                byte* p = pool.baseAddr + pn * PAGESIZE;
+                byte* ptop = p + PAGESIZE;
+                size_t bit_i = pn * (PAGESIZE/16); // bit index of the first block in this page
+                size_t bit_stride = size / 16; // bits between two consecutive blocks of this bin
 
-        Pool p;
-        p.initialize(npages);
-        if (!p.baseAddr)
-        {
-            p.Dtor();
-            return null;
-        }
+version(none) // BUG: doesn't work because freebits() must also be cleared
+{
+                // If free'd entire page
+                if (bbase[0] == 0 && bbase[1] == 0 && bbase[2] == 0 &&
+                        bbase[3] == 0 && bbase[4] == 0 && bbase[5] == 0 &&
+                        bbase[6] == 0 && bbase[7] == 0)
+                {
+                    for (; p < ptop; p += size, bit_i += bit_stride)
+                    {
+                        if (pool.finals.nbits && pool.finals.testClear(bit_i)) {
+                            if (opts.options.sentinel)
+                                rt_finalize(sentinel_add(p), false/*gc.no_stack > 0*/);
+                            else
+                                rt_finalize(p, false/*gc.no_stack > 0*/);
+                        }
+                        clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
 
-        Pool* pool = pools.insert_sorted(p);
-        if (pool)
-        {
-            minAddr = pools[0].baseAddr;
-            maxAddr = pools[pools.length - 1].topAddr;
+                        if (opts.options.mem_stomp)
+                            memset(p, 0xF3, size);
+                    }
+                    pool.pagetable[pn] = B_FREE;
+                    freed += PAGESIZE;
+                    continue;
+                }
+}
+                for (; p < ptop; p += size, bit_i += bit_stride)
+                {
+                    if (!pool.mark.test(bit_i))
+                    {
+                        if (opts.options.sentinel)
+                            sentinel_Invariant(sentinel_add(p));
+
+                        pool.freebits.set(bit_i); // read back by the free-list rebuild pass below
+                        if (pool.finals.nbits && pool.finals.testClear(bit_i)) {
+                            if (opts.options.sentinel)
+                                rt_finalize(sentinel_add(p), false/*gc.no_stack > 0*/);
+                            else
+                                rt_finalize(p, false/*gc.no_stack > 0*/);
+                        }
+                        clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
+
+                        if (opts.options.mem_stomp)
+                            memset(p, 0xF3, size);
+
+                        freed += size;
+                    }
+                }
+            }
+            else if (bin == B_PAGE)
+            {
+                size_t bit_i = pn * (PAGESIZE / 16);
+                if (!pool.mark.test(bit_i))
+                {
+                    byte *p = pool.baseAddr + pn * PAGESIZE;
+                    if (opts.options.sentinel)
+                        sentinel_Invariant(sentinel_add(p));
+                    if (pool.finals.nbits && pool.finals.testClear(bit_i)) {
+                        if (opts.options.sentinel)
+                            rt_finalize(sentinel_add(p), false/*gc.no_stack > 0*/);
+                        else
+                            rt_finalize(p, false/*gc.no_stack > 0*/);
+                    }
+                    clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
+
+                    debug(COLLECT_PRINTF) printf("\tcollecting big %x\n", p);
+                    pool.pagetable[pn] = B_FREE;
+                    freedpages++;
+                    if (opts.options.mem_stomp)
+                        memset(p, 0xF3, PAGESIZE);
+                    while (pn + 1 < pool.npages && pool.pagetable[pn + 1] == B_PAGEPLUS) // release the continuation pages of this block too
+                    {
+                        pn++;
+                        pool.pagetable[pn] = B_FREE;
+                        freedpages++;
+
+                        if (opts.options.mem_stomp)
+                        {
+                            p += PAGESIZE;
+                            memset(p, 0xF3, PAGESIZE);
+                        }
+                    }
+                }
+            }
         }
-        return pool;
     }
 
+    // Zero buckets
+    gc.free_list[] = null;
 
-    /**
-     * Allocate a page of bin's.
-     * Returns:
-     *  0       failed
-     */
-    int allocPage(Bins bin)
+    // Free complete pages, rebuild free list
+    debug(COLLECT_PRINTF) printf("\tfree complete pages\n");
+    size_t recoveredpages = 0;
+    for (size_t n = 0; n < gc.pools.length; n++)
     {
-        Pool*  pool;
-        size_t n;
-        size_t pn;
-        byte*  p;
-        byte*  ptop;
-
-        for (n = 0; n < pools.length; n++)
+        Pool* pool = gc.pools[n];
+        for (size_t pn = 0; pn < pool.npages; pn++)
         {
-            pool = pools[n];
-            pn = pool.allocPages(1);
-            if (pn != OPFAIL)
-                goto L1;
-        }
-        return 0;               // failed
-
-      L1:
-        pool.pagetable[pn] = cast(ubyte)bin;
+            Bins   bin = cast(Bins)pool.pagetable[pn];
+            size_t bit_i;
+            size_t u;
 
-        // Convert page to free list
-        size_t size = binsize[bin];
-        List **b = &bucket[bin];
+            if (bin < B_PAGE)
+            {
+                size_t size = binsize[bin];
+                size_t bit_stride = size / 16;
+                size_t bit_base = pn * (PAGESIZE / 16);
+                size_t bit_top = bit_base + (PAGESIZE / 16);
+                byte*  p;
+
+                bit_i = bit_base;
+                for (; bit_i < bit_top; bit_i += bit_stride)
+                {
+                    if (!pool.freebits.test(bit_i))
+                        goto Lnotfree; // at least one block of the page is still in use
+                }
+                pool.pagetable[pn] = B_FREE; // every block is free: give the whole page back
+                recoveredpages++;
+                continue;
 
-        p = pool.baseAddr + pn * PAGESIZE;
-        ptop = p + PAGESIZE;
-        for (; p < ptop; p += size)
-        {
-            (cast(List *)p).next = *b;
-            *b = cast(List *)p;
+             Lnotfree:
+                p = pool.baseAddr + pn * PAGESIZE;
+                for (u = 0; u < PAGESIZE; u += size)
+                {
+                    bit_i = bit_base + u / 16;
+                    if (pool.freebits.test(bit_i))
+                    {
+                        assert ((p+u) >= pool.baseAddr);
+                        assert ((p+u) < pool.topAddr);
+                        List* list = cast(List*) (p + u);
+                        // avoid unnecessary writes (it really saves time)
+                        if (list.next != gc.free_list[bin])
+                            list.next = gc.free_list[bin];
+                        if (list.pool != pool)
+                            list.pool = pool;
+                        gc.free_list[bin] = list; // the block becomes the new head of its bin's list
+                    }
+                }
+            }
         }
-        return 1;
     }
 
+    debug(COLLECT_PRINTF) printf("recovered pages = %d\n", recoveredpages);
+    debug(COLLECT_PRINTF) printf("\tfree'd %u bytes, %u pages from %u pools\n", freed, freedpages, gc.pools.length);
 
-    /**
-     * Marks a range of memory using the conservative bit mask.  Used for
-     * the stack, for the data segment, and additional memory ranges.
-     */
-    void mark_conservative(void* pbot, void* ptop)
+    return freedpages + recoveredpages;
+}
+
+
+/**
+ * Build the BlkAttr mask currently recorded for the block at bit index bit_i
+ * of pool.
+ *
+ * FINALIZE is reported when the finals bit set exists (nbits != 0) and has
+ * the bit set; NO_SCAN is reported when the noscan bit is set. NO_MOVE
+ * support is commented out.
+ *
+ * Returns: the OR of the BlkAttr flags found.
+ */
+uint getAttr(Pool* pool, size_t bit_i)
+in
+{
+    assert( pool );
+}
+body
+{
+    uint attrs; // D default-initializes integers to 0
+
+    if (pool.finals.nbits &&
+        pool.finals.test(bit_i))
+        attrs |= BlkAttr.FINALIZE;
+    if (pool.noscan.test(bit_i))
+        attrs |= BlkAttr.NO_SCAN;
+//        if (pool.nomove.nbits &&
+//            pool.nomove.test(bit_i))
+//            attrs |= BlkAttr.NO_MOVE;
+    return attrs;
+}
+
+
+/**
+ * Record the attributes selected by mask for the block at bit index bit_i
+ * of pool.
+ *
+ * FINALIZE: the finals bit set is allocated on first use, with as many bits
+ * as the mark bit set, and then the bit is set. NO_SCAN: the noscan bit is
+ * set. Bits are only ever set here; flags absent from mask are left as they
+ * are. NO_MOVE support is commented out.
+ */
+void setAttr(Pool* pool, size_t bit_i, uint mask)
+in
+{
+    assert( pool );
+}
+body
+{
+    if (mask & BlkAttr.FINALIZE)
+    {
+        if (!pool.finals.nbits) // finals is allocated lazily
+            pool.finals.alloc(pool.mark.nbits);
+        pool.finals.set(bit_i);
+    }
+    if (mask & BlkAttr.NO_SCAN)
     {
-        mark(pbot, ptop, PointerMap.init.bits.ptr);
+        pool.noscan.set(bit_i);
     }
+//        if (mask & BlkAttr.NO_MOVE)
+//        {
+//            if (!pool.nomove.nbits)
+//                pool.nomove.alloc(pool.mark.nbits);
+//            pool.nomove.set(bit_i);
+//        }
+}
 
 
-    /**
-     * Search a range of memory values and mark any pointers into the GC pool.
-     */
-    void mark(void *pbot, void *ptop, size_t* pm_bitmask)
-    {
-        const BITS_PER_WORD = size_t.sizeof * 8;
+/**
+ * Clear the attributes selected by mask for the block at bit index bit_i
+ * of pool.
+ *
+ * FINALIZE is cleared only when the finals bit set has been allocated
+ * (nbits != 0); NO_SCAN is cleared unconditionally. NO_MOVE support is
+ * commented out.
+ */
+void clrAttr(Pool* pool, size_t bit_i, uint mask)
+in
+{
+    assert( pool );
+}
+body
+{
+    if (mask & BlkAttr.FINALIZE && pool.finals.nbits) // finals may never have been allocated
+        pool.finals.clear(bit_i);
+    if (mask & BlkAttr.NO_SCAN)
+        pool.noscan.clear(bit_i);
+//        if (mask & BlkAttr.NO_MOVE && pool.nomove.nbits)
+//            pool.nomove.clear(bit_i);
+}
 
-        void **p1 = cast(void **)pbot;
-        void **p2 = cast(void **)ptop;
-        size_t pcache = 0;
-        uint changes = 0;
 
-        size_t type_size = pm_bitmask[0];
-        size_t* pm_bits = pm_bitmask + 1;
 
-        //printf("marking range: %p -> %p\n", pbot, ptop);
-        for (; p1 + type_size <= p2; p1 += type_size) {
-            for (size_t n = 0; n < type_size; n++) {
-                // scan bit set for this word
-                if (!(pm_bits[n / BITS_PER_WORD] & (1 << (n % BITS_PER_WORD))))
-                    continue;
+void initialize() // collector setup: stack bottom, options, lock object, stats; sets gc.inited
+{
+    int dummy;
+    gc.stack_bottom = cast(char*)&dummy; // first value: the address of a local of this frame
+    opts.parse(cstdlib.getenv("D_GC_OPTS")); // options are read from the D_GC_OPTS environment variable
+    // If we are going to fork, make sure we have the needed OS support
+    if (opts.options.fork)
+        opts.options.fork = os.HAVE_SHARED && os.HAVE_FORK;
+    gc.lock = GCLock.classinfo; // the object used in synchronized (gc.lock) blocks
+    gc.inited = 1;
+    setStackBottom(rt_stackBottom()); // presumably replaces the value set above; setStackBottom is not shown here
+    gc.stats = Stats(gc);
+}
 
-                void* p = *(p1 + n);
 
-                if (p < minAddr || p >= maxAddr)
-                    continue;
+// Allocate size bytes with attributes attrs: bins below B_PAGE are served from
+// gc.free_list[bin], anything else by bigAlloc(). If has_pointermap(attrs),
+// pm_bitmask is stored in the last word of the block. Calls onOutOfMemoryError() on failure.
+private void *malloc(size_t size, uint attrs, size_t* pm_bitmask)
+{
+    assert(size != 0);
 
+    gc.stats.malloc_started(size, attrs, pm_bitmask);
+    scope (exit)
+        gc.stats.malloc_finished(p); // NOTE(review): p is declared further down; confirm this resolves as intended
 
-                void* p = *(p1 + n);
 
-                if (p < minAddr || p >= maxAddr)
-                    continue;
+    void *p = null;
+    Bins bin;
 
-                if ((cast(size_t)p & ~(PAGESIZE-1)) == pcache)
-                    continue;
+    if (opts.options.sentinel)
+        size += SENTINEL_EXTRA; // undone before sentinel_init() below
 
-                Pool* pool = findPool(p);
-                if (pool)
-                {
-                    size_t offset = cast(size_t)(p - pool.baseAddr);
-                    size_t bit_i;
-                    size_t pn = offset / PAGESIZE;
-                    Bins   bin = cast(Bins)pool.pagetable[pn];
-
-                    // Adjust bit to be at start of allocated memory block
-                    if (bin <= B_PAGE)
-                        bit_i = (offset & notbinsize[bin]) >> 4;
-                    else if (bin == B_PAGEPLUS)
-                    {
-                        do
-                        {
-                            --pn;
-                        }
-                        while (cast(Bins)pool.pagetable[pn] == B_PAGEPLUS);
-                        bit_i = pn * (PAGESIZE / 16);
-                    }
-                    else
-                    {
-                        // Don't mark bits in B_FREE pages
-                        continue;
-                    }
+    bool has_pm = has_pointermap(attrs);
+    if (has_pm)
+        size += size_t.sizeof; // room for the pointer to the bitmask at the end of the block
 
-                    if (bin >= B_PAGE) // Cache B_PAGE and B_PAGEPLUS lookups
-                        pcache = cast(size_t)p & ~(PAGESIZE-1);
+    // Compute size bin
+    // Cache previous binsize lookup - Dave Fladebo.
+    static size_t lastsize = -1;
+    static Bins lastbin;
+    if (size == lastsize)
+        bin = lastbin;
+    else
+    {
+        bin = findBin(size);
+        lastsize = size;
+        lastbin = bin;
     }
 
-                    if (!pool.mark.test(bit_i))
-                    {
-                        pool.mark.set(bit_i);
-                        if (!pool.noscan.test(bit_i))
-                        {
-                            pool.scan.set(bit_i);
-                            changes = 1;
-                        }
-                    }
-                }
-            }
-        }
-        anychanges |= changes;
+    Pool* pool = void; // set from list.pool below, or presumably by bigAlloc() (asserted non-null after the call)
+    size_t capacity = void; // to figure out where to store the bitmask
+    if (bin < B_PAGE)
     {
-    /**
-     * Return number of full pages free'd.
-     */
-    size_t fullcollectshell()
+        p = gc.free_list[bin];
+        if (p is null)
         {
-        stats.collection_started();
-        scope (exit)
-            stats.collection_finished();
-
-        // The purpose of the 'shell' is to ensure all the registers
-        // get put on the stack so they'll be scanned
-        void *sp;
-        size_t result;
-        version (GNU)
+            if (!allocPage(bin) && !gc.disabled)   // try to find a new page
             {
-            gcc.builtins.__builtin_unwind_init();
-            sp = & sp;
-        }
-        else version(LDC)
-        {
-            version(X86)
+                if (!thread_needLock())
                 {
-                uint eax,ecx,edx,ebx,ebp,esi,edi;
-                asm
+                    /* Then we haven't locked it yet. Be sure
+                     * and gc.lock for a collection, since a finalizer
+                     * may start a new thread.
+                     */
+                    synchronized (gc.lock)
+                    {
+                        fullcollectshell();
+                    }
                 }
-                    mov eax[EBP], EAX      ;
-                    mov ecx[EBP], ECX      ;
-                    mov edx[EBP], EDX      ;
-                    mov ebx[EBP], EBX      ;
-                    mov ebp[EBP], EBP      ;
-                    mov esi[EBP], ESI      ;
-                    mov edi[EBP], EDI      ;
-                    mov  sp[EBP], ESP      ;
+                else if (!fullcollectshell())       // collect to find a new page
                 {
-            }
-            else version (X86_64)
-            {
-                ulong rax,rbx,rcx,rdx,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15;
-                asm
+                    //newPool(1);
                 }
             }
-                    movq rax[RBP], RAX      ;
-                    movq rbx[RBP], RBX      ;
-                    movq rcx[RBP], RCX      ;
-                    movq rdx[RBP], RDX      ;
-                    movq rbp[RBP], RBP      ;
-                    movq rsi[RBP], RSI      ;
-                    movq rdi[RBP], RDI      ;
-                    movq r8 [RBP], R8       ;
-                    movq r9 [RBP], R9       ;
-                    movq r10[RBP], R10      ;
-                    movq r11[RBP], R11      ;
-                    movq r12[RBP], R12      ;
-                    movq r13[RBP], R13      ;
-                    movq r14[RBP], R14      ;
-                    movq r15[RBP], R15      ;
-                    movq  sp[RBP], RSP      ;
+            if (!gc.free_list[bin] && !allocPage(bin))
             {
-            else
+                newPool(1);         // allocate new pool to find a new page
+                // TODO: hint allocPage() to use the pool we just created
+                int result = allocPage(bin);
+                if (!result)
+                    onOutOfMemoryError();
             }
-                static assert( false, "Architecture not supported." );
+            p = gc.free_list[bin];
+        }
+        capacity = binsize[bin];
+
+        // Return next item from free list
+        List* list = cast(List*) p;
+        assert ((cast(byte*)list) >= list.pool.baseAddr);
+        assert ((cast(byte*)list) < list.pool.topAddr);
+        gc.free_list[bin] = list.next;
+        pool = list.pool;
+        if (!(attrs & BlkAttr.NO_SCAN))
+            memset(p + size, 0, capacity - size); // zero the unused tail of blocks that are not NO_SCAN
+        if (opts.options.mem_stomp)
+            memset(p, 0xF0, size);
+    }
+    else
+    {
+        p = bigAlloc(size, pool);
+        if (!p)
+            onOutOfMemoryError();
+        assert (pool !is null);
+        // Round the size up to the number of pages needed to store it
+        size_t npages = (size + PAGESIZE - 1) / PAGESIZE;
+        capacity = npages * PAGESIZE;
     }
-        }
-        else
-        {
-        asm
-        {
-            pushad              ;
-            mov sp[EBP],ESP     ;
-        }
-        }
-        result = fullcollect(sp);
-        version (GNU)
-        {
-            // nothing to do
-        }
-        else version(LDC)
-        {
-            // nothing to do
-        }
-        else
-        {
-        asm
-        {
-            popad               ;
-        }
-        }
-        return result;
 
+    // Store the bit mask AFTER SENTINEL_POST
+    // TODO: store it BEFORE, so the bitmask is protected too
+    if (has_pm) {
+        auto end_of_blk = cast(size_t**)(p + capacity - size_t.sizeof);
+        *end_of_blk = pm_bitmask;
+        size -= size_t.sizeof; // undo the adjustment made at the top
+    }
 
-    /**
-     *
-     */
-    size_t fullcollect(void *stackTop)
-    {
-        size_t n;
-        Pool*  pool;
+    if (opts.options.sentinel) {
+        size -= SENTINEL_EXTRA;
+        p = sentinel_add(p);
+        sentinel_init(p, size);
+    }
 
-        debug(COLLECT_PRINTF) printf("Gcx.fullcollect()\n");
+    if (attrs)
+        setAttr(pool, cast(size_t)(p - pool.baseAddr) / 16, attrs); // bit index: one bit per 16 bytes of pool
 
-        thread_suspendAll();
-        stats.world_stopped();
+    return p;
+}
 
-        p_cache = null;
-        size_cache = 0;
 
-        anychanges = 0;
-        for (n = 0; n < pools.length; n++)
-        {
-            pool = pools[n];
-            pool.mark.zero();
-            pool.scan.zero();
-            pool.freebits.zero();
-        }
+// Same as malloc(size, attrs, pm_bitmask), but the first size bytes of the
+// returned block are set to zero before it is handed back.
+// size must be non-zero (asserted).
+private void *calloc(size_t size, uint attrs, size_t* pm_bitmask)
+{
+    assert(size != 0);
+
+    void *p = malloc(size, attrs, pm_bitmask);
+    memset(p, 0, size);
+    return p;
+}
 
-        // Mark each free entry, so it doesn't get scanned
-        for (n = 0; n < B_PAGE; n++)
-        {
-            for (List *list = bucket[n]; list; list = list.next)
-            {
-                pool = findPool(list);
-                assert(pool);
-                pool.freebits.set(cast(size_t)(cast(byte*)list - pool.baseAddr) / 16);
-            }
-        }
 
-        for (n = 0; n < pools.length; n++)
+// Resize block p to size bytes. size == 0 calls free(p) and returns null; a null
+// p behaves like malloc(). Returns null if p is in no pool. Page-sized blocks are
+// shrunk or grown in place when possible, else a new block is allocated and copied.
+private void *realloc(void *p, size_t size, uint attrs,
+        size_t* pm_bitmask)
+{
+    if (!size)
+    {
+        if (p)
         {
-            pool = pools[n];
-            pool.mark.copy(&pool.freebits);
+            free(p);
+            p = null;
         }
+    }
+    else if (!p)
+    {
+        p = malloc(size, attrs, pm_bitmask);
+    }
+    else
+    {
+        Pool* pool = findPool(p);
+        if (pool is null)
+            return null;
 
-        rt_scanStaticData( &mark_conservative );
-
-        if (!noStack)
-        {
-            // Scan stacks and registers for each paused thread
-            thread_scanAll( &mark_conservative, stackTop );
+        // Set or retrieve attributes as appropriate
+        auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
+        if (attrs) { // explicit attrs replace whatever was stored for the block
+            clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
+            setAttr(pool, bit_i, attrs);
         }
+        else
+            attrs = getAttr(pool, bit_i); // otherwise keep the block's current attributes
 
-        // Scan roots
-        debug(COLLECT_PRINTF) printf("scan roots[]\n");
-        mark_conservative(roots.ptr, roots.ptr + roots.length);
+        void* blk_base_addr = pool.findBase(p);
+        size_t blk_size = pool.findSize(p);
+        bool has_pm = has_pointermap(attrs);
+        size_t pm_bitmask_size = 0;
+        if (has_pm) {
+            pm_bitmask_size = size_t.sizeof;
+            // Retrieve pointer map bit mask if appropriate
+            if (pm_bitmask is null) {
+                auto end_of_blk = cast(size_t**)(blk_base_addr +
+                        blk_size - size_t.sizeof);
+                pm_bitmask = *end_of_blk;
+            }
+        }
 
-        // Scan ranges
-        debug(COLLECT_PRINTF) printf("scan ranges[]\n");
-        for (n = 0; n < ranges.length; n++)
+        if (opts.options.sentinel)
         {
-            debug(COLLECT_PRINTF) printf("\t%x .. %x\n", ranges[n].pbot, ranges[n].ptop);
-            mark_conservative(ranges[n].pbot, ranges[n].ptop);
+            sentinel_Invariant(p);
+            size_t sentinel_stored_size = *sentinel_size(p);
+            if (sentinel_stored_size != size) // with sentinels any size change means allocate and copy
+            {
+                void* p2 = malloc(size, attrs, pm_bitmask);
+                if (sentinel_stored_size < size)
+                    size = sentinel_stored_size; // copy no more than the old block held
+                cstring.memcpy(p2, p, size);
+                p = p2;
+            }
         }
-
-        debug(COLLECT_PRINTF) printf("\tscan heap\n");
-        while (anychanges)
+        else
         {
-            anychanges = 0;
-            for (n = 0; n < pools.length; n++)
+            size += pm_bitmask_size; // from here on size includes the trailing bitmask word, if any
+            if (blk_size >= PAGESIZE && size >= PAGESIZE)
             {
-                uint *bbase;
-                uint *b;
-                uint *btop;
+                auto psz = blk_size / PAGESIZE;
+                auto newsz = (size + PAGESIZE - 1) / PAGESIZE;
+                if (newsz == psz) // same number of pages: nothing to do
+                    return p;
 
-                pool = pools[n];
+                auto pagenum = (p - pool.baseAddr) / PAGESIZE;
 
-                bbase = pool.scan.base();
-                btop = bbase + pool.scan.nwords;
-                for (b = bbase; b < btop;)
+                if (newsz < psz)
                 {
-                    Bins   bin;
-                    size_t pn;
-                    size_t u;
-                    size_t bitm;
-                    byte*  o;
-
-                    bitm = *b;
-                    if (!bitm)
-                    {
-                        b++;
-                        continue;
-                    }
-                    *b = 0;
-
-                    o = pool.baseAddr + (b - bbase) * 32 * 16;
-                    if (!(bitm & 0xFFFF))
-                    {
-                        bitm >>= 16;
-                        o += 16 * 16;
+                    // Shrink in place
+                    if (opts.options.mem_stomp)
+                        memset(p + size - pm_bitmask_size, 0xF2,
+                                blk_size - size - pm_bitmask_size);
+                    pool.freePages(pagenum + newsz, psz - newsz);
+                    auto new_blk_size = (PAGESIZE * newsz);
+                    // update the size cache, assuming that is very likely the
+                    // size of this block will be queried in the near future
+                    pool.update_cache(p, new_blk_size);
+                    if (has_pm) { // the bitmask pointer moves to the end of the shrunk block
+                        auto end_of_blk = cast(size_t**)(blk_base_addr +
+                                new_blk_size - pm_bitmask_size);
+                        *end_of_blk = pm_bitmask;
                     }
-                    for (; bitm; o += 16, bitm >>= 1)
+                    return p;
+                }
+                else if (pagenum + newsz <= pool.npages)
+                {
+                    // Attempt to expand in place
+                    for (size_t i = pagenum + psz; 1;) // walk the pages right after the block
                     {
-                        if (!(bitm & 1))
-                            continue;
-
-                        pn = cast(size_t)(o - pool.baseAddr) / PAGESIZE;
-                        bin = cast(Bins)pool.pagetable[pn];
-                        if (bin < B_PAGE) {
-                            if (opts.options.conservative)
-                                mark_conservative(o, o + binsize[bin]);
-                            else {
-                                auto end_of_blk = cast(size_t**)(o +
-                                        binsize[bin] - size_t.sizeof);
-                                size_t* pm_bitmask = *end_of_blk;
-                                mark(o, end_of_blk, pm_bitmask);
+                        if (i == pagenum + newsz) // all the extra pages needed were free
+                        {
+                            if (opts.options.mem_stomp)
+                                memset(p + blk_size - pm_bitmask_size,
+                                        0xF0, size - blk_size
+                                        - pm_bitmask_size);
+                            memset(pool.pagetable + pagenum +
+                                    psz, B_PAGEPLUS, newsz - psz);
+                            auto new_blk_size = (PAGESIZE * newsz);
+                            // update the size cache, assuming that is very
+                            // likely the size of this block will be queried in
+                            // the near future
+                            pool.update_cache(p, new_blk_size);
+                            if (has_pm) {
+                                auto end_of_blk = cast(size_t**)(
+                                        blk_base_addr + new_blk_size -
+                                        pm_bitmask_size);
+                                *end_of_blk = pm_bitmask;
                             }
+                            return p;
                         }
-                        else if (bin == B_PAGE || bin == B_PAGEPLUS)
+                        if (i == pool.npages)
                         {
-                            if (bin == B_PAGEPLUS)
-                            {
-                                while (pool.pagetable[pn - 1] != B_PAGE)
-                                    pn--;
-                            }
-                            u = 1;
-                            while (pn + u < pool.npages &&
-                                    pool.pagetable[pn + u] == B_PAGEPLUS)
-                                u++;
-
-                            size_t blk_size = u * PAGESIZE;
-                            if (opts.options.conservative)
-                                mark_conservative(o, o + blk_size);
-                            else {
-                                auto end_of_blk = cast(size_t**)(o + blk_size -
-                                        size_t.sizeof);
-                                size_t* pm_bitmask = *end_of_blk;
-                                mark(o, end_of_blk, pm_bitmask);
-                            }
+                            break;
                         }
+                        if (pool.pagetable[i] != B_FREE) // a following page is in use: cannot grow in place
+                            break;
+                        i++;
                     }
                 }
             }
+            // if new size is bigger or less than half
+            if (blk_size < size || blk_size > size * 2)
+            {
+                size -= pm_bitmask_size; // back to the requested size; malloc() adds the word again
+                blk_size -= pm_bitmask_size;
+                void* p2 = malloc(size, attrs, pm_bitmask);
+                if (blk_size < size)
+                    size = blk_size; // copy no more than the old block held
+                cstring.memcpy(p2, p, size);
+                p = p2;
+            }
         }
+    }
+    return p;
+}
 
-        thread_resumeAll();
-        stats.world_started();
 
-        // Free up everything not marked
-        debug(COLLECT_PRINTF) printf("\tfree'ing\n");
-        size_t freedpages = 0;
-        size_t freed = 0;
-        for (n = 0; n < pools.length; n++)
-        {
-            pool = pools[n];
-            uint*  bbase = pool.mark.base();
-            size_t pn;
-            for (pn = 0; pn < pool.npages; pn++, bbase += PAGESIZE / (32 * 16))
-            {
-                Bins bin = cast(Bins)pool.pagetable[pn];
-
-                if (bin < B_PAGE)
-                {
-                    auto size = binsize[bin];
-                    byte* p = pool.baseAddr + pn * PAGESIZE;
-                    byte* ptop = p + PAGESIZE;
-                    size_t bit_i = pn * (PAGESIZE/16);
-                    size_t bit_stride = size / 16;
-
-    version(none) // BUG: doesn't work because freebits() must also be cleared
-    {
-                    // If free'd entire page
-                    if (bbase[0] == 0 && bbase[1] == 0 && bbase[2] == 0 &&
-                            bbase[3] == 0 && bbase[4] == 0 && bbase[5] == 0 &&
-                            bbase[6] == 0 && bbase[7] == 0)
-                    {
-                        for (; p < ptop; p += size, bit_i += bit_stride)
-                        {
-                            if (pool.finals.nbits && pool.finals.testClear(bit_i)) {
-                                if (opts.options.sentinel)
-                                    rt_finalize(cast(List *)sentinel_add(p), false/*noStack > 0*/);
-                                else
-                                    rt_finalize(cast(List *)p, false/*noStack > 0*/);
-                            }
-                            this.clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
-
-                            List *list = cast(List *)p;
-
-                            if (opts.options.mem_stomp)
-                                memset(p, 0xF3, size);
-                        }
-                        pool.pagetable[pn] = B_FREE;
-                        freed += PAGESIZE;
-                        continue;
-                    }
-    }
-                    for (; p < ptop; p += size, bit_i += bit_stride)
-                    {
-                        if (!pool.mark.test(bit_i))
-                        {
-                            if (opts.options.sentinel)
-                                sentinel_Invariant(sentinel_add(p));
-
-                            pool.freebits.set(bit_i);
-                            if (pool.finals.nbits && pool.finals.testClear(bit_i)) {
-                                if (opts.options.sentinel)
-                                    rt_finalize(cast(List *)sentinel_add(p), false/*noStack > 0*/);
-                                else
-                                    rt_finalize(cast(List *)p, false/*noStack > 0*/);
-                            }
-                            clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
-
-                            List *list = cast(List *)p;
-
-                            if (opts.options.mem_stomp)
-                                memset(p, 0xF3, size);
-
-                            freed += size;
-                        }
-                    }
-                }
-                else if (bin == B_PAGE)
-                {
-                    size_t bit_i = pn * (PAGESIZE / 16);
-                    if (!pool.mark.test(bit_i))
-                    {
-                        byte *p = pool.baseAddr + pn * PAGESIZE;
-                        if (opts.options.sentinel)
-                            sentinel_Invariant(sentinel_add(p));
-                        if (pool.finals.nbits && pool.finals.testClear(bit_i)) {
-                            if (opts.options.sentinel)
-                                rt_finalize(sentinel_add(p), false/*noStack > 0*/);
-                            else
-                                rt_finalize(p, false/*noStack > 0*/);
-                        }
-                        clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
-
-                        debug(COLLECT_PRINTF) printf("\tcollecting big %x\n", p);
-                        pool.pagetable[pn] = B_FREE;
-                        freedpages++;
-                        if (opts.options.mem_stomp)
-                            memset(p, 0xF3, PAGESIZE);
-                        while (pn + 1 < pool.npages && pool.pagetable[pn + 1] == B_PAGEPLUS)
-                        {
-                            pn++;
-                            pool.pagetable[pn] = B_FREE;
-                            freedpages++;
-
-                            if (opts.options.mem_stomp)
-                            {
-                                p += PAGESIZE;
-                                memset(p, 0xF3, PAGESIZE);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        // Zero buckets
-        bucket[] = null;
-
-        // Free complete pages, rebuild free list
-        debug(COLLECT_PRINTF) printf("\tfree complete pages\n");
-        size_t recoveredpages = 0;
-        for (n = 0; n < pools.length; n++)
-        {
-            pool = pools[n];
-            for (size_t pn = 0; pn < pool.npages; pn++)
-            {
-                Bins   bin = cast(Bins)pool.pagetable[pn];
-                size_t bit_i;
-                size_t u;
-
-                if (bin < B_PAGE)
-                {
-                    size_t size = binsize[bin];
-                    size_t bit_stride = size / 16;
-                    size_t bit_base = pn * (PAGESIZE / 16);
-                    size_t bit_top = bit_base + (PAGESIZE / 16);
-                    byte*  p;
-
-                    bit_i = bit_base;
-                    for (; bit_i < bit_top; bit_i += bit_stride)
-                    {
-                        if (!pool.freebits.test(bit_i))
-                            goto Lnotfree;
-                    }
-                    pool.pagetable[pn] = B_FREE;
-                    recoveredpages++;
-                    continue;
-
-                 Lnotfree:
-                    p = pool.baseAddr + pn * PAGESIZE;
-                    for (u = 0; u < PAGESIZE; u += size)
-                    {
-                        bit_i = bit_base + u / 16;
-                        if (pool.freebits.test(bit_i))
-                        {
-                            List *list = cast(List *)(p + u);
-                            // avoid unnecessary writes
-                            if (list.next != bucket[bin])
-                                list.next = bucket[bin];
-                            bucket[bin] = list;
-                        }
-                    }
-                }
-            }
-        }
-
-        debug(COLLECT_PRINTF) printf("recovered pages = %d\n", recoveredpages);
-        debug(COLLECT_PRINTF) printf("\tfree'd %u bytes, %u pages from %u pools\n", freed, freedpages, pools.length);
-
-        return freedpages + recoveredpages;
-    }
-
-
-    /**
-     *
-     */
-    uint getAttr(Pool* pool, size_t bit_i)
-    in
-    {
-        assert( pool );
-    }
-    body
-    {
-        uint attrs;
-
-        if (pool.finals.nbits &&
-            pool.finals.test(bit_i))
-            attrs |= BlkAttr.FINALIZE;
-        if (pool.noscan.test(bit_i))
-            attrs |= BlkAttr.NO_SCAN;
-//        if (pool.nomove.nbits &&
-//            pool.nomove.test(bit_i))
-//            attrs |= BlkAttr.NO_MOVE;
-        return attrs;
-    }
-
-
-    /**
-     *
-     */
-    void setAttr(Pool* pool, size_t bit_i, uint mask)
-    in
-    {
-        assert( pool );
-    }
-    body
-    {
-        if (mask & BlkAttr.FINALIZE)
-        {
-            if (!pool.finals.nbits)
-                pool.finals.alloc(pool.mark.nbits);
-            pool.finals.set(bit_i);
-        }
-        if (mask & BlkAttr.NO_SCAN)
-        {
-            pool.noscan.set(bit_i);
-        }
-//        if (mask & BlkAttr.NO_MOVE)
-//        {
-//            if (!pool.nomove.nbits)
-//                pool.nomove.alloc(pool.mark.nbits);
-//            pool.nomove.set(bit_i);
-//        }
-    }
-
-
-    /**
-     *
-     */
-    void clrAttr(Pool* pool, size_t bit_i, uint mask)
-    in
-    {
-        assert( pool );
-    }
-    body
-    {
-        if (mask & BlkAttr.FINALIZE && pool.finals.nbits)
-            pool.finals.clear(bit_i);
-        if (mask & BlkAttr.NO_SCAN)
-            pool.noscan.clear(bit_i);
-//        if (mask & BlkAttr.NO_MOVE && pool.nomove.nbits)
-//            pool.nomove.clear(bit_i);
-    }
-
-
-
-    void initialize()
-    {
-        int dummy;
-        stackBottom = cast(char*)&dummy;
-        opts.parse(cstdlib.getenv("D_GC_OPTS"));
-        lock = GCLock.classinfo;
-        inited = 1;
-        setStackBottom(rt_stackBottom());
-        stats = Stats(this);
-    }
-
-
-    /**
-     *
-     */
-    void enable()
-    {
-        if (!thread_needLock())
-        {
-            assert(this.disabled > 0);
-            this.disabled--;
-        }
-        else synchronized (lock)
-        {
-            assert(this.disabled > 0);
-            this.disabled--;
-        }
-    }
-
-
-    /**
-     *
-     */
-    void disable()
-    {
-        if (!thread_needLock())
-        {
-            this.disabled++;
-        }
-        else synchronized (lock)
-        {
-            this.disabled++;
-        }
-    }
-
-
-    /**
-     *
-     */
-    uint getAttr(void* p)
-    {
-        if (!p)
-        {
-            return 0;
-        }
-
-        uint go()
-        {
-            Pool* pool = this.findPool(p);
-            uint  old_attrs = 0;
-
-            if (pool)
-            {
-                auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
-
-                old_attrs = this.getAttr(pool, bit_i);
-            }
-            return old_attrs;
-        }
-
-        if (!thread_needLock())
-        {
-            return go();
-        }
-        else synchronized (lock)
-        {
-            return go();
-        }
-    }
-
-
-    /**
-     *
-     */
-    uint setAttr(void* p, uint mask)
-    {
-        if (!p)
-        {
-            return 0;
-        }
-
-        uint go()
-        {
-            Pool* pool = this.findPool(p);
-            uint  old_attrs = 0;
-
-            if (pool)
-            {
-                auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
-
-                old_attrs = this.getAttr(pool, bit_i);
-                this.setAttr(pool, bit_i, mask);
-            }
-            return old_attrs;
-        }
-
-        if (!thread_needLock())
-        {
-            return go();
-        }
-        else synchronized (lock)
-        {
-            return go();
-        }
-    }
-
-
-    /**
-     *
-     */
-    uint clrAttr(void* p, uint mask)
-    {
-        if (!p)
-        {
-            return 0;
-        }
-
-        uint go()
-        {
-            Pool* pool = this.findPool(p);
-            uint  old_attrs = 0;
-
-            if (pool)
-            {
-                auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
-
-                old_attrs = this.getAttr(pool, bit_i);
-                this.clrAttr(pool, bit_i, mask);
-            }
-            return old_attrs;
-        }
-
-        if (!thread_needLock())
-        {
-            return go();
-        }
-        else synchronized (lock)
-        {
-            return go();
-        }
-    }
-
-
-    /**
-     *
-     */
-    void *malloc(size_t size, uint attrs, PointerMap ptrmap)
-    {
-        if (!size)
-        {
-            return null;
-        }
-
-        if (!thread_needLock())
-        {
-            return mallocNoSync(size, attrs, ptrmap.bits.ptr);
-        }
-        else synchronized (lock)
-        {
-            return mallocNoSync(size, attrs, ptrmap.bits.ptr);
-        }
-    }
-
-
-    //
-    //
-    //
-    private void *mallocNoSync(size_t size, uint attrs, size_t* pm_bitmask)
-    {
-        assert(size != 0);
-
-        stats.malloc_started(size, attrs, pm_bitmask);
-        scope (exit)
-            stats.malloc_finished(p);
-
-        void *p = null;
-        Bins bin;
-
-        if (opts.options.sentinel)
-            size += SENTINEL_EXTRA;
-
-        bool has_pm = has_pointermap(attrs);
-        if (has_pm)
-            size += size_t.sizeof;
-
-        // Compute size bin
-        // Cache previous binsize lookup - Dave Fladebo.
-        static size_t lastsize = -1;
-        static Bins lastbin;
-        if (size == lastsize)
-            bin = lastbin;
-        else
-        {
-            bin = this.findBin(size);
-            lastsize = size;
-            lastbin = bin;
-        }
-
-        size_t capacity; // to figure out where to store the bitmask
-        if (bin < B_PAGE)
-        {
-            p = this.bucket[bin];
-            if (p is null)
-            {
-                if (!this.allocPage(bin) && !this.disabled)   // try to find a new page
-                {
-                    if (!thread_needLock())
-                    {
-                        /* Then we haven't locked it yet. Be sure
-                         * and lock for a collection, since a finalizer
-                         * may start a new thread.
-                         */
-                        synchronized (lock)
-                        {
-                            this.fullcollectshell();
-                        }
-                    }
-                    else if (!this.fullcollectshell())       // collect to find a new page
-                    {
-                        //this.newPool(1);
-                    }
-                }
-                if (!this.bucket[bin] && !this.allocPage(bin))
-                {
-                    this.newPool(1);         // allocate new pool to find a new page
-                    int result = this.allocPage(bin);
-                    if (!result)
-                        onOutOfMemoryError();
-                }
-                p = this.bucket[bin];
-            }
-            capacity = binsize[bin];
-
-            // Return next item from free list
-            this.bucket[bin] = (cast(List*)p).next;
-            if (!(attrs & BlkAttr.NO_SCAN))
-                memset(p + size, 0, capacity - size);
-            if (opts.options.mem_stomp)
-                memset(p, 0xF0, size);
-        }
-        else
-        {
-            p = this.bigAlloc(size);
-            if (!p)
-                onOutOfMemoryError();
-            // Round the size up to the number of pages needed to store it
-            size_t npages = (size + PAGESIZE - 1) / PAGESIZE;
-            capacity = npages * PAGESIZE;
-        }
-
-        // Store the bit mask AFTER SENTINEL_POST
-        // TODO: store it BEFORE, so the bitmask is protected too
-        if (has_pm) {
-            auto end_of_blk = cast(size_t**)(p + capacity - size_t.sizeof);
-            *end_of_blk = pm_bitmask;
-            size -= size_t.sizeof;
-        }
-
-        if (opts.options.sentinel) {
-            size -= SENTINEL_EXTRA;
-            p = sentinel_add(p);
-            sentinel_init(p, size);
-        }
-
-        if (attrs)
-        {
-            Pool *pool = this.findPool(p);
-            assert(pool);
-
-            this.setAttr(pool, cast(size_t)(p - pool.baseAddr) / 16, attrs);
-        }
-        return p;
-    }
-
-
-    /**
-     *
-     */
-    void *calloc(size_t size, uint attrs, PointerMap ptrmap)
-    {
-        if (!size)
-        {
-            return null;
-        }
-
-        if (!thread_needLock())
-        {
-            return callocNoSync(size, attrs, ptrmap.bits.ptr);
-        }
-        else synchronized (lock)
-        {
-            return callocNoSync(size, attrs, ptrmap.bits.ptr);
-        }
-    }
-
-
-    //
-    //
-    //
-    private void *callocNoSync(size_t size, uint attrs, size_t* pm_bitmask)
-    {
-        assert(size != 0);
-
-        void *p = mallocNoSync(size, attrs, pm_bitmask);
-        memset(p, 0, size);
-        return p;
-    }
-
-
-    /**
-     *
-     */
-    void *realloc(void *p, size_t size, uint attrs, PointerMap ptrmap)
-    {
-        if (!thread_needLock())
-        {
-            return reallocNoSync(p, size, attrs, ptrmap.bits.ptr);
-        }
-        else synchronized (lock)
-        {
-            return reallocNoSync(p, size, attrs, ptrmap.bits.ptr);
-        }
-    }
-
-
-    //
-    //
-    //
-    private void *reallocNoSync(void *p, size_t size, uint attrs,
-            size_t* pm_bitmask)
-    {
-        if (!size)
-        {
-            if (p)
-            {
-                freeNoSync(p);
-                p = null;
-            }
-        }
-        else if (!p)
-        {
-            p = mallocNoSync(size, attrs, pm_bitmask);
-        }
-        else
-        {
-            Pool* pool = this.findPool(p);
-            if (pool is null)
-                return null;
-
-            // Set or retrieve attributes as appropriate
-            auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
-            if (attrs) {
-                this.clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
-                this.setAttr(pool, bit_i, attrs);
-            }
-            else
-                attrs = this.getAttr(pool, bit_i);
-
-            void* blk_base_addr = this.findBase(p);
-            size_t blk_size = this.findSize(p);
-            bool has_pm = has_pointermap(attrs);
-            size_t pm_bitmask_size = 0;
-            if (has_pm) {
-                pm_bitmask_size = size_t.sizeof;
-                // Retrieve pointer map bit mask if appropriate
-                if (pm_bitmask is null) {
-                    auto end_of_blk = cast(size_t**)(blk_base_addr +
-                            blk_size - size_t.sizeof);
-                    pm_bitmask = *end_of_blk;
-                }
-            }
-
-            if (opts.options.sentinel)
-            {
-                sentinel_Invariant(p);
-                size_t sentinel_stored_size = *sentinel_size(p);
-                if (sentinel_stored_size != size)
-                {
-                    void* p2 = mallocNoSync(size, attrs, pm_bitmask);
-                    if (sentinel_stored_size < size)
-                        size = sentinel_stored_size;
-                    cstring.memcpy(p2, p, size);
-                    p = p2;
-                }
-            }
-            else
-            {
-                size += pm_bitmask_size;
-                if (blk_size >= PAGESIZE && size >= PAGESIZE)
-                {
-                    auto psz = blk_size / PAGESIZE;
-                    auto newsz = (size + PAGESIZE - 1) / PAGESIZE;
-                    if (newsz == psz)
-                        return p;
-
-                    auto pagenum = (p - pool.baseAddr) / PAGESIZE;
-
-                    if (newsz < psz)
-                    {
-                        // Shrink in place
-                        synchronized (lock)
-                        {
-                            if (opts.options.mem_stomp)
-                                memset(p + size - pm_bitmask_size, 0xF2,
-                                        blk_size - size - pm_bitmask_size);
-                            pool.freePages(pagenum + newsz, psz - newsz);
-                        }
-                        if (has_pm) {
-                            auto end_of_blk = cast(size_t**)(
-                                    blk_base_addr + (PAGESIZE * newsz) -
-                                    pm_bitmask_size);
-                            *end_of_blk = pm_bitmask;
-                        }
-                        return p;
-                    }
-                    else if (pagenum + newsz <= pool.npages)
-                    {
-                        // Attempt to expand in place
-                        synchronized (lock)
-                        {
-                            for (size_t i = pagenum + psz; 1;)
-                            {
-                                if (i == pagenum + newsz)
-                                {
-                                    if (opts.options.mem_stomp)
-                                        memset(p + blk_size - pm_bitmask_size,
-                                                0xF0, size - blk_size
-                                                - pm_bitmask_size);
-                                    memset(pool.pagetable + pagenum +
-                                            psz, B_PAGEPLUS, newsz - psz);
-                                    if (has_pm) {
-                                        auto end_of_blk = cast(size_t**)(
-                                                blk_base_addr +
-                                                (PAGESIZE * newsz) -
-                                                pm_bitmask_size);
-                                        *end_of_blk = pm_bitmask;
-                                    }
-                                    return p;
-                                }
-                                if (i == pool.npages)
-                                {
-                                    break;
-                                }
-                                if (pool.pagetable[i] != B_FREE)
-                                    break;
-                                i++;
-                            }
-                        }
-                    }
-                }
-                // if new size is bigger or less than half
-                if (blk_size < size || blk_size > size * 2)
-                {
-                    size -= pm_bitmask_size;
-                    blk_size -= pm_bitmask_size;
-                    void* p2 = mallocNoSync(size, attrs, pm_bitmask);
-                    if (blk_size < size)
-                        size = blk_size;
-                    cstring.memcpy(p2, p, size);
-                    p = p2;
-                }
-            }
-        }
-        return p;
-    }
-
-
-    /**
-     * Attempt to in-place enlarge the memory block pointed to by p by at least
-     * minbytes beyond its current capacity, up to a maximum of maxsize.  This
-     * does not attempt to move the memory block (like realloc() does).
-     *
-     * Returns:
-     *  0 if could not extend p,
-     *  total size of entire memory block if successful.
-     */
-    size_t extend(void* p, size_t minsize, size_t maxsize)
-    {
-        if (!thread_needLock())
-        {
-            return extendNoSync(p, minsize, maxsize);
-        }
-        else synchronized (lock)
-        {
-            return extendNoSync(p, minsize, maxsize);
-        }
-    }
-
-
-    //
-    //
-    //
-    private size_t extendNoSync(void* p, size_t minsize, size_t maxsize)
-    in
-    {
-        assert( minsize <= maxsize );
-    }
-    body
-    {
-        if (opts.options.sentinel)
-            return 0;
-
-        Pool* pool = this.findPool(p);
-        if (pool is null)
-            return 0;
-
-        // Retrieve attributes
-        auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
-        uint attrs = this.getAttr(pool, bit_i);
-
-        void* blk_base_addr = this.findBase(p);
-        size_t blk_size = this.findSize(p);
-        bool has_pm = has_pointermap(attrs);
-        size_t* pm_bitmask = null;
-        size_t pm_bitmask_size = 0;
-        if (has_pm) {
-            pm_bitmask_size = size_t.sizeof;
-            // Retrieve pointer map bit mask
-            auto end_of_blk = cast(size_t**)(blk_base_addr +
-                    blk_size - size_t.sizeof);
-            pm_bitmask = *end_of_blk;
-
-            minsize += size_t.sizeof;
-            maxsize += size_t.sizeof;
-        }
-
-        if (blk_size < PAGESIZE)
-            return 0; // cannot extend buckets
-
-        auto psz = blk_size / PAGESIZE;
-        auto minsz = (minsize + PAGESIZE - 1) / PAGESIZE;
-        auto maxsz = (maxsize + PAGESIZE - 1) / PAGESIZE;
-
-        auto pagenum = (p - pool.baseAddr) / PAGESIZE;
-
-        size_t sz;
-        for (sz = 0; sz < maxsz; sz++)
-        {
-            auto i = pagenum + psz + sz;
-            if (i == pool.npages)
-                break;
-            if (pool.pagetable[i] != B_FREE)
-            {
-                if (sz < minsz)
-                    return 0;
-                break;
-            }
-        }
-        if (sz < minsz)
-            return 0;
-
-        size_t new_size = (psz + sz) * PAGESIZE;
-
-        if (opts.options.mem_stomp)
-            memset(p + blk_size - pm_bitmask_size, 0xF0,
-                    new_size - blk_size - pm_bitmask_size);
-        memset(pool.pagetable + pagenum + psz, B_PAGEPLUS, sz);
-        this.p_cache = null;
-        this.size_cache = 0;
-
-        if (has_pm) {
-            new_size -= size_t.sizeof;
-            auto end_of_blk = cast(size_t**)(blk_base_addr + new_size);
-            *end_of_blk = pm_bitmask;
-        }
-        return new_size;
-    }
-
-
-    /**
-     *
-     */
-    size_t reserve(size_t size)
-    {
-        if (!size)
-        {
-            return 0;
-        }
-
-        if (!thread_needLock())
-        {
-            return reserveNoSync(size);
-        }
-        else synchronized (lock)
-        {
-            return reserveNoSync(size);
-        }
-    }
-
-
-    /**
-     *
-     */
-    void free(void *p)
-    {
-        if (!p)
-        {
-            return;
-        }
-
-        if (!thread_needLock())
-        {
-            return freeNoSync(p);
-        }
-        else synchronized (lock)
-        {
-            return freeNoSync(p);
-        }
-    }
-
-
-    //
-    //
-    //
-    private void freeNoSync(void *p)
-    {
-        assert (p);
-
-        Pool*  pool;
-        size_t pagenum;
-        Bins   bin;
-        size_t bit_i;
-
-        // Find which page it is in
-        pool = this.findPool(p);
-        if (!pool)                              // if not one of ours
-            return;                             // ignore
-        if (opts.options.sentinel) {
-            sentinel_Invariant(p);
-            p = sentinel_sub(p);
-        }
-        pagenum = cast(size_t)(p - pool.baseAddr) / PAGESIZE;
-        bit_i = cast(size_t)(p - pool.baseAddr) / 16;
-        this.clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
-
-        bin = cast(Bins)pool.pagetable[pagenum];
-        if (bin == B_PAGE)              // if large alloc
-        {
-            // Free pages
-            size_t npages = 1;
-            size_t n = pagenum;
-            while (++n < pool.npages && pool.pagetable[n] == B_PAGEPLUS)
-                npages++;
-            if (opts.options.mem_stomp)
-                memset(p, 0xF2, npages * PAGESIZE);
-            pool.freePages(pagenum, npages);
-        }
-        else
-        {
-            // Add to free list
-            List *list = cast(List*)p;
-
-            if (opts.options.mem_stomp)
-                memset(p, 0xF2, binsize[bin]);
+/**
+ * Attempt to enlarge in place the memory block pointed to by p by at least
+ * minsize beyond its current capacity, up to a maximum of maxsize.  Unlike
+ * realloc(), this never attempts to move the memory block.
+ *
+ * Returns:
+ *  0 if could not extend p,
+ *  total size of entire memory block if successful.
+ */
+private size_t extend(void* p, size_t minsize, size_t maxsize)
+in
+{
+    assert( minsize <= maxsize );
+}
+body
+{
+    if (opts.options.sentinel)
+        return 0;
 
-            list.next = this.bucket[bin];
-            this.bucket[bin] = list;
-        }
-    }
+    Pool* pool = findPool(p);
+    if (pool is null)
+        return 0;
 
+    // Retrieve attributes
+    auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
+    uint attrs = getAttr(pool, bit_i);
 
-    /**
-     * Determine the base address of the block containing p.  If p is not a gc
-     * allocated pointer, return null.
-     */
-    void* addrOf(void *p)
-    {
-        if (!p)
-        {
-            return null;
-        }
+    void* blk_base_addr = pool.findBase(p);
+    size_t blk_size = pool.findSize(p);
+    bool has_pm = has_pointermap(attrs);
+    size_t* pm_bitmask = null;
+    size_t pm_bitmask_size = 0;
+    if (has_pm) {
+        pm_bitmask_size = size_t.sizeof;
+        // Retrieve pointer map bit mask
+        auto end_of_blk = cast(size_t**)(blk_base_addr +
+                blk_size - size_t.sizeof);
+        pm_bitmask = *end_of_blk;
 
-        if (!thread_needLock())
-        {
-            return addrOfNoSync(p);
-        }
-        else synchronized (lock)
-        {
-            return addrOfNoSync(p);
-        }
+        minsize += size_t.sizeof;
+        maxsize += size_t.sizeof;
     }
 
+    if (blk_size < PAGESIZE)
+        return 0; // cannot extend buckets
 
-    //
-    //
-    //
-    void* addrOfNoSync(void *p)
-    {
-        if (!p)
-        {
-            return null;
-        }
-
-        return this.findBase(p);
-    }
+    auto psz = blk_size / PAGESIZE;
+    auto minsz = (minsize + PAGESIZE - 1) / PAGESIZE;
+    auto maxsz = (maxsize + PAGESIZE - 1) / PAGESIZE;
 
+    auto pagenum = (p - pool.baseAddr) / PAGESIZE;
 
-    /**
-     * Determine the allocated size of pointer p.  If p is an interior pointer
-     * or not a gc allocated pointer, return 0.
-     */
-    size_t sizeOf(void *p)
+    size_t sz;
+    for (sz = 0; sz < maxsz; sz++)
     {
-        if (!p)
-        {
-            return 0;
-        }
-
-        if (!thread_needLock())
-        {
-            return sizeOfNoSync(p);
-        }
-        else synchronized (lock)
+        auto i = pagenum + psz + sz;
+        if (i == pool.npages)
+            break;
+        if (pool.pagetable[i] != B_FREE)
         {
-            return sizeOfNoSync(p);
-        }
-    }
-
-
-    //
-    //
-    //
-    private size_t sizeOfNoSync(void *p)
-    {
-        assert (p);
-
-        if (opts.options.sentinel)
-            p = sentinel_sub(p);
-
-        Pool* pool = this.findPool(p);
-        if (pool is null)
-            return 0;
-
-        auto biti = cast(size_t)(p - pool.baseAddr) / 16;
-        uint attrs = this.getAttr(pool, biti);
-
-        size_t size = this.findSize(p);
-        size_t pm_bitmask_size = 0;
-        if (has_pointermap(attrs))
-            pm_bitmask_size = size_t.sizeof;
-
-        if (opts.options.sentinel) {
-            // Check for interior pointer
-            // This depends on:
-            // 1) size is a power of 2 for less than PAGESIZE values
-            // 2) base of memory pool is aligned on PAGESIZE boundary
-            if (cast(size_t)p & (size - 1) & (PAGESIZE - 1))
-                return 0;
-            return size - SENTINEL_EXTRA - pm_bitmask_size;
-        }
-        else {
-            if (p == this.p_cache)
-                return this.size_cache;
-
-            // Check for interior pointer
-            // This depends on:
-            // 1) size is a power of 2 for less than PAGESIZE values
-            // 2) base of memory pool is aligned on PAGESIZE boundary
-            if (cast(size_t)p & (size - 1) & (PAGESIZE - 1))
+            if (sz < minsz)
                 return 0;
-
-            this.p_cache = p;
-            this.size_cache = size - pm_bitmask_size;
-
-            return this.size_cache;
-        }
-    }
-
-
-    /**
-     * Determine the base address of the block containing p.  If p is not a gc
-     * allocated pointer, return null.
-     */
-    BlkInfo query(void *p)
-    {
-        if (!p)
-        {
-            BlkInfo i;
-            return  i;
-        }
-
-        if (!thread_needLock())
-        {
-            return queryNoSync(p);
-        }
-        else synchronized (lock)
-        {
-            return queryNoSync(p);
+            break;
         }
     }
+    if (sz < minsz)
+        return 0;
 
+    size_t new_size = (psz + sz) * PAGESIZE;
 
-    //
-    //
-    //
-    BlkInfo queryNoSync(void *p)
-    {
-        assert(p);
-
-        return this.getInfo(p);
-    }
-
-
-    /**
-     * Verify that pointer p:
-     *  1) belongs to this memory pool
-     *  2) points to the start of an allocated piece of memory
-     *  3) is not on a free list
-     */
-    void check(void *p)
-    {
-        if (!p)
-        {
-            return;
-        }
+    if (opts.options.mem_stomp)
+        memset(p + blk_size - pm_bitmask_size, 0xF0,
+                new_size - blk_size - pm_bitmask_size);
+    memset(pool.pagetable + pagenum + psz, B_PAGEPLUS, sz);
+    gc.p_cache = null;
+    gc.size_cache = 0;
+    // update the size cache, since it is very likely that the size of this
+    // block will be queried in the near future
+    pool.update_cache(p, new_size);
 
-        if (!thread_needLock())
-        {
-            checkNoSync(p);
-        }
-        else synchronized (lock)
-        {
-            checkNoSync(p);
-        }
+    if (has_pm) {
+        new_size -= size_t.sizeof;
+        auto end_of_blk = cast(size_t**)(blk_base_addr + new_size);
+        *end_of_blk = pm_bitmask;
     }
+    return new_size;
+}
 
 
-    //
-    //
-    //
-    private void checkNoSync(void *p)
-    {
-        assert(p);
-
-        if (opts.options.sentinel)
-            sentinel_Invariant(p);
-        debug (PTRCHECK)
-        {
-            Pool*  pool;
-            size_t pagenum;
-            Bins   bin;
-            size_t size;
-
-            if (opts.options.sentinel)
-                p = sentinel_sub(p);
-            pool = this.findPool(p);
-            assert(pool);
-            pagenum = cast(size_t)(p - pool.baseAddr) / PAGESIZE;
-            bin = cast(Bins)pool.pagetable[pagenum];
-            assert(bin <= B_PAGE);
-            size = binsize[bin];
-            assert((cast(size_t)p & (size - 1)) == 0);
-
-            debug (PTRCHECK2)
-            {
-                if (bin < B_PAGE)
-                {
-                    // Check that p is not on a free list
-                    List *list;
-
-                    for (list = this.bucket[bin]; list; list = list.next)
-                    {
-                        assert(cast(void*)list != p);
-                    }
-                }
-            }
-        }
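+// Release the block at p: B_PAGE (large) allocations hand their pages back
+// to the pool, smaller ones are pushed onto the free list of their bin.
+// Pointers that do not belong to any pool are ignored.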
+private void free(void *p)
+{
+    assert (p);
+
+    Pool*  pool;
+    size_t pagenum;
+    Bins   bin;
+    size_t bit_i;
+
+    // Find which page it is in
+    pool = findPool(p);
+    if (!pool)                              // if not one of ours
+        return;                             // ignore
+    if (opts.options.sentinel) {
+        sentinel_Invariant(p);
+        p = sentinel_sub(p);
+    }
+    pagenum = cast(size_t)(p - pool.baseAddr) / PAGESIZE;
+    bit_i = cast(size_t)(p - pool.baseAddr) / 16;
+    clrAttr(pool, bit_i, BlkAttr.ALL_BITS);
+
+    bin = cast(Bins)pool.pagetable[pagenum];
+    if (bin == B_PAGE)              // if large alloc
+    {
+        // Free pages
+        size_t npages = 1;
+        size_t n = pagenum;
+        while (++n < pool.npages && pool.pagetable[n] == B_PAGEPLUS)
+            npages++;
+        if (opts.options.mem_stomp)
+            memset(p, 0xF2, npages * PAGESIZE);
+        pool.freePages(pagenum, npages);
+        // just in case we were caching this pointer
+        pool.clear_cache(p);
     }
-
-
-    //
-    //
-    //
-    private void setStackBottom(void *p)
+    else
     {
-        version (STACKGROWSDOWN)
-        {
-            //p = (void *)((uint *)p + 4);
-            if (p > this.stackBottom)
-            {
-                this.stackBottom = p;
-            }
-        }
-        else
-        {
-            //p = (void *)((uint *)p - 4);
-            if (p < this.stackBottom)
-            {
-                this.stackBottom = cast(char*)p;
-            }
-        }
-    }
+        // Add to free list
+        List* list = cast(List*) p;
 
+        if (opts.options.mem_stomp)
+            memset(p, 0xF2, binsize[bin]);
 
-    /**
-     * add p to list of roots
-     */
-    void addRoot(void *p)
-    {
-        if (!p)
-        {
-            return;
-        }
-
-        if (!thread_needLock())
-        {
-            if (roots.append(p) is null)
-                onOutOfMemoryError();
-        }
-        else synchronized (lock)
-        {
-            if (roots.append(p) is null)
-                onOutOfMemoryError();
-        }
+        list.next = gc.free_list[bin];
+        list.pool = pool;
+        gc.free_list[bin] = list;
     }
+}
 
 
-    /**
-     * remove p from list of roots
-     */
-    void removeRoot(void *p)
-    {
-        if (!p)
-        {
-            return;
-        }
+/**
+ * Determine the allocated size of pointer p.  If p is an interior pointer
+ * or not a gc allocated pointer, return 0.
+ */
+private size_t sizeOf(void *p)
+{
+    assert (p);
 
-        bool r;
-        if (!thread_needLock())
-        {
-            r = roots.remove(p);
-        }
-        else synchronized (lock)
-        {
-            r = roots.remove(p);
-        }
-        assert (r);
-    }
+    if (opts.options.sentinel)
+        p = sentinel_sub(p);
 
+    Pool* pool = findPool(p);
+    if (pool is null)
+        return 0;
 
-    /**
-     * add range to scan for roots
-     */
-    void addRange(void *p, size_t sz)
-    {
-        if (!p || !sz)
-        {
-            return;
-        }
+    auto biti = cast(size_t)(p - pool.baseAddr) / 16;
+    uint attrs = getAttr(pool, biti);
 
-        if (!thread_needLock())
-        {
-            if (ranges.append(Range(p, p+sz)) is null)
-                onOutOfMemoryError();
-        }
-        else synchronized (lock)
-        {
-            if (ranges.append(Range(p, p+sz)) is null)
-                onOutOfMemoryError();
-        }
+    size_t size = pool.findSize(p);
+    size_t pm_bitmask_size = 0;
+    if (has_pointermap(attrs))
+        pm_bitmask_size = size_t.sizeof;
+
+    if (opts.options.sentinel) {
+        // Check for interior pointer
+        // This depends on:
+        // 1) size is a power of 2 for less than PAGESIZE values
+        // 2) base of memory pool is aligned on PAGESIZE boundary
+        if (cast(size_t)p & (size - 1) & (PAGESIZE - 1))
+            return 0;
+        return size - SENTINEL_EXTRA - pm_bitmask_size;
     }
+    else {
+        if (p == gc.p_cache)
+            return gc.size_cache;
 
+        // Check for interior pointer
+        // This depends on:
+        // 1) size is a power of 2 for less than PAGESIZE values
+        // 2) base of memory pool is aligned on PAGESIZE boundary
+        if (cast(size_t)p & (size - 1) & (PAGESIZE - 1))
+            return 0;
 
-    /**
-     * remove range
-     */
-    void removeRange(void *p)
-    {
-        if (!p)
-        {
-            return;
-        }
+        gc.p_cache = p;
+        gc.size_cache = size - pm_bitmask_size;
 
-        bool r;
-        if (!thread_needLock())
-        {
-            r = ranges.remove(Range(p, null));
-        }
-        else synchronized (lock)
-        {
-            r = ranges.remove(Range(p, null));
-        }
-        assert (r);
+        return gc.size_cache;
     }
+}
 
 
-    /**
-     * do full garbage collection
-     */
-    void fullCollect()
+/**
+ * Verify that pointer p:
+ *  1) belongs to this memory pool
+ *  2) points to the start of an allocated piece of memory
+ *  3) is not on a free list
+ */
+private void checkNoSync(void *p)
+{
+    assert(p);
+
+    if (opts.options.sentinel)
+        sentinel_Invariant(p);
+    debug (PTRCHECK)
     {
+        Pool*  pool;
+        size_t pagenum;
+        Bins   bin;
+        size_t size;
 
-        if (!thread_needLock())
-        {
-            this.fullcollectshell();
-        }
-        else synchronized (lock)
-        {
-            this.fullcollectshell();
-        }
+        if (opts.options.sentinel)
+            p = sentinel_sub(p);
+        pool = findPool(p);
+        assert(pool);
+        pagenum = cast(size_t)(p - pool.baseAddr) / PAGESIZE;
+        bin = cast(Bins)pool.pagetable[pagenum];
+        assert(bin <= B_PAGE);
+        size = binsize[bin];
+        assert((cast(size_t)p & (size - 1)) == 0);
 
-        version (none)
+        debug (PTRCHECK2)
         {
-            GCStats stats;
-            getStats(stats);
+            if (bin < B_PAGE)
+            {
+                // Check that p is not on a free list
+                for (List* list = gc.free_list[bin]; list; list = list.next)
+                {
+                    assert(cast(void*)list != p);
+                }
+            }
         }
-
     }
+}
 
 
-    /**
-     * do full garbage collection ignoring roots
-     */
-    void fullCollectNoStack()
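+// Update gc.stack_bottom with p: under STACKGROWSDOWN only when p is greater
+// than the current value, otherwise only when p is smaller than it.
+//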
+private void setStackBottom(void *p)
+{
+    version (STACKGROWSDOWN)
     {
-        if (!thread_needLock())
-        {
-            this.noStack++;
-            this.fullcollectshell();
-            this.noStack--;
-        }
-        else synchronized (lock)
+        //p = (void *)((uint *)p + 4);
+        if (p > gc.stack_bottom)
         {
-            this.noStack++;
-            this.fullcollectshell();
-            this.noStack--;
+            gc.stack_bottom = p;
         }
     }
-
-
-    /**
-     * minimize free space usage
-     */
-    void minimize()
+    else
     {
-        if (!thread_needLock())
-        {
-            this.minimizeNoSync();
-        }
-        else synchronized (lock)
+        //p = (void *)((uint *)p - 4);
+        if (p < gc.stack_bottom)
         {
-            this.minimizeNoSync();
+            gc.stack_bottom = cast(char*)p;
         }
     }
+}
 
 
-    /**
-     * Retrieve statistics about garbage collection.
-     * Useful for debugging and tuning.
-     */
-    void getStats(out GCStats stats)
+/**
+ * Retrieve statistics about garbage collection.
+ * Useful for debugging and tuning.
+ */
+private GCStats getStats()
+{
+    GCStats stats;
+    size_t psize = 0;
+    size_t usize = 0;
+    size_t flsize = 0;
+
+    size_t n;
+    size_t bsize = 0;
+
+    for (n = 0; n < gc.pools.length; n++)
     {
-        if (!thread_needLock())
-        {
-            getStatsNoSync(stats);
-        }
-        else synchronized (lock)
+        Pool* pool = gc.pools[n];
+        psize += pool.npages * PAGESIZE;
+        for (size_t j = 0; j < pool.npages; j++)
         {
-            getStatsNoSync(stats);
+            Bins bin = cast(Bins)pool.pagetable[j];
+            if (bin == B_FREE)
+                stats.freeblocks++;
+            else if (bin == B_PAGE)
+                stats.pageblocks++;
+            else if (bin < B_PAGE)
+                bsize += PAGESIZE;
         }
     }
 
-
-    //
-    //
-    //
-    private void getStatsNoSync(out GCStats stats)
+    for (n = 0; n < B_PAGE; n++)
     {
-        size_t psize = 0;
-        size_t usize = 0;
-        size_t flsize = 0;
-
-        size_t n;
-        size_t bsize = 0;
-
-        memset(&stats, 0, GCStats.sizeof);
-
-        for (n = 0; n < pools.length; n++)
-        {
-            Pool* pool = pools[n];
-            psize += pool.npages * PAGESIZE;
-            for (size_t j = 0; j < pool.npages; j++)
-            {
-                Bins bin = cast(Bins)pool.pagetable[j];
-                if (bin == B_FREE)
-                    stats.freeblocks++;
-                else if (bin == B_PAGE)
-                    stats.pageblocks++;
-                else if (bin < B_PAGE)
-                    bsize += PAGESIZE;
-            }
-        }
+        for (List* list = gc.free_list[n]; list; list = list.next)
+            flsize += binsize[n];
+    }
 
-        for (n = 0; n < B_PAGE; n++)
-        {
-            for (List *list = this.bucket[n]; list; list = list.next)
-                flsize += binsize[n];
-        }
+    usize = bsize - flsize;
 
-        usize = bsize - flsize;
+    stats.poolsize = psize;
+    stats.usedsize = bsize - flsize;
+    stats.freelistsize = flsize;
+    return stats;
+}
 
-        stats.poolsize = psize;
-        stats.usedsize = bsize - flsize;
-        stats.freelistsize = flsize;
-    }
+/******************* weak-reference support *********************/
 
-    /******************* weak-reference support *********************/
+private struct WeakPointer
+{
+    Object reference;
 
-    // call locked if necessary
-    private T locked(T)(in T delegate() code)
+    void ondestroy(Object r)
     {
-        if (thread_needLock)
-            synchronized(lock) return code();
-        else
-           return code();
+        assert(r is reference);
+        // lock for memory consistency (parallel readers)
+        // also ensures that weakpointerDestroy can be called while another
+        // thread is freeing the reference with "delete"
+        return locked!(void, () {
+            reference = null;
+        })();
     }
+}
 
-    private struct WeakPointer
-    {
-        Object reference;
-
-        void ondestroy(Object r)
-        {
-            assert(r is reference);
-            // lock for memory consistency (parallel readers)
-            // also ensures that weakpointerDestroy can be called while another
-            // thread is freeing the reference with "delete"
-            locked!(void)({ reference = null; });
-        }
+/**
+ * Create a weak pointer to the given object.
+ * Returns a pointer to an opaque struct allocated in C memory.
+ */
+void* weakpointerCreate( Object r )
+{
+    if (r)
+    {
+        // must be allocated in C memory
+        // 1. to hide the reference from the GC
+        // 2. the GC doesn't scan delegates added by rt_attachDisposeEvent
+        //    for references
+        auto wp = cast(WeakPointer*)(cstdlib.malloc(WeakPointer.sizeof));
+        if (!wp)
+            onOutOfMemoryError();
+        wp.reference = r;
+        rt_attachDisposeEvent(r, &wp.ondestroy);
+        return wp;
     }
+    return null;
+}
 
-    /**
-     * Create a weak pointer to the given object.
-     * Returns a pointer to an opaque struct allocated in C memory.
-     */
-    void* weakpointerCreate( Object r )
+/**
+ * Destroy a weak pointer returned by weakpointerCreate().
+ * Detaches the dispose event (if the object is still alive) and releases
+ * the C memory holding the weak pointer.
+ * If null is passed, nothing happens.
+ */
+void weakpointerDestroy( void* p )
+{
+    if (p)
     {
-        if (r)
-       {
-            // must be allocated in C memory
-            // 1. to hide the reference from the GC
-            // 2. the GC doesn't scan delegates added by rt_attachDisposeEvent
-            //    for references
-            auto wp = cast(WeakPointer*)(cstdlib.malloc(WeakPointer.sizeof));
-            if (!wp)
-                onOutOfMemoryError();
-            wp.reference = r;
-            rt_attachDisposeEvent(r, &wp.ondestroy);
-            return wp;
-        }
-        return null;
+        auto wp = cast(WeakPointer*)p;
+        // must be extra careful about the GC or parallel threads
+        // finalizing the reference at the same time
+        // (no `return` here: the C memory must still be released below)
+        locked!(void, () {
+            if (wp.reference)
+                rt_detachDisposeEvent(wp.reference, &wp.ondestroy);
+        })();
+        cstdlib.free(wp);
     }
+}
 
-    /**
-     * Destroy a weak pointer returned by weakpointerCreate().
-     * If null is passed, nothing happens.
-     */
-    void weakpointerDestroy( void* p )
+/**
+ * Query a weak pointer and return either the object passed to
+ * weakpointerCreate, or null if it was free'd in the meantime.
+ * If null is passed, null is returned.
+ */
+Object weakpointerGet( void* p )
+{
+    if (p)
     {
-        if (p)
-       {
-            auto wp = cast(WeakPointer*)p;
-            // must be extra careful about the GC or parallel threads
-            // finalizing the reference at the same time
-            locked!(void)({
-                   if (wp.reference)
-                       rt_detachDisposeEvent(wp.reference, &wp.ondestroy);
-                  });
-            cstdlib.free(wp);
+        // NOTE: could avoid the lock by using Fawzi style GC counters but
+        // that'd require core.sync.Atomic and lots of care about memory
+        // consistency it's an optional optimization see
+        // http://dsource.org/projects/tango/browser/trunk/user/tango/core/Lifetime.d?rev=5100#L158
+        return locked!(Object, () {
+            return (cast(WeakPointer*)p).reference;
+        })();
         }
-    }
-
-    /**
-     * Query a weak pointer and return either the object passed to
-     * weakpointerCreate, or null if it was free'd in the meantime.
-     * If null is passed, null is returned.
-     */
-    Object weakpointerGet( void* p )
-    {
-        if (p)
-       {
-            // NOTE: could avoid the lock by using Fawzi style GC counters but
-            // that'd require core.sync.Atomic and lots of care about memory
-            // consistency it's an optional optimization see
-            // http://dsource.org/projects/tango/browser/trunk/user/tango/core/Lifetime.d?rev=5100#L158
-            return locked!(Object)({
-                  return (cast(WeakPointer*)p).reference;
-                  });
-            }
-    }
+    // null weak pointer: honour the documented contract instead of falling
+    // off the end of a non-void function
+    return null;
 }
 
 
@@ -2478,12 +1896,29 @@ struct Pool
     size_t npages;
     ubyte* pagetable;
 
+    /// Cache for findSize()
+    size_t cached_size;
+    void* cached_ptr;
+
+    void clear_cache(void* ptr = null)
+    {
+        if (ptr is null || ptr is this.cached_ptr) {
+            this.cached_ptr = null;
+            this.cached_size = 0;
+        }
+    }
+
+    void update_cache(void* ptr, size_t size)
+    {
+        this.cached_ptr = ptr;
+        this.cached_size = size;
+    }
 
     void initialize(size_t npages)
     {
         size_t poolsize = npages * PAGESIZE;
         assert(poolsize >= POOLSIZE);
-        baseAddr = cast(byte *) alloc.os_mem_map(poolsize);
+        baseAddr = cast(byte *) os.alloc(poolsize);
 
         // Some of the code depends on page alignment of memory pools
         assert((cast(size_t)baseAddr & (PAGESIZE - 1)) == 0);
@@ -2493,13 +1928,20 @@ struct Pool
             npages = 0;
             poolsize = 0;
         }
-        //assert(baseAddr);
         topAddr = baseAddr + poolsize;
 
-        mark.alloc(cast(size_t)poolsize / 16);
-        scan.alloc(cast(size_t)poolsize / 16);
-        freebits.alloc(cast(size_t)poolsize / 16);
-        noscan.alloc(cast(size_t)poolsize / 16);
+        size_t nbits = cast(size_t)poolsize / 16;
+
+        // if the GC will run in parallel in a fork()ed process, we need to
+        // share the mark bits
+        os.Vis vis = os.Vis.PRIV;
+        if (opts.options.fork)
+            vis = os.Vis.SHARED;
+        mark.alloc(nbits, vis); // shared between mark and sweep
+        freebits.alloc(nbits, vis); // ditto
+        scan.alloc(nbits); // only used in the mark phase
+        finals.alloc(nbits); // mark phase *MUST* have a snapshot
+        noscan.alloc(nbits); // ditto
 
         pagetable = cast(ubyte*) cstdlib.malloc(npages);
         if (!pagetable)
@@ -2518,7 +1960,7 @@ struct Pool
 
             if (npages)
             {
-                result = alloc.os_mem_unmap(baseAddr, npages * PAGESIZE);
+                result = os.dealloc(baseAddr, npages * PAGESIZE);
                 assert(result);
                 npages = 0;
             }
@@ -2530,15 +1972,21 @@ struct Pool
         if (pagetable)
             cstdlib.free(pagetable);
 
-        mark.Dtor();
+        os.Vis vis = os.Vis.PRIV;
+        if (opts.options.fork)
+            vis = os.Vis.SHARED;
+        mark.Dtor(vis);
+        freebits.Dtor(vis);
         scan.Dtor();
-        freebits.Dtor();
         finals.Dtor();
         noscan.Dtor();
     }
 
 
-    void Invariant() { }
+    bool Invariant()
+    {
+        return true;
+    }
 
 
     invariant
@@ -2597,6 +2045,52 @@ struct Pool
     }
 
 
+    /**
+     * Find base address of block containing pointer p.
+     * Returns null if p lies in a B_FREE page (p is not range-checked here)
+     */
+    void* findBase(void *p)
+    {
+        size_t offset = cast(size_t)(p - this.baseAddr);
+        size_t pagenum = offset / PAGESIZE;
+        Bins bin = cast(Bins)this.pagetable[pagenum];
+        // Round the offset down to the start of the allocated block
+        if (bin <= B_PAGE)
+            return this.baseAddr + (offset & notbinsize[bin]);
+        if (bin == B_PAGEPLUS) {
+            do {
+                --pagenum, offset -= PAGESIZE;
+            } while (cast(Bins)this.pagetable[pagenum] == B_PAGEPLUS);
+            return this.baseAddr + (offset & (offset.max ^ (PAGESIZE-1)));
+        }
+        // we are in a B_FREE page
+        return null;
+    }
+
+
+    /**
+     * Find the size of the block at p (p is not range-checked against the pool).
+     * Pages that are not B_PAGE yield binsize[bin]; for B_PAGE blocks the
+     * page run is measured and the result is kept in cached_ptr/cached_size.
+     */
+    size_t findSize(void *p)
+    {
+        size_t pagenum = cast(size_t)(p - this.baseAddr) / PAGESIZE;
+        Bins bin = cast(Bins)this.pagetable[pagenum];
+        if (bin != B_PAGE)
+            return binsize[bin];
+        if (this.cached_ptr == p)
+            return this.cached_size;
+        size_t i = pagenum + 1;
+        for (; i < this.npages; i++)
+            if (this.pagetable[i] != B_PAGEPLUS)
+                break;
+        this.cached_ptr = p;
+        this.cached_size = (i - pagenum) * PAGESIZE;
+        return this.cached_size;
+    }
+
+
     /**
      * Used for sorting pools
      */
@@ -2633,8 +2127,9 @@ void sentinel_init(void *p, size_t size)
 
 void sentinel_Invariant(void *p)
 {
-    assert(*sentinel_pre(p) == SENTINEL_PRE);
-    assert(*sentinel_post(p) == SENTINEL_POST);
+    if (*sentinel_pre(p) != SENTINEL_PRE ||
+            *sentinel_post(p) != SENTINEL_POST)
+        cstdlib.abort();
 }
 
 
@@ -2650,4 +2145,321 @@ void *sentinel_sub(void *p)
 }
 
 
+
+/* ============================ C Public Interface ======================== */
+
+
+private int _termCleanupLevel=1;
+
+extern (C):
+
+/// sets the cleanup level done by gc
+/// 0: none
+/// 1: fullCollect
+/// 2: fullCollect ignoring stack roots (might crash daemonThreads)
+/// result !=0 if the value was invalid
+int gc_setTermCleanupLevel(int cLevel)
+{
+    if (cLevel<0 || cLevel>2) return cLevel;
+    _termCleanupLevel=cLevel;
+    return 0;
+}
+
+/// returns the cleanup level done by gc
+int gc_getTermCleanupLevel()
+{
+    return _termCleanupLevel;
+}
+
+/// allocates the global GC state in C memory, initializes it and then
+/// initializes the thread library; reports out-of-memory if the state
+/// cannot be allocated
+void gc_init()
+{
+    gc = cast(GC*) cstdlib.calloc(1, GC.sizeof);
+    // calloc may fail: report it instead of dereferencing a null pointer
+    if (gc is null)
+        onOutOfMemoryError();
+    // registered only once gc is valid, so the invariant never sees null
+    scope (exit) assert (Invariant());
+    *gc = GC.init;
+    initialize();
+    version (DigitalMars) version(OSX) {
+        _d_osx_image_init();
+    }
+    // NOTE: The GC must initialize the thread library
+    //       before its first collection.
+    thread_init();
+}
+
+void gc_term()
+{
+    assert (Invariant());
+    if (_termCleanupLevel<1) {
+        // no cleanup
+    } else if (_termCleanupLevel==2){
+        // a more complete cleanup
+        // NOTE: There may be daemon threads still running when this routine is
+        //       called.  If so, cleaning memory out from under them is a good
+        //       way to make them crash horribly.
+        //       Often this probably doesn't matter much since the app is
+        //       supposed to be shutting down anyway, but for example tests might
+        //       crash (and be considered failed even if the test was ok).
+        //       Thus this is not the default and has to be requested
+        //       explicitly by calling gc_setTermCleanupLevel(2).
+        //       (The collection below runs with gc.no_stack raised.)
+        //
+        // not really a 'collect all' -- still scans static data area, roots,
+        // and ranges.
+        return locked!(void, () {
+            gc.no_stack++;
+            fullcollectshell();
+            gc.no_stack--;
+        })();
+    } else {
+        // default (safe) cleanup
+        return locked!(void, () {
+            fullcollectshell();
+        })();
+    }
+}
+
+void gc_enable()
+{
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        assert (gc.disabled > 0);
+        gc.disabled--;
+    })();
+}
+
+void gc_disable()
+{
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        gc.disabled++;
+    })();
+}
+
+void gc_collect()
+{
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        fullcollectshell();
+    })();
+}
+
+
+void gc_minimize()
+{
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        minimize();
+    })();
+}
+
+uint gc_getAttr(void* p)
+{
+    if (p is null)
+        return 0;
+    return locked!(uint, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        Pool* pool = findPool(p);
+        if (pool is null)
+            return 0u;
+        auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
+        return getAttr(pool, bit_i);
+    })();
+}
+
+uint gc_setAttr(void* p, uint attrs)
+{
+    if (p is null)
+        return 0;
+    return locked!(uint, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        Pool* pool = findPool(p);
+        if (pool is null)
+            return 0u;
+        auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
+        uint old_attrs = getAttr(pool, bit_i);
+        setAttr(pool, bit_i, attrs);
+        return old_attrs;
+    })();
+}
+
+uint gc_clrAttr(void* p, uint attrs)
+{
+    if (p is null)
+        return 0;
+    return locked!(uint, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        Pool* pool = findPool(p);
+        if (pool is null)
+            return 0u;
+        auto bit_i = cast(size_t)(p - pool.baseAddr) / 16;
+        uint old_attrs = getAttr(pool, bit_i);
+        clrAttr(pool, bit_i, attrs);
+        return old_attrs;
+    })();
+}
+
+void* gc_malloc(size_t size, uint attrs = 0,
+        PointerMap ptrmap = PointerMap.init)
+{
+    if (size == 0)
+        return null;
+    return locked!(void*, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return malloc(size, attrs, ptrmap.bits.ptr);
+    })();
+}
+
+void* gc_calloc(size_t size, uint attrs = 0,
+        PointerMap ptrmap = PointerMap.init)
+{
+    if (size == 0)
+        return null;
+    return locked!(void*, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return calloc(size, attrs, ptrmap.bits.ptr);
+    })();
+}
+
+void* gc_realloc(void* p, size_t size, uint attrs = 0,
+        PointerMap ptrmap = PointerMap.init)
+{
+    return locked!(void*, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return realloc(p, size, attrs, ptrmap.bits.ptr);
+    })();
+}
+
+size_t gc_extend(void* p, size_t min_size, size_t max_size)
+{
+    return locked!(size_t, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return extend(p, min_size, max_size);
+    })();
+}
+
+size_t gc_reserve(size_t size)
+{
+    if (size == 0)
+        return 0;
+    return locked!(size_t, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return reserve(size);
+    })();
+}
+
+void gc_free(void* p)
+{
+    if (p is null)
+        return;
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        free(p);
+    })();
+}
+
+void* gc_addrOf(void* p)
+{
+    if (p is null)
+        return null;
+    return locked!(void*, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        Pool* pool = findPool(p);
+        if (pool is null)
+            return null;
+        return pool.findBase(p);
+    })();
+}
+
+size_t gc_sizeOf(void* p)
+{
+    if (p is null)
+        return 0;
+    return locked!(size_t, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return sizeOf(p);
+    })();
+}
+
+BlkInfo gc_query(void* p)
+{
+    if (p is null)
+        return BlkInfo.init;
+    return locked!(BlkInfo, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return getInfo(p);
+    })();
+}
+
+// NOTE: This routine is experimental.  The stats or function name may change
+//       before it is made officially available.
+GCStats gc_stats()
+{
+    return locked!(GCStats, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        return getStats();
+    })();
+}
+
+void gc_addRoot(void* p)
+{
+    if (p is null)
+        return;
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        if (gc.roots.append(p) is null)
+            onOutOfMemoryError();
+    })();
+}
+
+void gc_addRange(void* p, size_t size)
+{
+    if (p is null || size == 0)
+        return;
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        if (gc.ranges.append(Range(p, p + size)) is null)
+            onOutOfMemoryError();
+    })();
+}
+
+void gc_removeRoot(void* p)
+{
+    if (p is null)
+        return;
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        bool r = gc.roots.remove(p);
+        assert (r);
+    })();
+}
+
+void gc_removeRange(void* p)
+{
+    if (p is null)
+        return;
+    return locked!(void, () {
+        assert (Invariant()); scope (exit) assert (Invariant());
+        bool r = gc.ranges.remove(Range(p, null));
+        assert (r);
+    })();
+}
+
+void* gc_weakpointerCreate(Object r)
+{
+    // weakpointers do their own locking
+    return weakpointerCreate(r);
+}
+
+void gc_weakpointerDestroy(void* wp)
+{
+    // weakpointers do their own locking
+    weakpointerDestroy(wp);
+}
+
+Object gc_weakpointerGet(void* wp)
+{
+    // weakpointers do their own locking
+    return weakpointerGet(wp);
+}
+
+
 // vim: set et sw=4 sts=4 :