From 7e73b6245c0d7f6af8da2228a2fdbc9404ad69ec Mon Sep 17 00:00:00 2001 From: Leandro Lucarella Date: Wed, 9 Jun 2010 19:47:05 -0300 Subject: [PATCH] Add statistics collection Statistics meassure this metrics. For each collection: * Time spent in the malloc that triggered the collection. * Time spent with the world stopped. * Time spent doing the collection. * Memory info before and after the collection: used, free, overhead and wasted memory. Used is the memory used by the mutator, free is the memory the mutator can request, overhead is the memory used by the collector itself and wasted is memory that is not used by either the mutator or collector and that can't be requested by the mutator either. For each malloc() call: * Time spent. * Amount of memory requested. * Attributes of the requested memory. * A flag to tell if this call triggered a collection. Statistics collection is controlled via the D_GC_OPTS environment variable. To collect malloc statistics, use the option malloc_stats_file, the value is the path to the file where to store the malloc statistics (the contents will be replaced). To collect garbage collection statistics, use the option collect_stats_file, the value is the path to the file where to store the malloc statistics (the contents will be replaced). The generated files are in CSV format and have headers that make them self explanatory. --- rt/gc/cdgc/dynarray.d | 8 + rt/gc/cdgc/gc.d | 31 ++-- rt/gc/cdgc/opts.d | 10 ++ rt/gc/cdgc/stats.d | 362 +++++++++++++++++++++++++++++++++++++++++- 4 files changed, 401 insertions(+), 10 deletions(-) diff --git a/rt/gc/cdgc/dynarray.d b/rt/gc/cdgc/dynarray.d index 5a83217..feddd98 100644 --- a/rt/gc/cdgc/dynarray.d +++ b/rt/gc/cdgc/dynarray.d @@ -79,6 +79,14 @@ public: return this._capacity; } + /** + * Get the total ammount of bytes the array consumes. + */ + size_t size_of() + { + return this.sizeof + this._capacity * (T.sizeof + (T*).sizeof); + } + /** * Get the pointer to the array's data. * diff --git a/rt/gc/cdgc/gc.d b/rt/gc/cdgc/gc.d index bb137f1..73fd1fa 100644 --- a/rt/gc/cdgc/gc.d +++ b/rt/gc/cdgc/gc.d @@ -43,7 +43,7 @@ version = STACKGROWSDOWN; // growing the stack means subtracting from the /***************************************************/ import rt.gc.cdgc.bits: GCBits; -import rt.gc.cdgc.stats: GCStats; +import rt.gc.cdgc.stats: GCStats, Stats; import rt.gc.cdgc.dynarray: DynArray; import alloc = rt.gc.cdgc.alloc; import opts = rt.gc.cdgc.opts; @@ -84,15 +84,16 @@ struct BlkInfo uint attr; } +package enum BlkAttr : uint +{ + FINALIZE = 0b0000_0001, + NO_SCAN = 0b0000_0010, + NO_MOVE = 0b0000_0100, + ALL_BITS = 0b1111_1111 +} + private { - enum BlkAttr : uint - { - FINALIZE = 0b0000_0001, - NO_SCAN = 0b0000_0010, - NO_MOVE = 0b0000_0100, - ALL_BITS = 0b1111_1111 - } extern (C) void* rt_stackBottom(); extern (C) void* rt_stackTop(); @@ -135,6 +136,8 @@ class GCLock { } // just a dummy so we can get a global lock const uint GCVERSION = 1; // increment every time we change interface // to GC. +Stats stats; + class GC { // For passing to debug code @@ -156,6 +159,7 @@ class GC onOutOfMemoryError(); gcx.initialize(); setStackBottom(rt_stackBottom()); + stats = Stats(this); } @@ -328,6 +332,10 @@ class GC { assert(size != 0); + stats.malloc_started(size, bits); + scope (exit) + stats.malloc_finished(); + void *p = null; Bins bin; @@ -1943,12 +1951,15 @@ struct Gcx anychanges |= changes; } - /** * Return number of full pages free'd. */ size_t fullcollectshell() { + stats.collection_started(); + scope (exit) + stats.collection_finished(); + // The purpose of the 'shell' is to ensure all the registers // get put on the stack so they'll be scanned void *sp; @@ -2042,6 +2053,7 @@ struct Gcx debug(COLLECT_PRINTF) printf("Gcx.fullcollect()\n"); thread_suspendAll(); + stats.world_stopped(); p_cache = null; size_cache = 0; @@ -2159,6 +2171,7 @@ struct Gcx } thread_resumeAll(); + stats.world_started(); // Free up everything not marked debug(COLLECT_PRINTF) printf("\tfree'ing\n"); diff --git a/rt/gc/cdgc/opts.d b/rt/gc/cdgc/opts.d index ccfe9b6..6e93622 100644 --- a/rt/gc/cdgc/opts.d +++ b/rt/gc/cdgc/opts.d @@ -47,6 +47,8 @@ struct Options { uint verbose = 0; char[MAX_OPT_LEN] log_file = ""; + char[MAX_OPT_LEN] malloc_stats_file = ""; + char[MAX_OPT_LEN] collect_stats_file = ""; bool sentinel = false; bool mem_stomp = false; } @@ -64,6 +66,14 @@ void process_option(char* opt_name, char* opt_value) { cstring.strcpy(options.log_file.ptr, opt_value); } + else if (cstring.strcmp(opt_name, "malloc_stats_file") == 0) + { + cstring.strcpy(options.malloc_stats_file.ptr, opt_value); + } + else if (cstring.strcmp(opt_name, "collect_stats_file") == 0) + { + cstring.strcpy(options.collect_stats_file.ptr, opt_value); + } else if (cstring.strcmp(opt_name, "sentinel") == 0) { if (opt_value[0] == '\0') diff --git a/rt/gc/cdgc/stats.d b/rt/gc/cdgc/stats.d index ec8c7ad..562d955 100644 --- a/rt/gc/cdgc/stats.d +++ b/rt/gc/cdgc/stats.d @@ -21,11 +21,371 @@ * be misrepresented as being the original software. * o This notice may not be removed or altered from any source * distribution. - * Authors: Walter Bright, Sean Kelly + * Authors: Walter Bright, Sean Kelly, Leandro Lucarella */ module rt.gc.cdgc.stats; +import gc = rt.gc.cdgc.gc; +import rt.gc.cdgc.bits: GCBits; +import rt.gc.cdgc.opts: options; + +import cstdio = tango.stdc.stdio; +import ctime = tango.stdc.posix.sys.time; + + +private: + + +/** + * Time information for a collection. + * + * This struct groups all the timing information for a particular collection. + * It stores how much time it took the different parts of a collection, splat + * in: allocation time (the time the mutator spent in the malloc() call that + * triggered the collection), time the world was stopped because of the + * collection and the time spent in the collection itself. The time the world + * was stopped includes the time spent in the collection (in this + * implementation, which is not concurrent) and the time spent in the + * allocation includes the time the world was stopped (this is probably true + * for any implementation). + */ +struct TimeInfo +{ + /// Collection time (in seconds). + double collection = -1.0; + /// Stop-the-world time (in seconds). + double stop_the_world = -1.0; + /// Time spent in the malloc that triggered the collection (in seconds). + double malloc = -1.0; +} + + +/** + * Memory (space) information for a collection. + * + * This struct groups all the space information for a particular collection. + * The space is partitioned in four: used, free, wasted and overhead. "used" is + * the net space needed by the mutator; "free" is the space the GC has ready to + * be given to the mutator when needed; "wasted" is memory that can't be used + * by neither the collector nor the mutator (usually because of fragmentation) + * and "overhead" is the space needed by the GC itself. + */ +struct MemoryInfo +{ + /// Heap memory used by the program (in bytes). + size_t used = 0; + /// Free heap memory (in bytes). + size_t free = 0; + /// Memory that can't be used at all (in bytes). + size_t wasted = 0; + /// Memory used by the GC for bookkeeping (in bytes). + size_t overhead = 0; +} + + +/** + * Information about a particular collection. + * + * This struct groups all information related to a particular collection. The + * timings for the collection as stored and the space requirements are both, + * before and after that collection, logged to easily measure how effective + * the collection was in terms of space. The time when this collection was + * triggered is logged too (relative to the program start, in seconds). + * + * See_Also: TimeInfo and MemoryInfo structs. + */ +struct CollectionInfo +{ + /// When this information were taken (seconds since the program started). + double timestamp = -1.0; + /// Time statistics for this collection. + TimeInfo time; + /// Memory statistics before the collection. + MemoryInfo before; + /// Memory statistics after the collection. + MemoryInfo after; +} + + +/** + * Information about a particular allocation. + * + * This struct groups all the information about a particular allocation. The + * size requested in that allocation is logged, as well as the attributes + * assigned to that cell, the time malloc() took to complete and if + * a collection was triggered. The time when this allocation was requested is + * logged too (relative to the program start, in seconds). + */ +struct MallocInfo +{ + /// When this information were taken (seconds since the program started). + double timestamp = -1.0; + /// Time spent in the malloc() call (in seconds). + double time = -1.0; + /// Memory requested by the malloc() call (in bytes). + size_t size = 0; + /// Memory attributes as BlkAttr flags. + uint attr = 0; + /// True if this malloc() triggered a collection. + bool collected = false; +} + + +package: + +/** + * Control and store the GC statistics. + * + * This is the interface to this module, it has methods for the GC to inform + * when a relevant event has happened. The events are the start and finish of + * an allocation, when the world is stopped and restarted and when + * a collection is triggered and done. + * + * All the data is logged in memory and printed to the standard output when + * requested (usually at the end of the program). + * + * See_Also: CollectionInfo and MallocInfo structs. + */ +struct Stats +{ + +private: + + /// The GC instance we are collecting stats from. + .gc.GC gc = null; + + /// True if statistics should be collected. + bool active = false; + + /// Current collection information (printed when the malloc finishes). + MallocInfo malloc_info; + + /// File where to write the malloc information to. + cstdio.FILE* mallocs_file; + + /// Current collection information (printed when the collection finishes). + CollectionInfo collection_info; + + /// File where to write the collections information to. + cstdio.FILE* collections_file; + + /// Time when the program started. + double program_start = -1.0; + + /// Return the current time as seconds since the epoch. + static double now() + { + ctime.timeval tv; + ctime.gettimeofday(&tv, null); + return cast(double) tv.tv_sec + cast(double) tv.tv_usec / 1_000_000.0; + } + + /// Fill a MemoryInfo struct with the current state of the GC heap. + void fill_memory_info(MemoryInfo* mem_info) + { + mem_info.overhead += .gc.GC.classinfo.init.length + .gc.Gcx.sizeof + + .gc.pools.size_of + .gc.roots.size_of + .gc.ranges.size_of; + + // pools + for (size_t i = 0; i < .gc.pools.length; i++) + { + auto pool = .gc.pools[i]; + mem_info.overhead += pool.npages * ubyte.sizeof; + // the 5 bitmaps (mark, scan, free, final, noscan) + mem_info.overhead += 5 * (GCBits.sizeof + + (pool.mark.nwords + 2) * uint.sizeof); + + for (size_t pn = 0; pn < pool.npages; pn++) + { + auto bin = cast(.gc.Bins) pool.pagetable[pn]; + if (bin < .gc.B_PAGE) + { + size_t size = .gc.binsize[bin]; + size_t bitstride = size / 16; + size_t bitbase = pn * (.gc.PAGESIZE / 16); + size_t bittop = bitbase + (.gc.PAGESIZE / 16); + for (auto biti = bitbase; biti < bittop; biti += bitstride) + { + if (pool.freebits.test(biti)) + mem_info.free += size; + else + mem_info.used += size; // TODO: wasted + } + } + else if (bin == .gc.B_FREE) + mem_info.free += .gc.PAGESIZE; + else // B_PAGE / B_PAGEPLUS + mem_info.used += .gc.PAGESIZE; // TODO: wasted + } + } + } + + cstdio.FILE* start_file(char* filename, char* header) + { + cstdio.FILE* file = cstdio.fopen(filename, "w"); + assert (file !is null); + cstdio.fputs(header, file); + return file; + } + + void print_malloc() + { + if (this.mallocs_file is null) + return; + cstdio.fprintf(this.mallocs_file, + "%f,%f,%zu,%zu,%zu,%zu,%zu\n", + //0 1 2 3 4 5 6 + this.malloc_info.timestamp, // 0 + this.malloc_info.time, // 1 + this.malloc_info.size, // 2 + this.malloc_info.collected ? 1u : 0u, // 3 + this.malloc_info.attr & .gc.BlkAttr.FINALIZE, // 4 + this.malloc_info.attr & .gc.BlkAttr.NO_SCAN, // 5 + this.malloc_info.attr & .gc.BlkAttr.NO_MOVE); // 6 + } + + void print_collection() + { + if (this.collections_file is null) + return; + cstdio.fprintf(this.collections_file, + "%f,%f,%f,%f,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu\n", + //0 1 2 3 4 5 6 7 8 9 10 11 + this.collection_info.timestamp, // 0 + this.collection_info.time.malloc, // 1 + this.collection_info.time.collection, // 2 + this.collection_info.time.stop_the_world, // 3 + this.collection_info.before.used, // 4 + this.collection_info.before.free, // 5 + this.collection_info.before.wasted, // 6 + this.collection_info.before.overhead, // 7 + this.collection_info.after.used, // 8 + this.collection_info.after.free, // 9 + this.collection_info.after.wasted, // 10 + this.collection_info.after.overhead); // 11 + } + + +public: + + /** + * Construct a Stats object (useful for easy initialization). + * + * This function should be always used to initialize a Stats object because + * the program start time (in seconds since the epoch) needs to be taken to + * properly add timestamps to allocations and collections. + */ + static Stats opCall(.gc.GC gc) + { + Stats this_; + this_.gc = gc; + if (options.malloc_stats_file[0] != '\0') { + this_.active = true; + this_.mallocs_file = this_.start_file( + options.malloc_stats_file.ptr, + "Timestamp,Time,Size,Collection triggered," + "Finalize,No scan,No move\n"); + } + // collection + if (options.collect_stats_file[0] != '\0') { + this_.active = true; + this_.collections_file = this_.start_file( + options.collect_stats_file.ptr, + "Timestamp,Malloc time,Collection time,Stop-the-world time," + "Used before,Free before,Wasted before,Overhead before," + "Used after,Free after,Wasted after,Overhead after\n"); + } + this_.program_start = this_.now(); + return this_; + } + + void finalize() + { + if (this.mallocs_file !is null) + cstdio.fclose(this.mallocs_file); + if (this.collections_file !is null) + cstdio.fclose(this.collections_file); + } + + /// Inform the start of an allocation. + void malloc_started(size_t size, uint attr) + { + if (!this.active) + return; + auto now = this.now(); + auto relative_now = now - this.program_start; + // malloc + this.malloc_info = this.malloc_info.init; + this.malloc_info.timestamp = relative_now; + this.malloc_info.time = now; + this.malloc_info.size = size; + this.malloc_info.attr = attr; + // this.malloc_info.collected is filled in malloc_finished() + // collection + this.collection_info = this.collection_info.init; + this.collection_info.timestamp = relative_now; + // this.collection_info.time.malloc is the same as malloc_info.time + } + + /// Inform the end of an allocation. + void malloc_finished() + { + if (!this.active) + return; + auto now = this.now(); + auto collected = !(this.collection_info.time.collection < 0.0); + // malloc + this.malloc_info.time = now - this.malloc_info.time; + if (collected) + this.malloc_info.collected = true; + this.print_malloc(); + if (!collected) + return; + // collection + this.collection_info.time.malloc = this.malloc_info.time; + this.print_collection(); + } + + /// Inform that all threads (the world) have been stopped. + void world_stopped() + { + if (!this.active) + return; + this.collection_info.time.stop_the_world = this.now(); + } + + /// Inform that all threads (the world) have been resumed. + void world_started() + { + if (!this.active) + return; + this.collection_info.time.stop_the_world = + this.now() - this.collection_info.time.stop_the_world; + } + + /// Inform the start of a collection. + void collection_started() + { + if (!this.active) + return; + this.fill_memory_info(&this.collection_info.before); + this.collection_info.time.collection = this.now(); + } + + /// Inform the end of a collection. + void collection_finished() + { + if (!this.active) + return; + this.collection_info.time.collection = + this.now() - this.collection_info.time.collection; + this.fill_memory_info(&this.collection_info.after); + } + +} + + /** * */ -- 2.43.0