From: Leandro Lucarella Date: Thu, 27 Aug 2009 00:06:17 +0000 (-0300) Subject: micro: Improve generation of data to plot the histogram X-Git-Url: https://git.llucax.com/software/dgc/dgcbench.git/commitdiff_plain/37dd82c0a3363318958cd62cfdad2c73cc56338a micro: Improve generation of data to plot the histogram If a program allocates a lot of cells with different sizes, the histogram would have a lot of bars to plot, making it very hard to read. The script to generate the histogram data is improved to take a maximum number of cell sizes. If that limit is exceeded, the cell sizes are grouped together in ranges of cell sizes, keeping the histogram readable. --- diff --git a/micro/Makefile b/micro/Makefile index e48a2e2..ddbbac7 100644 --- a/micro/Makefile +++ b/micro/Makefile @@ -72,9 +72,8 @@ $(STAT_DIR)/%.c.csv $(STAT_DIR)/%.a.csv: $(BIN_DIR)/% $(STAT_DIR)/split.c.csv $(STAT_DIR)/split.a.csv: override args := bible.txt .PRECIOUS: $(STAT_DIR)/%.h.csv -$(STAT_DIR)/%.h.csv: $(STAT_DIR)/%.a.csv - $(P_AWK) awk -F, 'BEGIN { print "Size,Count" } NR > 1 { a[$$3]++ } \ - END { for (i in a) print i "," a[i] }' $< | sort > $@ +$(STAT_DIR)/%.h.csv: $(STAT_DIR)/%.a.csv hist.awk + $(P_AWK) awk -F, -f $(lastword $^) $< > $@ .PHONY: plot plot: $(graphs) diff --git a/micro/hist.awk b/micro/hist.awk new file mode 100644 index 0000000..adeeb7b --- /dev/null +++ b/micro/hist.awk @@ -0,0 +1,78 @@ +#!/usr/bin/env awk -F, -f + +BEGIN { + MAX_SAMPLES = 50 + # output CSV header + print "Size,Scan,No Scan"; +} + +NR == 2 { + min = max = int($3) +} + +NR > 1 { # skip the input CSV header + n = int($3) + if (int($6) > 0) { # cell has the NO_SCAN bit + no_scan[n]++ + if (!(n in scan)) + scan[n] = 0 + } else { # cell doesn't have the NO_SCAN bit + scan[n]++ + if (!(n in no_scan)) + no_scan[n] = 0 + } + if (n < min) + min = n + else if (n > max) + max = n +} + +function h(val) { + if (val >= 1048576) # 1 M + r = sprintf("%uM", val / 1048576) + else if (val >= 1024) # 1 K + r = sprintf("%uK", val / 1024) 
+ else + r = sprintf("%u", val) + return r +} + +function p(s, ns, o, n) { + for (i = 1; i <= n; i++) + print o[i] "," s[o[i]] "," ns[o[i]] +} + +END { + # reduce the number of elements in the histogram if there are too many + if (length(scan) > MAX_SAMPLES) { + step = int((max - min) / MAX_SAMPLES) + 1 + for (i in scan) { + i = int(i) + for (from = min; from < max; from += step) { + to = from + step + if ((from <= i) && (i < to)) { + j = sprintf("%s-%s", h(from), h(to)) + scan2[j] += scan[i] + no_scan2[j] += no_scan[i] + break + } + } + } + n = 1 + for (from = min; from < max; from += step) { + v = sprintf("%s-%s", h(from), h(from + step)) + if (v in scan2) + order[n++] = v + } + } + # print output data + if (length(scan2)) { + p(scan2, no_scan2, order, n) + } else { + for (i in scan) + order[i++] = int(i) + n = asort(order) + p(scan, no_scan, order, n) + } +} +