From 37dd82c0a3363318958cd62cfdad2c73cc56338a Mon Sep 17 00:00:00 2001
From: Leandro Lucarella
Date: Wed, 26 Aug 2009 21:06:17 -0300
Subject: [PATCH] micro: Improve generation of data to plot the histogram

If a program allocates a lot of cells with different sizes the histogram
would have a lot of bars to plot, making it very hard to read.

The script to generate the histogram data is improved to take a maximum
number of cell sizes. If that limit is passed, the cell sizes are grouped
together in ranges of cell sizes, keeping the histogram readable.
---
 micro/Makefile |  5 ++--
 micro/hist.awk | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 3 deletions(-)
 create mode 100644 micro/hist.awk

diff --git a/micro/Makefile b/micro/Makefile
index e48a2e2..ddbbac7 100644
--- a/micro/Makefile
+++ b/micro/Makefile
@@ -72,9 +72,8 @@ $(STAT_DIR)/%.c.csv $(STAT_DIR)/%.a.csv: $(BIN_DIR)/%
 $(STAT_DIR)/split.c.csv $(STAT_DIR)/split.a.csv: override args := bible.txt
 
 .PRECIOUS: $(STAT_DIR)/%.h.csv
-$(STAT_DIR)/%.h.csv: $(STAT_DIR)/%.a.csv
-	$(P_AWK) awk -F, 'BEGIN { print "Size,Count" } NR > 1 { a[$$3]++ } \
-		END { for (i in a) print i "," a[i] }' $< | sort > $@
+$(STAT_DIR)/%.h.csv: $(STAT_DIR)/%.a.csv hist.awk
+	$(P_AWK) awk -F, -f $(lastword $^) $< > $@
 
 .PHONY: plot
 plot: $(graphs)
diff --git a/micro/hist.awk b/micro/hist.awk
new file mode 100644
index 0000000..adeeb7b
--- /dev/null
+++ b/micro/hist.awk
@@ -0,0 +1,84 @@
+#!/usr/bin/awk -f
+#
+# Generate the data to plot the histogram of allocated cell sizes.
+#
+# Input:  CSV with a header line.  Field 3 is the cell size and field 6 is
+#         greater than zero when the cell has the NO_SCAN bit.
+# Output: CSV with the columns "Size,Scan,No Scan", ordered by size.  When
+#         there are more than MAX_SAMPLES different sizes, they are grouped
+#         in at most MAX_SAMPLES ranges of the same width; each row is then
+#         labeled "from-to" (from included, to excluded) and holds the sum
+#         of the counts in that range.  Empty ranges are not printed.
+#
+# MAX_SAMPLES defaults to 50 and can be changed with -v MAX_SAMPLES=N.
+# gawk is needed because asort() is used.
+
+BEGIN {
+	FS = "," # same as -F, needed when the script is run directly
+	MAX_SAMPLES = int(MAX_SAMPLES)
+	if (MAX_SAMPLES < 1) # not set with -v, or not valid
+		MAX_SAMPLES = 50
+	# output CSV header
+	print "Size,Scan,No Scan"
+}
+
+NR > 1 { # skip the input CSV header
+	n = int($3)
+	if (!(n in scan)) # keep the same set of sizes in both arrays
+		scan[n] = no_scan[n] = 0
+	if (int($6) > 0) # cell has the NO_SCAN bit
+		no_scan[n]++
+	else # cell doesn't have the NO_SCAN bit
+		scan[n]++
+	if (NR == 2 || n < min)
+		min = n
+	if (NR == 2 || n > max)
+		max = n
+}
+
+# Return val in a short human readable form, rounded down to whole K or M
+# units (so the labels of narrow ranges above 1K can look the same).
+function h(val) {
+	if (val >= 1048576) # 1 M
+		return sprintf("%uM", val / 1048576)
+	if (val >= 1024) # 1 K
+		return sprintf("%uK", val / 1024)
+	return sprintf("%u", val)
+}
+
+# Print the CSV row labeled label with the counts s[key] and ns[key].
+function p(label, s, ns, key) {
+	print label "," s[key] "," ns[key]
+}
+
+END {
+	# sizes[1..count] holds the different cell sizes in ascending order
+	split("", sizes)
+	count = 0
+	for (size in scan)
+		sizes[++count] = int(size)
+	count = asort(sizes)
+	if (count <= MAX_SAMPLES) {
+		for (i = 1; i <= count; i++)
+			p(sizes[i], scan, no_scan, sizes[i])
+	} else {
+		# too many sizes: group them in ranges that are step sizes wide,
+		# which gives at most MAX_SAMPLES ranges between min and max
+		step = int((max - min) / MAX_SAMPLES) + 1
+		for (i = 1; i <= count; i++) {
+			# ranges are kept by index instead of by label, so two
+			# ranges with the same label never mix their counts
+			bin = int((sizes[i] - min) / step)
+			scan2[bin] += scan[sizes[i]]
+			no_scan2[bin] += no_scan[sizes[i]]
+		}
+		# the last range is the one holding max, so max is not lost
+		last = int((max - min) / step)
+		for (bin = 0; bin <= last; bin++) {
+			if (!(bin in scan2)) # skip empty ranges
+				continue
+			from = min + bin * step
+			p(h(from) "-" h(from + step), scan2, no_scan2, bin)
+		}
+	}
+}
-- 
2.43.0