micro: Improve generation of data to plot the histogram

author Leandro Lucarella <llucax@gmail.com>

Thu, 27 Aug 2009 00:06:17 +0000 (21:06 -0300)

committer Leandro Lucarella <llucax@gmail.com>

Thu, 27 Aug 2009 00:06:17 +0000 (21:06 -0300)
author Leandro Lucarella <llucax@gmail.com>
Thu, 27 Aug 2009 00:06:17 +0000 (21:06 -0300)
committer Leandro Lucarella <llucax@gmail.com>
Thu, 27 Aug 2009 00:06:17 +0000 (21:06 -0300)
diff --git a/micro/Makefile b/micro/Makefile

index e48a2e2fe810dd67cfa38a39ce2f85ee4a2354f9..ddbbac7cfc08ff713af980389d281bc1a1c832d1 100644 (file)
--- a/micro/Makefile
+++ b/micro/Makefile
@@ -72,9 +72,8 @@ $(STAT_DIR)/%.c.csv $(STAT_DIR)/%.a.csv: $(BIN_DIR)/%
  $(STAT_DIR)/split.c.csv $(STAT_DIR)/split.a.csv: override args := bible.txt
  
  .PRECIOUS: $(STAT_DIR)/%.h.csv
  $(STAT_DIR)/split.c.csv $(STAT_DIR)/split.a.csv: override args := bible.txt
  
  .PRECIOUS: $(STAT_DIR)/%.h.csv
-$(STAT_DIR)/%.h.csv: $(STAT_DIR)/%.a.csv
-       $(P_AWK) awk -F, 'BEGIN  { print "Size,Count" } NR > 1 { a[$$3]++ } \
-                       END { for (i in a) print i "," a[i] }' $< | sort > $@
+# Histogram data is generated by hist.awk. The script is listed as a
+# prerequisite so the CSV is rebuilt whenever the script changes, and it is
+# picked out of $^ by name so the recipe keeps working if further
+# prerequisites are added to the rule.
+$(STAT_DIR)/%.h.csv: $(STAT_DIR)/%.a.csv hist.awk
+       $(P_AWK) awk -F, -f $(filter %.awk,$^) $< > $@
  
  .PHONY: plot
  plot: $(graphs)
  
  .PHONY: plot
  plot: $(graphs)
diff --git a/micro/hist.awk b/micro/hist.awk

new file mode 100644 (file)

index 0000000..adeeb7b
--- /dev/null
+++ b/micro/hist.awk
@@ -0,0 +1,78 @@
+#!/usr/bin/awk -f
+#
+# Generate histogram data (CSV) of cell sizes from a comma-separated input
+# whose first line is a header. Field 3 is used as the size and a value
+# greater than zero in field 6 marks a cell that has the NO_SCAN bit.
+#
+# Usage: awk [-v MAX_SAMPLES=N] -f hist.awk input.csv
+#
+# The interpreter line carries a single argument (-f) because Linux hands
+# everything after the interpreter path to it as one word, which made the
+# former "env awk -F, -f" form fail when the script was executed directly.
+
+BEGIN {
+       # The input is comma separated; set it here so the script does not
+       # depend on being invoked with -F,
+       FS = ","
+       # Maximum number of rows before sizes are grouped into ranges; it
+       # can be overridden from the command line with -v MAX_SAMPLES=N
+       if (MAX_SAMPLES + 0 < 1)
+               MAX_SAMPLES = 50
+       # output CSV header
+       print "Size,Scan,No Scan";
+}
+
+# Per-record accounting, run for every line after the input CSV header.
+# scan[size] and no_scan[size] count the cells of each size without and
+# with the NO_SCAN bit; min and max track the smallest and largest size.
+NR > 1 {
+       size = int($3)
+       # the first data record seeds the size range
+       if (NR == 2)
+               min = max = size
+       # Count the cell on its own side and make sure the other side has an
+       # entry as well (adding 0 creates it), so both arrays share their keys.
+       if (int($6) > 0) { # cell has the NO_SCAN bit
+               no_scan[size]++
+               scan[size] += 0
+       } else { # cell doesn't have the NO_SCAN bit
+               scan[size]++
+               no_scan[size] += 0
+       }
+       if (size < min)
+               min = size
+       if (size > max)
+               max = size
+}
+
+# Human-readable form of a size: from 1 M up the value is shown in units of
+# 1048576 with an "M" suffix, from 1 K up in units of 1024 with a "K"
+# suffix, and below that as a plain number. The %u conversion keeps only
+# the integer part.
+function h(val) {
+       if (val < 1024)
+               return sprintf("%u", val)
+       if (val < 1048576) # below 1 M
+               return sprintf("%uK", val / 1024)
+       return sprintf("%uM", val / 1048576)
+}
+
+# Print one CSV row "label,scan count,no-scan count" for each of the first
+# n entries of o.
+#   s, ns  arrays of counts indexed by label
+#   o      array mapping 1..n to the labels, in output order
+#   n      number of rows to print
+#   i      local loop counter (awk declares locals as extra parameters);
+#          callers do not pass it
+function p(s, ns, o, n,    i) {
+       for (i = 1; i <= n; i++)
+               print o[i] "," s[o[i]] "," ns[o[i]]
+}
+
+# Emit the histogram rows once all the input has been read.
+#
+# With at most MAX_SAMPLES distinct sizes, one row per size is printed in
+# ascending numeric order. With more, sizes are merged into ranges of `step`
+# consecutive values labelled "from-to" and one row per range is printed.
+END {
+       n = 0 # number of rows stored in order[1..n]
+       if (length(scan) > MAX_SAMPLES) {
+               # reduce the number of elements in the histogram; step is
+               # chosen so that [min, max] needs at most MAX_SAMPLES ranges
+               step = int((max - min) / MAX_SAMPLES) + 1
+               for (size in scan) {
+                       # Start of the range holding this size, computed
+                       # directly so that every size, including max, always
+                       # lands in a range.
+                       from = min + int((size - min) / step) * step
+                       label = sprintf("%s-%s", h(from), h(from + step))
+                       scan2[label] += scan[size]
+                       no_scan2[label] += no_scan[size]
+               }
+               # Walk the ranges in ascending order. h() rounds, so adjacent
+               # ranges can share a label (their counts were merged above);
+               # each label is listed only once.
+               for (from = min; from <= max; from += step) {
+                       label = sprintf("%s-%s", h(from), h(from + step))
+                       if ((label in scan2) && !(label in seen)) {
+                               seen[label] = 1
+                               order[++n] = label
+                       }
+               }
+               p(scan2, no_scan2, order, n)
+       } else {
+               for (size in scan)
+                       order[++n] = int(size)
+               # asort() (a gawk extension) sorts the values and reindexes
+               # them from 1; skipped when there is nothing to sort
+               if (n > 0)
+                       n = asort(order)
+               p(scan, no_scan, order, n)
+       }
+}
+
author	Leandro Lucarella <llucax@gmail.com>
	Thu, 27 Aug 2009 00:06:17 +0000 (21:06 -0300)
committer	Leandro Lucarella <llucax@gmail.com>
	Thu, 27 Aug 2009 00:06:17 +0000 (21:06 -0300)
micro/Makefile		patch \| blob \| history
micro/hist.awk	[new file with mode: 0644]	patch \| blob