+#!/usr/bin/env awk -F, -f
+
+# Set up the field separator and bucket limit, then emit the output CSV header.
+BEGIN {
+    # The input is read as comma-separated fields ($3 and $6 are used by the
+    # record rule).  Default to "," when no separator was supplied, so the
+    # script also works when run as `awk -f <script>` without -F,.  An
+    # explicit -F on the command line is left untouched.
+    if (FS == " ")
+        FS = ","
+    # upper bound on the number of histogram rows before sizes are bucketed
+    MAX_SAMPLES = 50
+    # output CSV header
+    print "Size,Scan,No Scan"
+}
+
+# Per-record accounting.  Record 1 is the input CSV header and is skipped.
+# Field 3 is taken as the cell size, field 6 as the NO_SCAN flag.
+NR > 1 {
+    size = int($3)
+
+    # the first data record seeds the running minimum and maximum
+    if (NR == 2)
+        min = max = size
+
+    # every size seen gets an entry in both histograms, so both arrays
+    # always share the same set of keys
+    if (!(size in scan))
+        scan[size] = 0
+    if (!(size in no_scan))
+        no_scan[size] = 0
+
+    if (int($6) > 0)        # cell has the NO_SCAN bit
+        no_scan[size]++
+    else                    # cell doesn't have the NO_SCAN bit
+        scan[size]++
+
+    # track the observed size range
+    if (size < min)
+        min = size
+    else if (size > max)
+        max = size
+}
+
+# h(val): format a size for use in a bucket label.
+#   val >= 1048576  -> val / 1048576 followed by "M"
+#   val >= 1024     -> val / 1024 followed by "K"
+#   otherwise       -> val itself
+# The value is printed with an integer conversion (%u), so no fractional
+# part is shown.  Returns the formatted string; no global state is touched.
+function h(val) {
+    if (val >= 1048576)     # 1 M
+        return sprintf("%uM", val / 1048576)
+    if (val >= 1024)        # 1 K
+        return sprintf("%uK", val / 1024)
+    return sprintf("%u", val)
+}
+
+# p(s, ns, o, n): print one CSV row per entry of o[1..n].
+#   s   array of "scan" counts, indexed by the values stored in o
+#   ns  array of "no scan" counts, indexed the same way
+#   o   array mapping output position (1..n) to a key of s / ns
+#   n   number of rows to print
+# Each row is "<key>,<s[key]>,<ns[key]>".
+# i is declared as an extra parameter so it is local to the function and
+# cannot clobber a caller's loop variable of the same name.
+function p(s, ns, o, n,    i) {
+    for (i = 1; i <= n; i++)
+        print o[i] "," s[o[i]] "," ns[o[i]]
+}
+
+# Emit the histogram rows.  When there are more than MAX_SAMPLES distinct
+# sizes they are folded into at most MAX_SAMPLES equal-width buckets
+# labelled "<from>-<to>"; otherwise one row per size is printed, in
+# ascending numeric order.
+END {
+    if (length(scan) > MAX_SAMPLES) {
+        # reduce the number of elements in the histogram if there are too many
+        step = int((max - min) / MAX_SAMPLES) + 1
+
+        # Accumulate by numeric bucket index rather than by label: labels are
+        # shortened by h() and may coincide for different buckets.  Computing
+        # the index directly also guarantees that the largest size lands in
+        # a bucket.
+        for (size in scan) {
+            b = int((int(size) - min) / step)
+            bucket_scan[b] += scan[size]
+            bucket_no_scan[b] += no_scan[size]
+        }
+        nbuckets = int((max - min) / step) + 1
+
+        # Use the short labels only if they are unique across the non-empty
+        # buckets; otherwise fall back to the exact numeric bounds.
+        exact = 0
+        for (b = 0; b < nbuckets; b++) {
+            if (!(b in bucket_scan))
+                continue
+            from = min + b * step
+            label = sprintf("%s-%s", h(from), h(from + step))
+            if (label in seen) {
+                exact = 1
+                break
+            }
+            seen[label] = 1
+        }
+
+        # Build the label-keyed arrays in ascending bucket order.
+        count = 0
+        for (b = 0; b < nbuckets; b++) {
+            if (!(b in bucket_scan))
+                continue
+            from = min + b * step
+            to = from + step
+            if (exact)
+                label = sprintf("%d-%d", from, to)
+            else
+                label = sprintf("%s-%s", h(from), h(to))
+            scan2[label] = bucket_scan[b]
+            no_scan2[label] = bucket_no_scan[b]
+            order[++count] = label
+        }
+        # count is the exact number of rows, so no trailing empty row is printed
+        p(scan2, no_scan2, order, count)
+    } else {
+        # one row per distinct size, sorted numerically
+        count = 0
+        for (size in scan)
+            order[++count] = int(size)
+        if (count > 0)
+            count = asort(order)
+        p(scan, no_scan, order, count)
+    }
+}
+