#!/usr/bin/env awk -F, -f
#
# Builds a per-size histogram from a CSV file and prints it as CSV with the
# header "Size,Scan,No Scan".
#
# Input: the first line is treated as a header and skipped. For every other
# line, field 3 is used as the size and field 6 as the NO_SCAN indicator
# (a value > 0 counts the row under "No Scan", anything else under "Scan").
#
# Output: one row per distinct size, sorted numerically. If there are more
# than MAX_SAMPLES distinct sizes, sizes are grouped into equal-width ranges
# labelled "<from>-<to>" (with K/M suffixes) instead.
#
# NOTE: asort() is a gawk extension, so gawk is required. On Linux the
# kernel passes everything after the interpreter path as ONE argument, so the
# shebang above may not work there; run `awk -f <this-file> <input.csv>`
# instead (the field separator defaults to "," below).

BEGIN {
	# Honour an explicit -F; otherwise fall back to comma-separated input.
	if (FS == " ")
		FS = ","
	MAX_SAMPLES = 50
	# output CSV header
	print "Size,Scan,No Scan"
}

# The first data row seeds the observed size range.
NR == 2 {
	min = max = int($3)
}

NR > 1 { # skip the input CSV header
	n = int($3)
	if (int($6) > 0) { # cell has the NO_SCAN bit
		no_scan[n]++
		# keep both arrays keyed by the same set of sizes
		if (!(n in scan))
			scan[n] = 0
	} else { # cell doesn't have the NO_SCAN bit
		scan[n]++
		if (!(n in no_scan))
			no_scan[n] = 0
	}
	if (n < min)
		min = n
	else if (n > max)
		max = n
}

# Returns val formatted as an integer count of M (>= 1048576), K (>= 1024)
# or plain units; the fractional part is truncated.
# r is a local variable (extra parameter), not an argument.
function h(val,    r) {
	if (val >= 1048576) # 1 M
		r = sprintf("%uM", val / 1048576)
	else if (val >= 1024) # 1 K
		r = sprintf("%uK", val / 1024)
	else
		r = sprintf("%u", val)
	return r
}

# Prints rows 1..n: for each key o[i], "key,s[key],ns[key]".
# i is a local variable (extra parameter), not an argument.
function p(s, ns, o, n,    i) {
	for (i = 1; i <= n; i++)
		print o[i] "," s[o[i]] "," ns[o[i]]
}

# Groups the global scan/no_scan arrays (keyed by size) into equal-width
# ranges covering [min, max]. Fills the globals scan2/no_scan2 (keyed by
# range label) and order (labels in ascending range order, indexed from 1).
# Returns the number of labels stored in order.
function reduce_histogram(    step, size, from, label, seen, count) {
	# start from empty result arrays so the function can be called repeatedly
	split("", scan2)
	split("", no_scan2)
	split("", order)
	step = int((max - min) / MAX_SAMPLES) + 1
	for (size in scan) {
		# lower bound of the range containing this size, computed directly
		from = min + int((size - min) / step) * step
		label = sprintf("%s-%s", h(from), h(from + step))
		scan2[label] += scan[size]
		no_scan2[label] += no_scan[size]
	}
	count = 0
	# "<=" so the range that holds max itself is always visited
	for (from = min; from <= max; from += step) {
		label = sprintf("%s-%s", h(from), h(from + step))
		# h() truncates, so neighbouring ranges may share a label; their
		# counts were merged above, so list the label only once
		if ((label in scan2) && !(label in seen)) {
			seen[label] = 1
			order[++count] = label
		}
	}
	return count
}

END {
	if (length(scan) > MAX_SAMPLES) {
		# reduce the number of elements in the histogram if there are too many
		n = reduce_histogram()
		p(scan2, no_scan2, order, n)
	} else {
		# few enough distinct sizes: print them individually, sorted
		n = 0
		for (size in scan)
			order[++n] = int(size)
		if (n > 0) { # nothing to sort or print for an input with no data rows
			n = asort(order)
			p(scan, no_scan, order, n)
		}
	}
}