1#!/sw/bin/gawk -f
2
# Usage: histogramm.awk [--ignore-lower-limit <N>] <input file>
4#
5# Read a number of positive integers and print (and plot) a
6# distribution.
7#
8
9
BEGIN {
   # Running extremes of the values read; minimum starts at a large
   # sentinel so that the first accepted value replaces it.
   maximum  = 0;
   minimum  = 2000000000;

   # Highest count seen for any single key (used to scale the bars).
   maxcount = 0;

   # Runs of at least this many empty rows are collapsed in the plot.
   compress = 7;

   # Values below this limit are dropped; 0 unless set on the command line.
   ignore_low_limit = 0;

   # Cleared as soon as one key turns out not to be a whole number.
   all_int_keys = 1;

   # Pick up "--ignore-lower-limit <N>" and blank both words out of ARGV
   # so that awk does not try to open them as input files.
   i = 0;
   while (i < ARGC)
   {
      if (ARGV[i] != "--ignore-lower-limit")
      {
         i++;
         continue;
      }
      ignore_low_limit = ARGV[i+1];
      ARGV[i+1] = "";
      ARGV[i]   = "";
      i++;
   }
}
28
# Lines starting with '#' are comments: report them and move on to the
# next record.  '#' needs no backslash inside a regexp constant; writing
# "\#" makes newer gawk releases warn about an unknown regexp escape.
/^#/{
   print "Skipping comment";
   next;
}
33
# Every line containing at least one letter or digit is a data line; its
# first field is the value to be counted.
/[A-Za-z0-9]+/{
   value = 0+$1;

   # Drop values below the limit given with --ignore-lower-limit.
   if(value < ignore_low_limit)
   {
      next;
   }

   if(int($1) != $1)
   {
      # Not a whole number: remember that, and keep the field text as key.
      all_int_keys = 0;
      key = $1;
   }
   else
   {
      # Whole number: use its canonical integer form as the key so that
      # spellings such as "007", "5.0" or "1e2" land in the same slot
      # that the END rule looks up with an integer subscript.
      key = int($1);
   }

   array[key]++;
   if(array[key] > maxcount)
   {
      maxcount = array[key];
   }

   # Track the numeric range of the accepted values.
   if(value < minimum)
   {
      minimum = value;
   }
   if(value > maximum)
   {
      maximum = value;
   }
}
57
# line(val) -- build the bar for one histogram row.
#
#   val   count to plot
#
# Returns a string of '#' characters whose length is val scaled so that
# the global maxcount corresponds to 60 characters (fractional lengths
# are rounded up).  Returns "" when maxcount is not positive, instead of
# aborting with a division by zero.
#
# res, limit and i are local variables.
function line(val,    res,limit,i)
{
   res = "";
   if(maxcount <= 0)
   {
      return res;
   }
   limit = val *60 / maxcount;
   for(i=0; i<limit; i++)
   {
      res = res "#";
   }
   return res;
}
68
69
# Print the histogram and suggest a split of the value range into three
# parts holding roughly one third of the samples each.
#
# Output:
#   - one row per key: the key, its count and a bar from line()
#   - "Suggested partition: ..." and "some_limit=..., many_limit=..."
#
# Needs gawk 4.0 or later for the three-argument form of asorti().
END{
   # Keys in ascending NUMERIC order.  (Sorting the index strings with
   # plain asort() orders them as text, e.g. "10.5" before "9.5".)
   n = asorti(array, ind, "@ind_num_asc");

   # Total number of samples counted.
   sum = 0;
   for(i = 1; i <= n; i++)
   {
      sum += array[ind[i]];
   }

   if(n == 0)
   {
      # Nothing was read: replace the BEGIN sentinel so that the summary
      # below reports zeros instead of 2000000000.
      minimum = 0;
      maximum = 0;
   }
   else if(! all_int_keys)
   {
      # At least one non-integer key: list exactly the keys that occurred.
      for(i = 1; i <= n; i++)
      {
         curr = 0+array[ind[i]];
         printf "%8.4f: %4d  ", ind[i], curr;
         print line(curr);
      }
   }
   else
   {
      # Integer keys only: walk the whole range so that gaps show up as
      # empty rows.
      for(i = minimum; i <= maximum; i++)
      {
         curr = (i in array) ? 0+array[i] : 0;
         printf "%4d: %4d  ", i, curr;
         print line(curr);
         if(!curr)
         {
            # Find the next key that has data (maximum always has).
            j = i;
            while(!(j in array) && (j < maximum))
            {
               j++;
            }
            if(j-i >= compress)
            {
               # Long gap: print an ellipsis and resume one row before
               # the next populated key, so the gap is shown as
               # "empty row, dots, empty row".
               print ".\n.";
               i = j-2;
            }
         }
      }
   }

   # Tercile cut points: the first key at which the running total reaches
   # one third (c1) and two thirds (c2) of all samples.  Separate flags
   # are used so that a cut point of 0 is not mistaken for "not found".
   l1 = sum/3;
   l2 = sum*2/3;
   c1 = 0;
   c2 = 0;
   have_c1 = 0;
   have_c2 = 0;
   running = 0;
   for(i = 1; i <= n; i++)
   {
      running += array[ind[i]];
      if(!have_c1 && running >= l1)
      {
         c1 = 0+ind[i];
         have_c1 = 1;
      }
      if(!have_c2 && running >= l2)
      {
         c2 = 0+ind[i];
         have_c2 = 1;
      }
   }

   # Note: %d truncates non-integer limits.
   printf "Suggested partition: %d-%d, %d-%d, %d-%d\n",
      minimum,c1,c1, c2,c2,maximum;
   printf "some_limit=%d, many_limit=%d\n",c1,c2;
}
150