#!/sw/bin/gawk -f

# Usage: histogramm.awk <input file>
#
# Option (recognised anywhere among the arguments):
#   --ignore-lower-limit <n>   drop every value smaller than <n>
#
# Read numbers (the first field of each input line), count how often
# every distinct value occurs and print (and plot) the distribution,
# followed by a suggested split of the value range into three parts.
#
# Lines starting with '#' are reported as skipped comments; lines whose
# first field is not a number are skipped with a warning on stderr.
# When every value is an integer, one row is printed per integer between
# the smallest and the largest value and long runs of empty rows are
# abbreviated.  Otherwise one row is printed per distinct value, in
# ascending numeric order.
#


BEGIN{
  have_data = 0;          # set to 1 once the first value has been counted
  minimum = 0;            # smallest counted value (valid when have_data)
  maximum = 0;            # largest counted value (valid when have_data)
  maxcount = 0;           # highest count of any single value
  compress = 7;           # runs of at least this many empty rows are abbreviated
  bar_width = 60;         # bar length drawn for the most frequent value
  ignore_low_limit = 0;
  all_int_keys = 1;

  # Values are used as numeric array subscripts.  A 17 digit conversion
  # format makes the number -> subscript -> number round trip exact, so
  # two different fractional values can never share a subscript.
  CONVFMT = "%.17g";

  for(i=0; i< ARGC; i++)
  {
    if(ARGV[i] == "--ignore-lower-limit")
    {
      if(i+1 < ARGC)
      {
        # Force a numeric limit so the later comparison is never a
        # string comparison.
        ignore_low_limit = 0 + ARGV[i+1];
        ARGV[i+1] = "";
      }
      # Empty ARGV entries are not opened as input files.
      ARGV[i] = "";
    }
  }
}

/^#/{
  print "Skipping comment";
  next;
}

/[A-Za-z0-9]+/{
  # Only accept plain decimal numbers; anything else would be counted
  # as a bogus 0 and would switch the output to non-integer mode.
  if($1 !~ /^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$/)
  {
    print "Skipping non-numeric value: " $1 > "/dev/stderr";
    next;
  }

  # Count by numeric value so that spellings such as "5", "05" and "5.0"
  # all end up in the same bin.
  value = 0 + $1;
  if(value < ignore_low_limit)
  {
    next;
  }
  if(value != int(value))
  {
    all_int_keys = 0;
  }
  array[value]++;
  if(array[value] > maxcount)
  {
    maxcount = array[value];
  }
  if(!have_data || value < minimum)
  {
    minimum = value;
  }
  if(!have_data || value > maximum)
  {
    maximum = value;
  }
  have_data = 1;
}

# Return a bar of '#' characters for the count `val`, scaled so that the
# global `maxcount` maps to `bar_width` characters.  res, limit and i are
# local variables.
function line(val,    res, limit, i)
{
  res = "";
  if(maxcount <= 0)
  {
    return res;
  }
  limit = val * bar_width / maxcount;
  for(i=0; i<limit; i++)
  {
    res = res "#";
  }
  return res;
}


END{
  if(!have_data)
  {
    print "histogramm: no numeric input values" > "/dev/stderr";
    exit 1;
  }

  # Collect the distinct values as numbers: asort() then orders them
  # numerically instead of lexicographically.
  nkeys = 0;
  for(key in array)
  {
    sorted[++nkeys] = 0 + key;
  }
  nkeys = asort(sorted);

  sum = 0;
  if(! all_int_keys)
  {
    # One row per distinct value.
    for(k=1; k<=nkeys; k++)
    {
      curr = array[sorted[k]];
      printf "%8.4f: %4d ", sorted[k], curr;
      print line(curr);
      sum += curr;
    }
  }
  else
  {
    # One row per integer in [minimum, maximum].  Membership tests are
    # used instead of array[i] so that empty bins do not create elements.
    for(i=minimum; i<=maximum; i++)
    {
      curr = (i in array) ? array[i] : 0;
      printf "%4d: %4d ", i, curr;
      print line(curr);
      sum += curr;
      if(!curr)
      {
        # Find the next non-empty bin; array[maximum] always exists.
        j = i;
        while(!(j in array) && j < maximum)
        {
          j++;
        }
        if(j-i >= compress)
        {
          # Abbreviate the gap: after the loop increment, the last empty
          # row before the next non-empty bin is printed.
          print ".\n.";
          i = j-2;
        }
      }
    }
  }

  # c1 / c2 are the first values at which the running total reaches one
  # third / two thirds of all samples.  Explicit flags are used because
  # 0 is a legitimate boundary value.
  l1 = sum/3;
  l2 = sum*2/3;
  c1 = 0;
  c2 = 0;
  found1 = 0;
  found2 = 0;
  running = 0;
  for(k=1; k<=nkeys; k++)
  {
    running += array[sorted[k]];
    if(!found1 && running >= l1)
    {
      c1 = sorted[k];
      found1 = 1;
    }
    if(!found2 && running >= l2)
    {
      c2 = sorted[k];
      found2 = 1;
    }
  }

  # Integer data keeps the %d summary; fractional data is printed with
  # the same precision as the table instead of being truncated.
  num = all_int_keys ? "%d" : "%.4f";
  range = num "-" num;
  fmt = "Suggested partition: " range ", " range ", " range "\n";
  printf fmt, minimum, c1, c1, c2, c2, maximum;
  fmt = "some_limit=" num ", many_limit=" num "\n";
  printf fmt, c1, c2;
}