1 /*************************************************************************/
2 /* Copyright (c) 2015 Linas Vepstas */
3 /* All rights reserved */
4 /* */
5 /* Use of the link grammar parsing system is subject to the terms of the */
6 /* license set forth in the LICENSE file included with this software. */
7 /* This license allows free redistribution and use in source and binary */
8 /* forms, with or without modification, subject to certain conditions. */
9 /* */
10 /*************************************************************************/
11
12 #include <math.h>
13 #include "histogram.h"
14
15 #ifdef PERFORM_COUNT_HISTOGRAMMING
16 /* A histogram distribution of the parse counts. */
17
hist_zero(void)18 Count_bin hist_zero(void)
19 {
20 static Count_bin zero
21 = {0, 0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0};
22 return zero;
23 }
24
hist_one(void)25 Count_bin hist_one(void)
26 {
27 static Count_bin one
28 = {0, 1, {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0};
29 return one;
30 }
31
32 #define BIN_WIDTH 0.334
33
34 /**
35 * Accumulate counts in 'a', adding them to sum.
36 * The histogram is shifted by the amount 'cost'.
37 * That is, the bins are shifted over by the integer part of the cost
38 * (scaled to the bin-width).
39 */
hist_accum(Count_bin * sum,double cost,const Count_bin * a)40 void hist_accum(Count_bin* sum, double cost, const Count_bin* a)
41 {
42 unsigned int i;
43 unsigned int start;
44
45 // Skip, if nothing to accumulate.
46 if (0 == a->total) return;
47 sum->total += a->total;
48
49 // The cost tells us how much to offset the histogram in a,
50 // before accumulating it. 'base' is the bin number of the first
51 // non-empty bin.
52 start = (unsigned int) floor (cost / BIN_WIDTH);
53 if (0 == sum->bin[0])
54 {
55 sum->base = start;
56 start = 0;
57 }
58
59 for (i = start; i < NUM_BINS; i++)
60 {
61 sum->bin[i] += a->bin[i-start];
62 }
63 for (i = NUM_BINS-start; i < NUM_BINS; i++)
64 {
65 sum->overrun += a->bin[i];
66 }
67 sum->overrun += a->overrun;
68 }
69
70 /** Same as above */
hist_accumv(Count_bin * sum,double cost,const Count_bin a)71 void hist_accumv(Count_bin* sum, double cost, const Count_bin a)
72 {
73 hist_accum(sum, cost, &a);
74 }
75
76 /**
77 * Create a product of two histogrammed counts.
78 * Observe that doing so requires a kind-of cross-product to
79 * be performed, thus, a nested double sum.
80 */
hist_prod(Count_bin * prod,const Count_bin * a,const Count_bin * b)81 void hist_prod(Count_bin* prod, const Count_bin* a, const Count_bin* b)
82 {
83 unsigned int i, k;
84
85 // Skip, if the product is zero.
86 if (0 == a->total || 0 == b->total) return;
87 prod->total = a->total * b->total;
88
89 // #define SLOW_BUT_SIMPLE 1
90 #ifdef SLOW_BUT_SIMPLE
91 /* The below implements the straight-forward concept of the product.
92 * Its not quite optimal, because the intialization loop, and the
93 * if check can be eliminated by re-writing j = k-i.
94 */
95 for (i = 0; i < NUM_BINS; i++) prod->bin[i] = 0;
96 prod->overrun = 0;
97 for (i = 0; i < NUM_BINS; i++)
98 {
99 for (j = 0; j < NUM_BINS; j++)
100 {
101 if (i+j < NUM_BINS)
102 prod->bin[i+j] += a->bin[i] * b->bin[j];
103 else
104 prod->overrun += a->bin[i] * b->bin[j];
105 }
106
107 prod->overrun += a->bin[i] * b->overrun;
108 prod->overrun += a->overrun * b->bin[i];
109 }
110 prod->overrun += a->overrun * b->overrun;
111 #else
112 /* The below does exactly the same thing as the above, but
113 * ever so slightly more quickly. Some pointless checks get
114 * eliminated.
115 */
116 prod->overrun = 0;
117 for (k = 0; k < NUM_BINS; k++)
118 {
119 prod->bin[k] = 0;
120 for (i = 0; i <= k; i++)
121 {
122 prod->bin[k] += a->bin[i] * b->bin[k-i];
123 }
124 prod->overrun += a->bin[k] * b->overrun;
125 prod->overrun += a->overrun * b->bin[k];
126 }
127 for (k = NUM_BINS; k < 2 * NUM_BINS - 1; k++)
128 {
129 for (i = k - NUM_BINS + 1; i < NUM_BINS; i++)
130 {
131 prod->overrun += a->bin[i] * b->bin[k-i];
132 }
133 }
134 prod->overrun += a->overrun * b->overrun;
135 #endif
136 }
137
138 /**
139 * Multiply two histograms 'a' and 'b', and accumulate them into 'acc'.
140 * The accumulated histogram is first shifted by 'cost'.
141 */
hist_muladd(Count_bin * acc,const Count_bin * a,double cost,const Count_bin * b)142 void hist_muladd(Count_bin* acc, const Count_bin* a, double cost, const Count_bin* b)
143 {
144 Count_bin tmp = hist_zero();
145 hist_prod(&tmp, a, b);
146 hist_accum(acc, cost, &tmp);
147 }
148
hist_muladdv(Count_bin * acc,const Count_bin * a,double cost,const Count_bin b)149 void hist_muladdv(Count_bin* acc, const Count_bin* a, double cost, const Count_bin b)
150 {
151 hist_muladd(acc, a, cost, &b);
152 }
153
hist_cost_cutoff(Count_bin * hist,int count)154 double hist_cost_cutoff(Count_bin* hist, int count)
155 {
156 int i;
157 s64 cnt = 0;
158
159 for (i=0; i<NUM_BINS; i++)
160 {
161 cnt += hist->bin[i];
162 if (count <= cnt)
163 return ((double) i + hist->base) * BIN_WIDTH;
164 }
165 return 1.0e38;
166 }
167
hist_cut_total(Count_bin * hist,int min_total)168 s64 hist_cut_total(Count_bin* hist, int min_total)
169 {
170 int i;
171 s64 cnt = 0;
172
173 for (i=0; i<NUM_BINS; i++)
174 {
175 cnt += hist->bin[i];
176 if (min_total <= cnt) return cnt;
177 }
178 return hist->total;
179 }
180
181 #endif /* PERFORM_COUNT_HISTOGRAMMING */
182