1 /*************************************************************************/
2 /* Copyright (c) 2015 Linas Vepstas                                      */
3 /* All rights reserved                                                   */
4 /*                                                                       */
5 /* Use of the link grammar parsing system is subject to the terms of the */
6 /* license set forth in the LICENSE file included with this software.    */
7 /* This license allows free redistribution and use in source and binary  */
8 /* forms, with or without modification, subject to certain conditions.   */
9 /*                                                                       */
10 /*************************************************************************/
11 
12 #include <math.h>
13 #include "histogram.h"
14 
15 #ifdef PERFORM_COUNT_HISTOGRAMMING
16 /* A histogram distribution of the parse counts. */
17 
hist_zero(void)18 Count_bin hist_zero(void)
19 {
20 	static Count_bin zero
21 		= {0, 0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0};
22 	return zero;
23 }
24 
hist_one(void)25 Count_bin hist_one(void)
26 {
27 	static Count_bin one
28 		= {0, 1, {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0};
29 	return one;
30 }
31 
32 #define BIN_WIDTH 0.334
33 
34 /**
35  * Accumulate counts in 'a', adding them to sum.
36  * The histogram is shifted by the amount 'cost'.
37  * That is, the bins are shifted over by the integer part of the cost
38  * (scaled to the bin-width).
39  */
hist_accum(Count_bin * sum,double cost,const Count_bin * a)40 void hist_accum(Count_bin* sum, double cost, const Count_bin* a)
41 {
42 	unsigned int i;
43 	unsigned int start;
44 
45 	// Skip, if nothing to accumulate.
46 	if (0 == a->total) return;
47 	sum->total += a->total;
48 
49 	// The cost tells us how much to offset the histogram in a,
50 	// before accumulating it.  'base' is the bin number of the first
51 	// non-empty bin.
52 	start = (unsigned int) floor (cost / BIN_WIDTH);
53 	if (0 == sum->bin[0])
54 	{
55 		sum->base = start;
56 		start = 0;
57 	}
58 
59 	for (i = start; i < NUM_BINS; i++)
60 	{
61 		sum->bin[i] += a->bin[i-start];
62 	}
63 	for (i = NUM_BINS-start; i < NUM_BINS; i++)
64 	{
65 		sum->overrun += a->bin[i];
66 	}
67 	sum->overrun += a->overrun;
68 }
69 
70 /** Same as above */
hist_accumv(Count_bin * sum,double cost,const Count_bin a)71 void hist_accumv(Count_bin* sum, double cost, const Count_bin a)
72 {
73 	hist_accum(sum, cost, &a);
74 }
75 
76 /**
77  * Create a product of two histogrammed counts.
78  * Observe that doing so requires a kind-of cross-product to
79  * be performed, thus, a nested double sum.
80  */
hist_prod(Count_bin * prod,const Count_bin * a,const Count_bin * b)81 void hist_prod(Count_bin* prod, const Count_bin* a, const Count_bin* b)
82 {
83 	unsigned int i, k;
84 
85 	// Skip, if the product is zero.
86 	if (0 == a->total || 0 == b->total) return;
87 	prod->total = a->total * b->total;
88 
89 // #define SLOW_BUT_SIMPLE 1
90 #ifdef SLOW_BUT_SIMPLE
91 	/* The below implements the straight-forward concept of the product.
92 	 * Its not quite optimal, because the intialization loop, and the
93 	 * if check can be eliminated by re-writing j = k-i.
94 	 */
95 	for (i = 0; i < NUM_BINS; i++) prod->bin[i] = 0;
96 	prod->overrun = 0;
97 	for (i = 0; i < NUM_BINS; i++)
98 	{
99 		for (j = 0; j < NUM_BINS; j++)
100 		{
101 			if (i+j < NUM_BINS)
102 				prod->bin[i+j] += a->bin[i] * b->bin[j];
103 			else
104 				prod->overrun += a->bin[i] * b->bin[j];
105 		}
106 
107 		prod->overrun += a->bin[i] * b->overrun;
108 		prod->overrun += a->overrun * b->bin[i];
109 	}
110 	prod->overrun += a->overrun * b->overrun;
111 #else
112 	/* The below does exactly the same thing as the above, but
113 	 * ever so slightly more quickly. Some pointless checks get
114 	 * eliminated.
115 	 */
116 	prod->overrun = 0;
117 	for (k = 0; k < NUM_BINS; k++)
118 	{
119 		prod->bin[k] = 0;
120 		for (i = 0; i <= k; i++)
121 		{
122 			prod->bin[k] += a->bin[i] * b->bin[k-i];
123 		}
124 		prod->overrun += a->bin[k] * b->overrun;
125 		prod->overrun += a->overrun * b->bin[k];
126 	}
127 	for (k = NUM_BINS; k < 2 * NUM_BINS - 1; k++)
128 	{
129 		for (i = k - NUM_BINS + 1; i < NUM_BINS; i++)
130 		{
131 			prod->overrun += a->bin[i] * b->bin[k-i];
132 		}
133 	}
134 	prod->overrun += a->overrun * b->overrun;
135 #endif
136 }
137 
138 /**
139  * Multiply two histograms 'a' and 'b', and accumulate them into 'acc'.
140  * The accumulated histogram is first shifted by 'cost'.
141  */
hist_muladd(Count_bin * acc,const Count_bin * a,double cost,const Count_bin * b)142 void hist_muladd(Count_bin* acc, const Count_bin* a, double cost, const Count_bin* b)
143 {
144 	Count_bin tmp = hist_zero();
145 	hist_prod(&tmp, a, b);
146 	hist_accum(acc, cost, &tmp);
147 }
148 
hist_muladdv(Count_bin * acc,const Count_bin * a,double cost,const Count_bin b)149 void hist_muladdv(Count_bin* acc, const Count_bin* a, double cost, const Count_bin b)
150 {
151 	hist_muladd(acc, a, cost, &b);
152 }
153 
hist_cost_cutoff(Count_bin * hist,int count)154 double hist_cost_cutoff(Count_bin* hist, int count)
155 {
156 	int i;
157 	s64 cnt = 0;
158 
159 	for (i=0; i<NUM_BINS; i++)
160 	{
161 		cnt += hist->bin[i];
162 		if (count <= cnt)
163 			return ((double) i + hist->base) * BIN_WIDTH;
164 	}
165 	return 1.0e38;
166 }
167 
hist_cut_total(Count_bin * hist,int min_total)168 s64 hist_cut_total(Count_bin* hist, int min_total)
169 {
170 	int i;
171 	s64 cnt = 0;
172 
173 	for (i=0; i<NUM_BINS; i++)
174 	{
175 		cnt += hist->bin[i];
176 		if (min_total <= cnt) return cnt;
177 	}
178 	return hist->total;
179 }
180 
181 #endif /* PERFORM_COUNT_HISTOGRAMMING */
182