1 // pdmath.c
2 // Elementary math on probability distributions
3 //
4 // (c) 2016 - Nico Palermo, IV3NWV - Microtelecom Srl, Italy
5 // ------------------------------------------------------------------------------
6 // This file is part of the qracodes project, a Forward Error Control
7 // encoding/decoding package based on Q-ary RA (Repeat and Accumulate) LDPC codes.
8 //
9 //    qracodes is free software: you can redistribute it and/or modify
10 //    it under the terms of the GNU General Public License as published by
11 //    the Free Software Foundation, either version 3 of the License, or
12 //    (at your option) any later version.
13 //    qracodes is distributed in the hope that it will be useful,
14 //    but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 //    GNU General Public License for more details.
17 
18 //    You should have received a copy of the GNU General Public License
19 //    along with qracodes source distribution.
20 //    If not, see <http://www.gnu.org/licenses/>.
21 
22 #include "pdmath.h"
23 
24 typedef const float *ppd_uniform;
25 typedef void  (*ppd_imul)(float*,const float*);
26 typedef float (*ppd_norm)(float*);
27 
28 // define vector size in function of its logarithm in base 2
29 static const int pd_log2dim[7] = {
30 	1,2,4,8,16,32,64
31 };
32 
33 // define uniform distributions of given size
34 static const float pd_uniform1[1] = {
35 	1.
36 };
37 static const float pd_uniform2[2] = {
38 	1./2., 1./2.
39 };
40 static const float pd_uniform4[4] = {
41 	1./4., 1./4.,1./4., 1./4.
42 };
43 static const float pd_uniform8[8] = {
44 	1./8., 1./8.,1./8., 1./8.,1./8., 1./8.,1./8., 1./8.
45 };
46 static const float pd_uniform16[16] = {
47 	1./16., 1./16., 1./16., 1./16.,1./16., 1./16.,1./16., 1./16.,
48 	1./16., 1./16., 1./16., 1./16.,1./16., 1./16.,1./16., 1./16.
49 };
50 static const float pd_uniform32[32] = {
51 	1./32., 1./32., 1./32., 1./32.,1./32., 1./32.,1./32., 1./32.,
52 	1./32., 1./32., 1./32., 1./32.,1./32., 1./32.,1./32., 1./32.,
53 	1./32., 1./32., 1./32., 1./32.,1./32., 1./32.,1./32., 1./32.,
54 	1./32., 1./32., 1./32., 1./32.,1./32., 1./32.,1./32., 1./32.
55 };
56 static const float pd_uniform64[64] = {
57 	1./64., 1./64., 1./64., 1./64.,1./64., 1./64.,1./64., 1./64.,
58 	1./64., 1./64., 1./64., 1./64.,1./64., 1./64.,1./64., 1./64.,
59 	1./64., 1./64., 1./64., 1./64.,1./64., 1./64.,1./64., 1./64.,
60 	1./64., 1./64., 1./64., 1./64.,1./64., 1./64.,1./64., 1./64.,
61 	1./64., 1./64., 1./64., 1./64.,1./64., 1./64.,1./64., 1./64.,
62 	1./64., 1./64., 1./64., 1./64.,1./64., 1./64.,1./64., 1./64.,
63 	1./64., 1./64., 1./64., 1./64.,1./64., 1./64.,1./64., 1./64.,
64 	1./64., 1./64., 1./64., 1./64.,1./64., 1./64.,1./64., 1./64.
65 
66 };
67 
68 static const ppd_uniform pd_uniform_tab[7] = {
69 	pd_uniform1,
70 	pd_uniform2,
71 	pd_uniform4,
72 	pd_uniform8,
73 	pd_uniform16,
74 	pd_uniform32,
75 	pd_uniform64
76 };
77 
78 // returns a pointer to the uniform distribution of the given logsize
pd_uniform(int nlogdim)79 const float *pd_uniform(int nlogdim)
80 {
81 	return pd_uniform_tab[nlogdim];
82 }
83 
84 // in-place multiplication functions
85 // compute dst = dst*src for any element of the distrib
86 
pd_imul1(float * dst,const float * src)87 static void pd_imul1(float *dst, const float *src)
88 {
89 		dst[0] *= src[0];
90 }
91 
pd_imul2(float * dst,const float * src)92 static void pd_imul2(float *dst, const float *src)
93 {
94 		dst[0] *= src[0]; dst[1] *= src[1];
95 }
pd_imul4(float * dst,const float * src)96 static void pd_imul4(float *dst, const float *src)
97 {
98 		dst[0] *= src[0]; dst[1] *= src[1];
99 		dst[2] *= src[2]; dst[3] *= src[3];
100 }
pd_imul8(float * dst,const float * src)101 static void pd_imul8(float *dst, const float *src)
102 {
103 		dst[0] *= src[0]; dst[1] *= src[1]; dst[2] *= src[2]; dst[3] *= src[3];
104 		dst[4] *= src[4]; dst[5] *= src[5]; dst[6] *= src[6]; dst[7] *= src[7];
105 }
pd_imul16(float * dst,const float * src)106 static void pd_imul16(float *dst, const float *src)
107 {
108 		dst[0] *= src[0];  dst[1] *= src[1];  dst[2] *= src[2];  dst[3] *= src[3];
109 		dst[4] *= src[4];  dst[5] *= src[5];  dst[6] *= src[6];  dst[7] *= src[7];
110 		dst[8] *= src[8];  dst[9] *= src[9];  dst[10]*= src[10]; dst[11]*= src[11];
111 		dst[12]*= src[12]; dst[13]*= src[13]; dst[14]*= src[14]; dst[15]*= src[15];
112 }
pd_imul32(float * dst,const float * src)113 static void pd_imul32(float *dst, const float *src)
114 {
115 	pd_imul16(dst,src);
116 	pd_imul16(dst+16,src+16);
117 }
pd_imul64(float * dst,const float * src)118 static void pd_imul64(float *dst, const float *src)
119 {
120 	pd_imul16(dst, src);
121 	pd_imul16(dst+16, src+16);
122 	pd_imul16(dst+32, src+32);
123 	pd_imul16(dst+48, src+48);
124 }
125 
126 static const ppd_imul pd_imul_tab[7] = {
127 	pd_imul1,
128 	pd_imul2,
129 	pd_imul4,
130 	pd_imul8,
131 	pd_imul16,
132 	pd_imul32,
133 	pd_imul64
134 };
135 
136 // in place multiplication
137 // compute dst = dst*src for any element of the distrib give their log2 size
138 // arguments must be pointers to array of floats of the given size
pd_imul(float * dst,const float * src,int nlogdim)139 void pd_imul(float *dst, const float *src, int nlogdim)
140 {
141 	pd_imul_tab[nlogdim](dst,src);
142 }
143 
pd_norm1(float * ppd)144 static float pd_norm1(float *ppd)
145 {
146 	float t = ppd[0];
147 	ppd[0] = 1.f;
148 	return t;
149 }
150 
pd_norm2(float * ppd)151 static float pd_norm2(float *ppd)
152 {
153 	float t,to;
154 
155 	t =ppd[0]; t +=ppd[1];
156 
157 	if (t<=0) {
158 		pd_init(ppd,pd_uniform(1),pd_log2dim[1]);
159 		return t;
160 		}
161 
162 	to = t;
163 	t = 1.f/t;
164 	ppd[0] *=t; ppd[1] *=t;
165 	return to;
166 
167 }
168 
pd_norm4(float * ppd)169 static float pd_norm4(float *ppd)
170 {
171 	float t,to;
172 
173 	t =ppd[0]; t +=ppd[1]; t +=ppd[2]; t +=ppd[3];
174 
175 	if (t<=0) {
176 		pd_init(ppd,pd_uniform(2),pd_log2dim[2]);
177 		return t;
178 		}
179 
180 	to = t;
181 	t = 1.f/t;
182 	ppd[0] *=t; ppd[1] *=t; ppd[2] *=t; ppd[3] *=t;
183 	return to;
184 }
185 
pd_norm8(float * ppd)186 static float pd_norm8(float *ppd)
187 {
188 	float t,to;
189 
190 	t  =ppd[0]; t +=ppd[1]; t +=ppd[2]; t +=ppd[3];
191 	t +=ppd[4]; t +=ppd[5]; t +=ppd[6]; t +=ppd[7];
192 
193 	if (t<=0) {
194 		pd_init(ppd,pd_uniform(3),pd_log2dim[3]);
195 		return t;
196 		}
197 
198 	to = t;
199 	t = 1.f/t;
200 	ppd[0] *=t; ppd[1] *=t; ppd[2] *=t; ppd[3] *=t;
201 	ppd[4] *=t; ppd[5] *=t; ppd[6] *=t; ppd[7] *=t;
202 	return to;
203 }
pd_norm16(float * ppd)204 static float pd_norm16(float *ppd)
205 {
206 	float t,to;
207 
208 	t  =ppd[0];  t +=ppd[1];  t +=ppd[2];  t +=ppd[3];
209 	t +=ppd[4];  t +=ppd[5];  t +=ppd[6];  t +=ppd[7];
210 	t +=ppd[8];  t +=ppd[9];  t +=ppd[10]; t +=ppd[11];
211 	t +=ppd[12]; t +=ppd[13]; t +=ppd[14]; t +=ppd[15];
212 
213 	if (t<=0) {
214 		pd_init(ppd,pd_uniform(4),pd_log2dim[4]);
215 		return t;
216 		}
217 
218 	to = t;
219 	t = 1.f/t;
220 	ppd[0]  *=t; ppd[1]  *=t; ppd[2]  *=t; ppd[3]  *=t;
221 	ppd[4]  *=t; ppd[5]  *=t; ppd[6]  *=t; ppd[7]  *=t;
222 	ppd[8]  *=t; ppd[9]  *=t; ppd[10] *=t; ppd[11] *=t;
223 	ppd[12] *=t; ppd[13] *=t; ppd[14] *=t; ppd[15] *=t;
224 
225 	return to;
226 }
pd_norm32(float * ppd)227 static float pd_norm32(float *ppd)
228 {
229 	float t,to;
230 
231 	t  =ppd[0];  t +=ppd[1];  t +=ppd[2];  t +=ppd[3];
232 	t +=ppd[4];  t +=ppd[5];  t +=ppd[6];  t +=ppd[7];
233 	t +=ppd[8];  t +=ppd[9];  t +=ppd[10]; t +=ppd[11];
234 	t +=ppd[12]; t +=ppd[13]; t +=ppd[14]; t +=ppd[15];
235 	t +=ppd[16]; t +=ppd[17]; t +=ppd[18]; t +=ppd[19];
236 	t +=ppd[20]; t +=ppd[21]; t +=ppd[22]; t +=ppd[23];
237 	t +=ppd[24]; t +=ppd[25]; t +=ppd[26]; t +=ppd[27];
238 	t +=ppd[28]; t +=ppd[29]; t +=ppd[30]; t +=ppd[31];
239 
240 	if (t<=0) {
241 		pd_init(ppd,pd_uniform(5),pd_log2dim[5]);
242 		return t;
243 		}
244 
245 	to = t;
246 	t = 1.f/t;
247 	ppd[0]  *=t; ppd[1]  *=t; ppd[2]  *=t; ppd[3]  *=t;
248 	ppd[4]  *=t; ppd[5]  *=t; ppd[6]  *=t; ppd[7]  *=t;
249 	ppd[8]  *=t; ppd[9]  *=t; ppd[10] *=t; ppd[11] *=t;
250 	ppd[12] *=t; ppd[13] *=t; ppd[14] *=t; ppd[15] *=t;
251 	ppd[16] *=t; ppd[17] *=t; ppd[18] *=t; ppd[19] *=t;
252 	ppd[20] *=t; ppd[21] *=t; ppd[22] *=t; ppd[23] *=t;
253 	ppd[24] *=t; ppd[25] *=t; ppd[26] *=t; ppd[27] *=t;
254 	ppd[28] *=t; ppd[29] *=t; ppd[30] *=t; ppd[31] *=t;
255 
256 	return to;
257 }
258 
pd_norm64(float * ppd)259 static float pd_norm64(float *ppd)
260 {
261 	float t,to;
262 
263 	t  =ppd[0];  t +=ppd[1];  t +=ppd[2];  t +=ppd[3];
264 	t +=ppd[4];  t +=ppd[5];  t +=ppd[6];  t +=ppd[7];
265 	t +=ppd[8];  t +=ppd[9];  t +=ppd[10]; t +=ppd[11];
266 	t +=ppd[12]; t +=ppd[13]; t +=ppd[14]; t +=ppd[15];
267 	t +=ppd[16]; t +=ppd[17]; t +=ppd[18]; t +=ppd[19];
268 	t +=ppd[20]; t +=ppd[21]; t +=ppd[22]; t +=ppd[23];
269 	t +=ppd[24]; t +=ppd[25]; t +=ppd[26]; t +=ppd[27];
270 	t +=ppd[28]; t +=ppd[29]; t +=ppd[30]; t +=ppd[31];
271 
272 	t +=ppd[32]; t +=ppd[33]; t +=ppd[34]; t +=ppd[35];
273 	t +=ppd[36]; t +=ppd[37]; t +=ppd[38]; t +=ppd[39];
274 	t +=ppd[40]; t +=ppd[41]; t +=ppd[42]; t +=ppd[43];
275 	t +=ppd[44]; t +=ppd[45]; t +=ppd[46]; t +=ppd[47];
276 	t +=ppd[48]; t +=ppd[49]; t +=ppd[50]; t +=ppd[51];
277 	t +=ppd[52]; t +=ppd[53]; t +=ppd[54]; t +=ppd[55];
278 	t +=ppd[56]; t +=ppd[57]; t +=ppd[58]; t +=ppd[59];
279 	t +=ppd[60]; t +=ppd[61]; t +=ppd[62]; t +=ppd[63];
280 
281 	if (t<=0) {
282 		pd_init(ppd,pd_uniform(6),pd_log2dim[6]);
283 		return t;
284 		}
285 
286 	to = t;
287 	t = 1.0f/t;
288 	ppd[0]  *=t; ppd[1]  *=t; ppd[2]  *=t; ppd[3]  *=t;
289 	ppd[4]  *=t; ppd[5]  *=t; ppd[6]  *=t; ppd[7]  *=t;
290 	ppd[8]  *=t; ppd[9]  *=t; ppd[10] *=t; ppd[11] *=t;
291 	ppd[12] *=t; ppd[13] *=t; ppd[14] *=t; ppd[15] *=t;
292 	ppd[16] *=t; ppd[17] *=t; ppd[18] *=t; ppd[19] *=t;
293 	ppd[20] *=t; ppd[21] *=t; ppd[22] *=t; ppd[23] *=t;
294 	ppd[24] *=t; ppd[25] *=t; ppd[26] *=t; ppd[27] *=t;
295 	ppd[28] *=t; ppd[29] *=t; ppd[30] *=t; ppd[31] *=t;
296 
297 	ppd[32] *=t; ppd[33] *=t; ppd[34] *=t; ppd[35] *=t;
298 	ppd[36] *=t; ppd[37] *=t; ppd[38] *=t; ppd[39] *=t;
299 	ppd[40] *=t; ppd[41] *=t; ppd[42] *=t; ppd[43] *=t;
300 	ppd[44] *=t; ppd[45] *=t; ppd[46] *=t; ppd[47] *=t;
301 	ppd[48] *=t; ppd[49] *=t; ppd[50] *=t; ppd[51] *=t;
302 	ppd[52] *=t; ppd[53] *=t; ppd[54] *=t; ppd[55] *=t;
303 	ppd[56] *=t; ppd[57] *=t; ppd[58] *=t; ppd[59] *=t;
304 	ppd[60] *=t; ppd[61] *=t; ppd[62] *=t; ppd[63] *=t;
305 
306 	return to;
307 }
308 
309 
310 static const ppd_norm pd_norm_tab[7] = {
311 	pd_norm1,
312 	pd_norm2,
313 	pd_norm4,
314 	pd_norm8,
315 	pd_norm16,
316 	pd_norm32,
317 	pd_norm64
318 };
319 
pd_norm(float * pd,int nlogdim)320 float pd_norm(float *pd, int nlogdim)
321 {
322 	return pd_norm_tab[nlogdim](pd);
323 }
324 
pd_memset(float * dst,const float * src,int ndim,int nitems)325 void pd_memset(float *dst, const float *src, int ndim, int nitems)
326 {
327 	int size = PD_SIZE(ndim);
328 	while(nitems--) {
329 		memcpy(dst,src,size);
330 		dst +=ndim;
331 		}
332 }
333 
pd_fwdperm(float * dst,float * src,const int * perm,int ndim)334 void  pd_fwdperm(float *dst, float *src, const  int *perm, int ndim)
335 {
336 	// TODO: non-loop implementation
337 	while (ndim--)
338 		dst[ndim] = src[perm[ndim]];
339 }
340 
pd_bwdperm(float * dst,float * src,const int * perm,int ndim)341 void  pd_bwdperm(float *dst, float *src, const  int *perm, int ndim)
342 {
343 	// TODO: non-loop implementation
344 	while (ndim--)
345 		dst[perm[ndim]] = src[ndim];
346 }
347 
pd_max(float * src,int ndim)348 float pd_max(float *src, int ndim)
349 {
350 	// TODO: faster implementation
351 
352 	float cmax=0;  // we assume that prob distributions are always positive
353 	float cval;
354 
355 	while (ndim--) {
356 		cval = src[ndim];
357 		if (cval>=cmax) {
358 			cmax = cval;
359 			}
360 		}
361 
362 	return cmax;
363 }
364 
pd_argmax(float * pmax,float * src,int ndim)365 int pd_argmax(float *pmax, float *src, int ndim)
366 {
367 	// TODO: faster implementation
368 
369 	float cmax=0;  // we assume that prob distributions are always positive
370 	float cval;
371 	int idxmax=-1; // indicates that all pd elements are <0
372 
373 	while (ndim--) {
374 		cval = src[ndim];
375 		if (cval>=cmax) {
376 			cmax = cval;
377 			idxmax = ndim;
378 			}
379 		}
380 
381 	if (pmax)
382 		*pmax = cmax;
383 
384 	return idxmax;
385 }
386