1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file dnafunc1.c
29 * <pre>
30 *
31 * Rearrangements
32 * l_int32 *l_dnaJoin()
33 * l_int32 *l_dnaaFlattenToDna()
34 *
35 * Conversion between numa and dna
36 * NUMA *l_dnaConvertToNuma()
37 * L_DNA *numaConvertToDna()
38 *
39 * Set operations using aset (rbtree)
40 * L_DNA *l_dnaUnionByAset()
41 * L_DNA *l_dnaRemoveDupsByAset()
42 * L_DNA *l_dnaIntersectionByAset()
43 * L_ASET *l_asetCreateFromDna()
44 *
45 * Miscellaneous operations
46 * L_DNA *l_dnaDiffAdjValues()
47 *
48 *
49 * This file contains an implemention on sets of doubles (or integers)
50 * that uses an underlying tree (rbtree). The keys stored in the tree
51 * are simply the double array values in the dna. Use of a DnaHash
52 * is typically more efficient, with O(1) in lookup and insertion.
53 *
54 * </pre>
55 */
56
57 #include "allheaders.h"
58
59 /*----------------------------------------------------------------------*
60 * Rearrangements *
61 *----------------------------------------------------------------------*/
62 /*!
63 * \brief l_dnaJoin()
64 *
65 * \param[in] dad dest dna; add to this one
66 * \param[in] das [optional] source dna; add from this one
67 * \param[in] istart starting index in das
68 * \param[in] iend ending index in das; use -1 to cat all
69 * \return 0 if OK, 1 on error
70 *
71 * <pre>
72 * Notes:
73 * (1) istart < 0 is taken to mean 'read from the start' (istart = 0)
74 * (2) iend < 0 means 'read to the end'
75 * (3) if das == NULL, this is a no-op
76 * </pre>
77 */
78 l_int32
l_dnaJoin(L_DNA * dad,L_DNA * das,l_int32 istart,l_int32 iend)79 l_dnaJoin(L_DNA *dad,
80 L_DNA *das,
81 l_int32 istart,
82 l_int32 iend)
83 {
84 l_int32 n, i;
85 l_float64 val;
86
87 PROCNAME("l_dnaJoin");
88
89 if (!dad)
90 return ERROR_INT("dad not defined", procName, 1);
91 if (!das)
92 return 0;
93
94 if (istart < 0)
95 istart = 0;
96 n = l_dnaGetCount(das);
97 if (iend < 0 || iend >= n)
98 iend = n - 1;
99 if (istart > iend)
100 return ERROR_INT("istart > iend; nothing to add", procName, 1);
101
102 for (i = istart; i <= iend; i++) {
103 l_dnaGetDValue(das, i, &val);
104 l_dnaAddNumber(dad, val);
105 }
106
107 return 0;
108 }
109
110
111 /*!
112 * \brief l_dnaaFlattenToDna()
113 *
114 * \param[in] daa
115 * \return dad, or NULL on error
116 *
117 * <pre>
118 * Notes:
119 * (1) This 'flattens' the dnaa to a dna, by joining successively
120 * each dna in the dnaa.
121 * (2) It leaves the input dnaa unchanged.
122 * </pre>
123 */
124 L_DNA *
l_dnaaFlattenToDna(L_DNAA * daa)125 l_dnaaFlattenToDna(L_DNAA *daa)
126 {
127 l_int32 i, nalloc;
128 L_DNA *da, *dad;
129 L_DNA **array;
130
131 PROCNAME("l_dnaaFlattenToDna");
132
133 if (!daa)
134 return (L_DNA *)ERROR_PTR("daa not defined", procName, NULL);
135
136 nalloc = daa->nalloc;
137 array = daa->dna;
138 dad = l_dnaCreate(0);
139 for (i = 0; i < nalloc; i++) {
140 da = array[i];
141 if (!da) continue;
142 l_dnaJoin(dad, da, 0, -1);
143 }
144
145 return dad;
146 }
147
148
149 /*----------------------------------------------------------------------*
150 * Conversion between numa and dna *
151 *----------------------------------------------------------------------*/
152 /*!
153 * \brief l_dnaConvertToNuma()
154 *
155 * \param[in] da
156 * \return na, or NULL on error
157 */
158 NUMA *
l_dnaConvertToNuma(L_DNA * da)159 l_dnaConvertToNuma(L_DNA *da)
160 {
161 l_int32 i, n;
162 l_float64 val;
163 NUMA *na;
164
165 PROCNAME("l_dnaConvertToNuma");
166
167 if (!da)
168 return (NUMA *)ERROR_PTR("da not defined", procName, NULL);
169
170 n = l_dnaGetCount(da);
171 na = numaCreate(n);
172 for (i = 0; i < n; i++) {
173 l_dnaGetDValue(da, i, &val);
174 numaAddNumber(na, val);
175 }
176 return na;
177 }
178
179
180 /*!
181 * \brief numaConvertToDna
182 *
183 * \param[in] na
184 * \return da, or NULL on error
185 */
186 L_DNA *
numaConvertToDna(NUMA * na)187 numaConvertToDna(NUMA *na)
188 {
189 l_int32 i, n;
190 l_float32 val;
191 L_DNA *da;
192
193 PROCNAME("numaConvertToDna");
194
195 if (!na)
196 return (L_DNA *)ERROR_PTR("na not defined", procName, NULL);
197
198 n = numaGetCount(na);
199 da = l_dnaCreate(n);
200 for (i = 0; i < n; i++) {
201 numaGetFValue(na, i, &val);
202 l_dnaAddNumber(da, val);
203 }
204 return da;
205 }
206
207
208 /*----------------------------------------------------------------------*
209 * Set operations using aset (rbtree) *
210 *----------------------------------------------------------------------*/
211 /*!
212 * \brief l_dnaUnionByAset()
213 *
214 * \param[in] da1, da2
215 * \return dad with the union of the set of numbers, or NULL on error
216 *
217 * <pre>
218 * Notes:
219 * (1) See sarrayUnionByAset() for the approach.
220 * (2) Here, the key in building the sorted tree is the number itself.
221 * (3) Operations using an underlying tree are O(nlogn), which is
222 * typically less efficient than hashing, which is O(n).
223 * </pre>
224 */
225 L_DNA *
l_dnaUnionByAset(L_DNA * da1,L_DNA * da2)226 l_dnaUnionByAset(L_DNA *da1,
227 L_DNA *da2)
228 {
229 L_DNA *da3, *dad;
230
231 PROCNAME("l_dnaUnionByAset");
232
233 if (!da1)
234 return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL);
235 if (!da2)
236 return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL);
237
238 /* Join */
239 da3 = l_dnaCopy(da1);
240 l_dnaJoin(da3, da2, 0, -1);
241
242 /* Eliminate duplicates */
243 dad = l_dnaRemoveDupsByAset(da3);
244 l_dnaDestroy(&da3);
245 return dad;
246 }
247
248
249 /*!
250 * \brief l_dnaRemoveDupsByAset()
251 *
252 * \param[in] das
253 * \return dad with duplicates removed, or NULL on error
254 */
255 L_DNA *
l_dnaRemoveDupsByAset(L_DNA * das)256 l_dnaRemoveDupsByAset(L_DNA *das)
257 {
258 l_int32 i, n;
259 l_float64 val;
260 L_DNA *dad;
261 L_ASET *set;
262 RB_TYPE key;
263
264 PROCNAME("l_dnaRemoveDupsByAset");
265
266 if (!das)
267 return (L_DNA *)ERROR_PTR("das not defined", procName, NULL);
268
269 set = l_asetCreate(L_FLOAT_TYPE);
270 dad = l_dnaCreate(0);
271 n = l_dnaGetCount(das);
272 for (i = 0; i < n; i++) {
273 l_dnaGetDValue(das, i, &val);
274 key.ftype = val;
275 if (!l_asetFind(set, key)) {
276 l_dnaAddNumber(dad, val);
277 l_asetInsert(set, key);
278 }
279 }
280
281 l_asetDestroy(&set);
282 return dad;
283 }
284
285
286 /*!
287 * \brief l_dnaIntersectionByAset()
288 *
289 * \param[in] da1, da2
290 * \return dad with the intersection of the two arrays, or NULL on error
291 *
292 * <pre>
293 * Notes:
294 * (1) See sarrayIntersection() for the approach.
295 * (2) Here, the key in building the sorted tree is the number itself.
296 * (3) Operations using an underlying tree are O(nlogn), which is
297 * typically less efficient than hashing, which is O(n).
298 * </pre>
299 */
300 L_DNA *
l_dnaIntersectionByAset(L_DNA * da1,L_DNA * da2)301 l_dnaIntersectionByAset(L_DNA *da1,
302 L_DNA *da2)
303 {
304 l_int32 n1, n2, i, n;
305 l_float64 val;
306 L_ASET *set1, *set2;
307 RB_TYPE key;
308 L_DNA *da_small, *da_big, *dad;
309
310 PROCNAME("l_dnaIntersectionByAset");
311
312 if (!da1)
313 return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL);
314 if (!da2)
315 return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL);
316
317 /* Put the elements of the largest array into a set */
318 n1 = l_dnaGetCount(da1);
319 n2 = l_dnaGetCount(da2);
320 da_small = (n1 < n2) ? da1 : da2; /* do not destroy da_small */
321 da_big = (n1 < n2) ? da2 : da1; /* do not destroy da_big */
322 set1 = l_asetCreateFromDna(da_big);
323
324 /* Build up the intersection of floats */
325 dad = l_dnaCreate(0);
326 n = l_dnaGetCount(da_small);
327 set2 = l_asetCreate(L_FLOAT_TYPE);
328 for (i = 0; i < n; i++) {
329 l_dnaGetDValue(da_small, i, &val);
330 key.ftype = val;
331 if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
332 l_dnaAddNumber(dad, val);
333 l_asetInsert(set2, key);
334 }
335 }
336
337 l_asetDestroy(&set1);
338 l_asetDestroy(&set2);
339 return dad;
340 }
341
342
343 /*!
344 * \brief l_asetCreateFromDna()
345 *
346 * \param[in] da source dna
347 * \return set using the doubles in %da as keys
348 */
349 L_ASET *
l_asetCreateFromDna(L_DNA * da)350 l_asetCreateFromDna(L_DNA *da)
351 {
352 l_int32 i, n;
353 l_float64 val;
354 L_ASET *set;
355 RB_TYPE key;
356
357 PROCNAME("l_asetCreateFromDna");
358
359 if (!da)
360 return (L_ASET *)ERROR_PTR("da not defined", procName, NULL);
361
362 set = l_asetCreate(L_FLOAT_TYPE);
363 n = l_dnaGetCount(da);
364 for (i = 0; i < n; i++) {
365 l_dnaGetDValue(da, i, &val);
366 key.ftype = val;
367 l_asetInsert(set, key);
368 }
369
370 return set;
371 }
372
373
374 /*----------------------------------------------------------------------*
375 * Miscellaneous operations *
376 *----------------------------------------------------------------------*/
377 /*!
378 * \brief l_dnaDiffAdjValues()
379 *
380 * \param[in] das input l_dna
381 * \return dad of difference values val[i+1] - val[i],
382 * or NULL on error
383 */
384 L_DNA *
l_dnaDiffAdjValues(L_DNA * das)385 l_dnaDiffAdjValues(L_DNA *das)
386 {
387 l_int32 i, n, prev, cur;
388 L_DNA *dad;
389
390 PROCNAME("l_dnaDiffAdjValues");
391
392 if (!das)
393 return (L_DNA *)ERROR_PTR("das not defined", procName, NULL);
394 n = l_dnaGetCount(das);
395 dad = l_dnaCreate(n - 1);
396 prev = 0;
397 for (i = 1; i < n; i++) {
398 l_dnaGetIValue(das, i, &cur);
399 l_dnaAddNumber(dad, cur - prev);
400 prev = cur;
401 }
402 return dad;
403 }
404
405