1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -
4  -  Redistribution and use in source and binary forms, with or without
5  -  modification, are permitted provided that the following conditions
6  -  are met:
7  -  1. Redistributions of source code must retain the above copyright
8  -     notice, this list of conditions and the following disclaimer.
9  -  2. Redistributions in binary form must reproduce the above
10  -     copyright notice, this list of conditions and the following
11  -     disclaimer in the documentation and/or other materials
12  -     provided with the distribution.
13  -
14  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18  -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*!
28  * \file  dnafunc1.c
29  * <pre>
30  *
31  *      Rearrangements
32  *          l_int32     *l_dnaJoin()
33  *          l_int32     *l_dnaaFlattenToDna()
34  *
35  *      Conversion between numa and dna
36  *          NUMA        *l_dnaConvertToNuma()
37  *          L_DNA       *numaConvertToDna()
38  *
39  *      Set operations using aset (rbtree)
40  *          L_DNA       *l_dnaUnionByAset()
41  *          L_DNA       *l_dnaRemoveDupsByAset()
42  *          L_DNA       *l_dnaIntersectionByAset()
43  *          L_ASET      *l_asetCreateFromDna()
44  *
45  *      Miscellaneous operations
46  *          L_DNA       *l_dnaDiffAdjValues()
47  *
48  *
49  * This file contains an implemention on sets of doubles (or integers)
50  * that uses an underlying tree (rbtree).  The keys stored in the tree
51  * are simply the double array values in the dna.  Use of a DnaHash
52  * is typically more efficient, with O(1) in lookup and insertion.
53  *
54  * </pre>
55  */
56 
57 #include "allheaders.h"
58 
59 /*----------------------------------------------------------------------*
60  *                            Rearrangements                            *
61  *----------------------------------------------------------------------*/
62 /*!
63  * \brief   l_dnaJoin()
64  *
65  * \param[in]    dad  dest dna; add to this one
66  * \param[in]    das  [optional] source dna; add from this one
67  * \param[in]    istart  starting index in das
68  * \param[in]    iend  ending index in das; use -1 to cat all
69  * \return  0 if OK, 1 on error
70  *
71  * <pre>
72  * Notes:
73  *      (1) istart < 0 is taken to mean 'read from the start' (istart = 0)
74  *      (2) iend < 0 means 'read to the end'
75  *      (3) if das == NULL, this is a no-op
76  * </pre>
77  */
78 l_int32
l_dnaJoin(L_DNA * dad,L_DNA * das,l_int32 istart,l_int32 iend)79 l_dnaJoin(L_DNA   *dad,
80           L_DNA   *das,
81           l_int32  istart,
82           l_int32  iend)
83 {
84 l_int32    n, i;
85 l_float64  val;
86 
87     PROCNAME("l_dnaJoin");
88 
89     if (!dad)
90         return ERROR_INT("dad not defined", procName, 1);
91     if (!das)
92         return 0;
93 
94     if (istart < 0)
95         istart = 0;
96     n = l_dnaGetCount(das);
97     if (iend < 0 || iend >= n)
98         iend = n - 1;
99     if (istart > iend)
100         return ERROR_INT("istart > iend; nothing to add", procName, 1);
101 
102     for (i = istart; i <= iend; i++) {
103         l_dnaGetDValue(das, i, &val);
104         l_dnaAddNumber(dad, val);
105     }
106 
107     return 0;
108 }
109 
110 
111 /*!
112  * \brief   l_dnaaFlattenToDna()
113  *
114  * \param[in]    daa
115  * \return  dad, or NULL on error
116  *
117  * <pre>
118  * Notes:
119  *      (1) This 'flattens' the dnaa to a dna, by joining successively
120  *          each dna in the dnaa.
121  *      (2) It leaves the input dnaa unchanged.
122  * </pre>
123  */
124 L_DNA *
l_dnaaFlattenToDna(L_DNAA * daa)125 l_dnaaFlattenToDna(L_DNAA  *daa)
126 {
127 l_int32  i, nalloc;
128 L_DNA   *da, *dad;
129 L_DNA  **array;
130 
131     PROCNAME("l_dnaaFlattenToDna");
132 
133     if (!daa)
134         return (L_DNA *)ERROR_PTR("daa not defined", procName, NULL);
135 
136     nalloc = daa->nalloc;
137     array = daa->dna;
138     dad = l_dnaCreate(0);
139     for (i = 0; i < nalloc; i++) {
140         da = array[i];
141         if (!da) continue;
142         l_dnaJoin(dad, da, 0, -1);
143     }
144 
145     return dad;
146 }
147 
148 
149 /*----------------------------------------------------------------------*
150  *                   Conversion between numa and dna                    *
151  *----------------------------------------------------------------------*/
152 /*!
153  * \brief   l_dnaConvertToNuma()
154  *
155  * \param[in]    da
156  * \return  na, or NULL on error
157  */
158 NUMA *
l_dnaConvertToNuma(L_DNA * da)159 l_dnaConvertToNuma(L_DNA  *da)
160 {
161 l_int32    i, n;
162 l_float64  val;
163 NUMA      *na;
164 
165     PROCNAME("l_dnaConvertToNuma");
166 
167     if (!da)
168         return (NUMA *)ERROR_PTR("da not defined", procName, NULL);
169 
170     n = l_dnaGetCount(da);
171     na = numaCreate(n);
172     for (i = 0; i < n; i++) {
173         l_dnaGetDValue(da, i, &val);
174         numaAddNumber(na, val);
175     }
176     return na;
177 }
178 
179 
180 /*!
181  * \brief   numaConvertToDna
182  *
183  * \param[in]    na
184  * \return  da, or NULL on error
185  */
186 L_DNA *
numaConvertToDna(NUMA * na)187 numaConvertToDna(NUMA  *na)
188 {
189 l_int32    i, n;
190 l_float32  val;
191 L_DNA     *da;
192 
193     PROCNAME("numaConvertToDna");
194 
195     if (!na)
196         return (L_DNA *)ERROR_PTR("na not defined", procName, NULL);
197 
198     n = numaGetCount(na);
199     da = l_dnaCreate(n);
200     for (i = 0; i < n; i++) {
201         numaGetFValue(na, i, &val);
202         l_dnaAddNumber(da, val);
203     }
204     return da;
205 }
206 
207 
208 /*----------------------------------------------------------------------*
209  *                   Set operations using aset (rbtree)                 *
210  *----------------------------------------------------------------------*/
211 /*!
212  * \brief   l_dnaUnionByAset()
213  *
214  * \param[in]    da1, da2
215  * \return  dad with the union of the set of numbers, or NULL on error
216  *
217  * <pre>
218  * Notes:
219  *      (1) See sarrayUnionByAset() for the approach.
220  *      (2) Here, the key in building the sorted tree is the number itself.
221  *      (3) Operations using an underlying tree are O(nlogn), which is
222  *          typically less efficient than hashing, which is O(n).
223  * </pre>
224  */
225 L_DNA *
l_dnaUnionByAset(L_DNA * da1,L_DNA * da2)226 l_dnaUnionByAset(L_DNA  *da1,
227                  L_DNA  *da2)
228 {
229 L_DNA  *da3, *dad;
230 
231     PROCNAME("l_dnaUnionByAset");
232 
233     if (!da1)
234         return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL);
235     if (!da2)
236         return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL);
237 
238         /* Join */
239     da3 = l_dnaCopy(da1);
240     l_dnaJoin(da3, da2, 0, -1);
241 
242         /* Eliminate duplicates */
243     dad = l_dnaRemoveDupsByAset(da3);
244     l_dnaDestroy(&da3);
245     return dad;
246 }
247 
248 
249 /*!
250  * \brief   l_dnaRemoveDupsByAset()
251  *
252  * \param[in]    das
253  * \return  dad with duplicates removed, or NULL on error
254  */
255 L_DNA *
l_dnaRemoveDupsByAset(L_DNA * das)256 l_dnaRemoveDupsByAset(L_DNA  *das)
257 {
258 l_int32    i, n;
259 l_float64  val;
260 L_DNA     *dad;
261 L_ASET    *set;
262 RB_TYPE    key;
263 
264     PROCNAME("l_dnaRemoveDupsByAset");
265 
266     if (!das)
267         return (L_DNA *)ERROR_PTR("das not defined", procName, NULL);
268 
269     set = l_asetCreate(L_FLOAT_TYPE);
270     dad = l_dnaCreate(0);
271     n = l_dnaGetCount(das);
272     for (i = 0; i < n; i++) {
273         l_dnaGetDValue(das, i, &val);
274         key.ftype = val;
275         if (!l_asetFind(set, key)) {
276             l_dnaAddNumber(dad, val);
277             l_asetInsert(set, key);
278         }
279     }
280 
281     l_asetDestroy(&set);
282     return dad;
283 }
284 
285 
286 /*!
287  * \brief   l_dnaIntersectionByAset()
288  *
289  * \param[in]    da1, da2
290  * \return  dad with the intersection of the two arrays, or NULL on error
291  *
292  * <pre>
293  * Notes:
294  *      (1) See sarrayIntersection() for the approach.
295  *      (2) Here, the key in building the sorted tree is the number itself.
296  *      (3) Operations using an underlying tree are O(nlogn), which is
297  *          typically less efficient than hashing, which is O(n).
298  * </pre>
299  */
300 L_DNA *
l_dnaIntersectionByAset(L_DNA * da1,L_DNA * da2)301 l_dnaIntersectionByAset(L_DNA  *da1,
302                         L_DNA  *da2)
303 {
304 l_int32    n1, n2, i, n;
305 l_float64  val;
306 L_ASET    *set1, *set2;
307 RB_TYPE    key;
308 L_DNA     *da_small, *da_big, *dad;
309 
310     PROCNAME("l_dnaIntersectionByAset");
311 
312     if (!da1)
313         return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL);
314     if (!da2)
315         return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL);
316 
317         /* Put the elements of the largest array into a set */
318     n1 = l_dnaGetCount(da1);
319     n2 = l_dnaGetCount(da2);
320     da_small = (n1 < n2) ? da1 : da2;   /* do not destroy da_small */
321     da_big = (n1 < n2) ? da2 : da1;   /* do not destroy da_big */
322     set1 = l_asetCreateFromDna(da_big);
323 
324         /* Build up the intersection of floats */
325     dad = l_dnaCreate(0);
326     n = l_dnaGetCount(da_small);
327     set2 = l_asetCreate(L_FLOAT_TYPE);
328     for (i = 0; i < n; i++) {
329         l_dnaGetDValue(da_small, i, &val);
330         key.ftype = val;
331         if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
332             l_dnaAddNumber(dad, val);
333             l_asetInsert(set2, key);
334         }
335     }
336 
337     l_asetDestroy(&set1);
338     l_asetDestroy(&set2);
339     return dad;
340 }
341 
342 
343 /*!
344  * \brief   l_asetCreateFromDna()
345  *
346  * \param[in]    da source dna
347  * \return  set using the doubles in %da as keys
348  */
349 L_ASET *
l_asetCreateFromDna(L_DNA * da)350 l_asetCreateFromDna(L_DNA  *da)
351 {
352 l_int32    i, n;
353 l_float64  val;
354 L_ASET    *set;
355 RB_TYPE    key;
356 
357     PROCNAME("l_asetCreateFromDna");
358 
359     if (!da)
360         return (L_ASET *)ERROR_PTR("da not defined", procName, NULL);
361 
362     set = l_asetCreate(L_FLOAT_TYPE);
363     n = l_dnaGetCount(da);
364     for (i = 0; i < n; i++) {
365         l_dnaGetDValue(da, i, &val);
366         key.ftype = val;
367         l_asetInsert(set, key);
368     }
369 
370     return set;
371 }
372 
373 
374 /*----------------------------------------------------------------------*
375  *                       Miscellaneous operations                       *
376  *----------------------------------------------------------------------*/
377 /*!
378  * \brief   l_dnaDiffAdjValues()
379  *
380  * \param[in]    das input l_dna
381  * \return  dad of difference values val[i+1] - val[i],
382  *                   or NULL on error
383  */
384 L_DNA *
l_dnaDiffAdjValues(L_DNA * das)385 l_dnaDiffAdjValues(L_DNA  *das)
386 {
387 l_int32  i, n, prev, cur;
388 L_DNA   *dad;
389 
390     PROCNAME("l_dnaDiffAdjValues");
391 
392     if (!das)
393         return (L_DNA *)ERROR_PTR("das not defined", procName, NULL);
394     n = l_dnaGetCount(das);
395     dad = l_dnaCreate(n - 1);
396     prev = 0;
397     for (i = 1; i < n; i++) {
398         l_dnaGetIValue(das, i, &cur);
399         l_dnaAddNumber(dad, cur - prev);
400         prev = cur;
401     }
402     return dad;
403 }
404 
405