1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -
4  -  Redistribution and use in source and binary forms, with or without
5  -  modification, are permitted provided that the following conditions
6  -  are met:
7  -  1. Redistributions of source code must retain the above copyright
8  -     notice, this list of conditions and the following disclaimer.
9  -  2. Redistributions in binary form must reproduce the above
10  -     copyright notice, this list of conditions and the following
11  -     disclaimer in the documentation and/or other materials
12  -     provided with the distribution.
13  -
14  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18  -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*!
28  * \file  boxfunc2.c
29  * <pre>
30  *
31  *      Boxa/Box transform (shift, scale) and orthogonal rotation
32  *           BOXA            *boxaTransform()
33  *           BOX             *boxTransform()
34  *           BOXA            *boxaTransformOrdered()
35  *           BOX             *boxTransformOrdered()
36  *           BOXA            *boxaRotateOrth()
37  *           BOX             *boxRotateOrth()
38  *
39  *      Boxa sort
40  *           BOXA            *boxaSort()
41  *           BOXA            *boxaBinSort()
42  *           BOXA            *boxaSortByIndex()
43  *           BOXAA           *boxaSort2d()
44  *           BOXAA           *boxaSort2dByIndex()
45  *
46  *      Boxa statistics
47  *           l_int32          boxaGetRankVals()
48  *           l_int32          boxaGetMedianVals()
49  *           l_int32          boxaGetAverageSize()
50  *
51  *      Boxa array extraction
52  *           l_int32          boxaExtractAsNuma()
53  *           l_int32          boxaExtractAsPta()
54  *
55  *      Other Boxaa functions
56  *           l_int32          boxaaGetExtent()
57  *           BOXA            *boxaaFlattenToBoxa()
58  *           BOXA            *boxaaFlattenAligned()
59  *           BOXAA           *boxaEncapsulateAligned()
60  *           BOXAA           *boxaaTranspose()
61  *           l_int32          boxaaAlignBox()
62  * </pre>
63  */
64 
65 #include <math.h>
66 #include "allheaders.h"
67 
68     /* For more than this number of c.c. in a binarized image of
69      * semi-perimeter (w + h) about 5000 or less, the O(n) binsort
70      * is faster than the O(nlogn) shellsort.  */
71 static const l_int32   MIN_COMPS_FOR_BIN_SORT = 200;
72 
73 
74 /*---------------------------------------------------------------------*
75  *      Boxa/Box transform (shift, scale) and orthogonal rotation      *
76  *---------------------------------------------------------------------*/
77 /*!
78  * \brief   boxaTransform()
79  *
80  * \param[in]    boxas
81  * \param[in]    shiftx, shifty
82  * \param[in]    scalex, scaley
83  * \return  boxad, or NULL on error
84  *
85  * <pre>
86  * Notes:
87  *      (1) This is a very simple function that first shifts, then scales.
88  * </pre>
89  */
90 BOXA *
boxaTransform(BOXA * boxas,l_int32 shiftx,l_int32 shifty,l_float32 scalex,l_float32 scaley)91 boxaTransform(BOXA      *boxas,
92               l_int32    shiftx,
93               l_int32    shifty,
94               l_float32  scalex,
95               l_float32  scaley)
96 {
97 l_int32  i, n;
98 BOX     *boxs, *boxd;
99 BOXA    *boxad;
100 
101     PROCNAME("boxaTransform");
102 
103     if (!boxas)
104         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
105     n = boxaGetCount(boxas);
106     if ((boxad = boxaCreate(n)) == NULL)
107         return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
108     for (i = 0; i < n; i++) {
109         if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) {
110             boxaDestroy(&boxad);
111             return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
112         }
113         boxd = boxTransform(boxs, shiftx, shifty, scalex, scaley);
114         boxDestroy(&boxs);
115         boxaAddBox(boxad, boxd, L_INSERT);
116     }
117 
118     return boxad;
119 }
120 
121 
122 /*!
123  * \brief   boxTransform()
124  *
125  * \param[in]    box
126  * \param[in]    shiftx, shifty
127  * \param[in]    scalex, scaley
128  * \return  boxd, or NULL on error
129  *
130  * <pre>
131  * Notes:
132  *      (1) This is a very simple function that first shifts, then scales.
133  *      (2) If the box is invalid, a new invalid box is returned.
134  * </pre>
135  */
136 BOX *
boxTransform(BOX * box,l_int32 shiftx,l_int32 shifty,l_float32 scalex,l_float32 scaley)137 boxTransform(BOX       *box,
138              l_int32    shiftx,
139              l_int32    shifty,
140              l_float32  scalex,
141              l_float32  scaley)
142 {
143     PROCNAME("boxTransform");
144 
145     if (!box)
146         return (BOX *)ERROR_PTR("box not defined", procName, NULL);
147     if (box->w <= 0 || box->h <= 0)
148         return boxCreate(0, 0, 0, 0);
149     else
150         return boxCreate((l_int32)(scalex * (box->x + shiftx) + 0.5),
151                          (l_int32)(scaley * (box->y + shifty) + 0.5),
152                          (l_int32)(L_MAX(1.0, scalex * box->w + 0.5)),
153                          (l_int32)(L_MAX(1.0, scaley * box->h + 0.5)));
154 }
155 
156 
157 /*!
158  * \brief   boxaTransformOrdered()
159  *
160  * \param[in]    boxas
161  * \param[in]    shiftx, shifty
162  * \param[in]    scalex, scaley
163  * \param[in]    xcen, ycen center of rotation
164  * \param[in]    angle in radians; clockwise is positive
165  * \param[in]    order one of 6 combinations: L_TR_SC_RO, ...
166  * \return  boxd, or NULL on error
167  *
168  * <pre>
169  * Notes:
170  *      (1) This allows a sequence of linear transforms on each box.
171  *          the transforms are from the affine set, composed of
172  *          shift, scaling and rotation, and the order of the
173  *          transforms is specified.
174  *      (2) Although these operations appear to be on an infinite
175  *          2D plane, in practice the region of interest is clipped
176  *          to a finite image.  The center of rotation is usually taken
177  *          with respect to the image (either the UL corner or the
178  *          center).  A translation can have two very different effects:
179  *            (a) Moves the boxes across the fixed image region.
180  *            (b) Moves the image origin, causing a change in the image
181  *                region and an opposite effective translation of the boxes.
182  *          This function should only be used for (a), where the image
183  *          region is fixed on translation.  If the image region is
184  *          changed by the translation, use instead the functions
185  *          in affinecompose.c, where the image region and rotation
186  *          center can be computed from the actual clipping due to
187  *          translation of the image origin.
188  *      (3) See boxTransformOrdered() for usage and implementation details.
189  * </pre>
190  */
191 BOXA *
boxaTransformOrdered(BOXA * boxas,l_int32 shiftx,l_int32 shifty,l_float32 scalex,l_float32 scaley,l_int32 xcen,l_int32 ycen,l_float32 angle,l_int32 order)192 boxaTransformOrdered(BOXA      *boxas,
193                      l_int32    shiftx,
194                      l_int32    shifty,
195                      l_float32  scalex,
196                      l_float32  scaley,
197                      l_int32    xcen,
198                      l_int32    ycen,
199                      l_float32  angle,
200                      l_int32    order)
201 {
202 l_int32  i, n;
203 BOX     *boxs, *boxd;
204 BOXA    *boxad;
205 
206     PROCNAME("boxaTransformOrdered");
207 
208     if (!boxas)
209         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
210     n = boxaGetCount(boxas);
211     if ((boxad = boxaCreate(n)) == NULL)
212         return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
213     for (i = 0; i < n; i++) {
214         if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) {
215             boxaDestroy(&boxad);
216             return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
217         }
218         boxd = boxTransformOrdered(boxs, shiftx, shifty, scalex, scaley,
219                                    xcen, ycen, angle, order);
220         boxDestroy(&boxs);
221         boxaAddBox(boxad, boxd, L_INSERT);
222     }
223 
224     return boxad;
225 }
226 
227 
228 /*!
229  * \brief   boxTransformOrdered()
230  *
231  * \param[in]    boxs
232  * \param[in]    shiftx, shifty
233  * \param[in]    scalex, scaley
234  * \param[in]    xcen, ycen center of rotation
235  * \param[in]    angle in radians; clockwise is positive
236  * \param[in]    order one of 6 combinations: L_TR_SC_RO, ...
237  * \return  boxd, or NULL on error
238  *
239  * <pre>
240  * Notes:
241  *      (1) This allows a sequence of linear transforms, composed of
242  *          shift, scaling and rotation, where the order of the
243  *          transforms is specified.
244  *      (2) The rotation is taken about a point specified by (xcen, ycen).
245  *          Let the components of the vector from the center of rotation
246  *          to the box center be (xdif, ydif):
247  *            xdif = (bx + 0.5 * bw) - xcen
248  *            ydif = (by + 0.5 * bh) - ycen
249  *          Then the box center after rotation has new components:
250  *            bxcen = xcen + xdif * cosa + ydif * sina
251  *            bycen = ycen + ydif * cosa - xdif * sina
252  *          where cosa and sina are the cos and sin of the angle,
253  *          and the enclosing box for the rotated box has size:
254  *            rw = |bw * cosa| + |bh * sina|
255  *            rh = |bh * cosa| + |bw * sina|
256  *          where bw and bh are the unrotated width and height.
257  *          Then the box UL corner (rx, ry) is
258  *            rx = bxcen - 0.5 * rw
259  *            ry = bycen - 0.5 * rh
260  *      (3) The center of rotation specified by args %xcen and %ycen
261  *          is the point BEFORE any translation or scaling.  If the
262  *          rotation is not the first operation, this function finds
263  *          the actual center at the time of rotation.  It does this
264  *          by making the following assumptions:
265  *             (1) Any scaling is with respect to the UL corner, so
266  *                 that the center location scales accordingly.
267  *             (2) A translation does not affect the center of
268  *                 the image; it just moves the boxes.
269  *          We always use assumption (1).  However, assumption (2)
270  *          will be incorrect if the apparent translation is due
271  *          to a clipping operation that, in effect, moves the
272  *          origin of the image.  In that case, you should NOT use
273  *          these simple functions.  Instead, use the functions
274  *          in affinecompose.c, where the rotation center can be
275  *          computed from the actual clipping due to translation
276  *          of the image origin.
277  * </pre>
278  */
279 BOX *
boxTransformOrdered(BOX * boxs,l_int32 shiftx,l_int32 shifty,l_float32 scalex,l_float32 scaley,l_int32 xcen,l_int32 ycen,l_float32 angle,l_int32 order)280 boxTransformOrdered(BOX       *boxs,
281                     l_int32    shiftx,
282                     l_int32    shifty,
283                     l_float32  scalex,
284                     l_float32  scaley,
285                     l_int32    xcen,
286                     l_int32    ycen,
287                     l_float32  angle,
288                     l_int32    order)
289 {
290 l_int32    bx, by, bw, bh, tx, ty, tw, th;
291 l_int32    xcent, ycent;  /* transformed center of rotation due to scaling */
292 l_float32  sina, cosa, xdif, ydif, rx, ry, rw, rh;
293 BOX       *boxd;
294 
295     PROCNAME("boxTransformOrdered");
296 
297     if (!boxs)
298         return (BOX *)ERROR_PTR("boxs not defined", procName, NULL);
299     if (order != L_TR_SC_RO && order != L_SC_RO_TR && order != L_RO_TR_SC &&
300         order != L_TR_RO_SC && order != L_RO_SC_TR && order != L_SC_TR_RO)
301         return (BOX *)ERROR_PTR("order invalid", procName, NULL);
302 
303     boxGetGeometry(boxs, &bx, &by, &bw, &bh);
304     if (bw <= 0 || bh <= 0)  /* invalid */
305         return boxCreate(0, 0, 0, 0);
306     if (angle != 0.0) {
307         sina = sin(angle);
308         cosa = cos(angle);
309     }
310 
311     if (order == L_TR_SC_RO) {
312         tx = (l_int32)(scalex * (bx + shiftx) + 0.5);
313         ty = (l_int32)(scaley * (by + shifty) + 0.5);
314         tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
315         th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
316         xcent = (l_int32)(scalex * xcen + 0.5);
317         ycent = (l_int32)(scaley * ycen + 0.5);
318         if (angle == 0.0) {
319             boxd = boxCreate(tx, ty, tw, th);
320         } else {
321             xdif = tx + 0.5 * tw - xcent;
322             ydif = ty + 0.5 * th - ycent;
323             rw = L_ABS(tw * cosa) + L_ABS(th * sina);
324             rh = L_ABS(th * cosa) + L_ABS(tw * sina);
325             rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
326             ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
327             boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw,
328                              (l_int32)rh);
329         }
330     } else if (order == L_SC_TR_RO) {
331         tx = (l_int32)(scalex * bx + shiftx + 0.5);
332         ty = (l_int32)(scaley * by + shifty + 0.5);
333         tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
334         th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
335         xcent = (l_int32)(scalex * xcen + 0.5);
336         ycent = (l_int32)(scaley * ycen + 0.5);
337         if (angle == 0.0) {
338             boxd = boxCreate(tx, ty, tw, th);
339         } else {
340             xdif = tx + 0.5 * tw - xcent;
341             ydif = ty + 0.5 * th - ycent;
342             rw = L_ABS(tw * cosa) + L_ABS(th * sina);
343             rh = L_ABS(th * cosa) + L_ABS(tw * sina);
344             rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
345             ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
346             boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw,
347                              (l_int32)rh);
348         }
349     } else if (order == L_RO_TR_SC) {
350         if (angle == 0.0) {
351             rx = bx;
352             ry = by;
353             rw = bw;
354             rh = bh;
355         } else {
356             xdif = bx + 0.5 * bw - xcen;
357             ydif = by + 0.5 * bh - ycen;
358             rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
359             rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
360             rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
361             ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
362         }
363         tx = (l_int32)(scalex * (rx + shiftx) + 0.5);
364         ty = (l_int32)(scaley * (ry + shifty) + 0.5);
365         tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
366         th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
367         boxd = boxCreate(tx, ty, tw, th);
368     } else if (order == L_RO_SC_TR) {
369         if (angle == 0.0) {
370             rx = bx;
371             ry = by;
372             rw = bw;
373             rh = bh;
374         } else {
375             xdif = bx + 0.5 * bw - xcen;
376             ydif = by + 0.5 * bh - ycen;
377             rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
378             rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
379             rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
380             ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
381         }
382         tx = (l_int32)(scalex * rx + shiftx + 0.5);
383         ty = (l_int32)(scaley * ry + shifty + 0.5);
384         tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
385         th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
386         boxd = boxCreate(tx, ty, tw, th);
387     } else if (order == L_TR_RO_SC) {
388         tx = bx + shiftx;
389         ty = by + shifty;
390         if (angle == 0.0) {
391             rx = tx;
392             ry = ty;
393             rw = bw;
394             rh = bh;
395         } else {
396             xdif = tx + 0.5 * bw - xcen;
397             ydif = ty + 0.5 * bh - ycen;
398             rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
399             rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
400             rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
401             ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
402         }
403         tx = (l_int32)(scalex * rx + 0.5);
404         ty = (l_int32)(scaley * ry + 0.5);
405         tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
406         th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
407         boxd = boxCreate(tx, ty, tw, th);
408     } else {  /* order == L_SC_RO_TR) */
409         tx = (l_int32)(scalex * bx + 0.5);
410         ty = (l_int32)(scaley * by + 0.5);
411         tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
412         th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
413         xcent = (l_int32)(scalex * xcen + 0.5);
414         ycent = (l_int32)(scaley * ycen + 0.5);
415         if (angle == 0.0) {
416             rx = tx;
417             ry = ty;
418             rw = tw;
419             rh = th;
420         } else {
421             xdif = tx + 0.5 * tw - xcent;
422             ydif = ty + 0.5 * th - ycent;
423             rw = L_ABS(tw * cosa) + L_ABS(th * sina);
424             rh = L_ABS(th * cosa) + L_ABS(tw * sina);
425             rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
426             ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
427         }
428         tx = (l_int32)(rx + shiftx + 0.5);
429         ty = (l_int32)(ry + shifty + 0.5);
430         tw = (l_int32)(rw + 0.5);
431         th = (l_int32)(rh + 0.5);
432         boxd = boxCreate(tx, ty, tw, th);
433     }
434 
435     return boxd;
436 }
437 
438 
439 /*!
440  * \brief   boxaRotateOrth()
441  *
442  * \param[in]    boxas
443  * \param[in]    w, h of image in which the boxa is embedded
444  * \param[in]    rotation 0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
445  *                        all rotations are clockwise
446  * \return  boxad, or NULL on error
447  *
448  * <pre>
449  * Notes:
450  *      (1) See boxRotateOrth() for details.
451  * </pre>
452  */
453 BOXA *
boxaRotateOrth(BOXA * boxas,l_int32 w,l_int32 h,l_int32 rotation)454 boxaRotateOrth(BOXA    *boxas,
455                l_int32  w,
456                l_int32  h,
457                l_int32  rotation)
458 {
459 l_int32  i, n;
460 BOX     *boxs, *boxd;
461 BOXA    *boxad;
462 
463     PROCNAME("boxaRotateOrth");
464 
465     if (!boxas)
466         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
467     if (rotation < 0 || rotation > 3)
468         return (BOXA *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL);
469     if (rotation == 0)
470         return boxaCopy(boxas, L_COPY);
471 
472     n = boxaGetCount(boxas);
473     if ((boxad = boxaCreate(n)) == NULL)
474         return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
475     for (i = 0; i < n; i++) {
476         if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) {
477             boxaDestroy(&boxad);
478             return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
479         }
480         boxd = boxRotateOrth(boxs, w, h, rotation);
481         boxDestroy(&boxs);
482         boxaAddBox(boxad, boxd, L_INSERT);
483     }
484 
485     return boxad;
486 }
487 
488 
489 /*!
490  * \brief   boxRotateOrth()
491  *
492  * \param[in]    box
493  * \param[in]    w, h of image in which the box is embedded
494  * \param[in]    rotation 0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
495  *                        all rotations are clockwise
496  * \return  boxd, or NULL on error
497  *
498  * <pre>
499  * Notes:
500  *      (1) Rotate the image with the embedded box by the specified amount.
501  *      (2) After rotation, the rotated box is always measured with
502  *          respect to the UL corner of the image.
503  * </pre>
504  */
505 BOX *
boxRotateOrth(BOX * box,l_int32 w,l_int32 h,l_int32 rotation)506 boxRotateOrth(BOX     *box,
507               l_int32  w,
508               l_int32  h,
509               l_int32  rotation)
510 {
511 l_int32  bx, by, bw, bh, xdist, ydist;
512 
513     PROCNAME("boxRotateOrth");
514 
515     if (!box)
516         return (BOX *)ERROR_PTR("box not defined", procName, NULL);
517     if (rotation < 0 || rotation > 3)
518         return (BOX *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL);
519     if (rotation == 0)
520         return boxCopy(box);
521 
522     boxGetGeometry(box, &bx, &by, &bw, &bh);
523     if (bw <= 0 || bh <= 0)  /* invalid */
524         return boxCreate(0, 0, 0, 0);
525     ydist = h - by - bh;  /* below box */
526     xdist = w - bx - bw;  /* to right of box */
527     if (rotation == 1)   /* 90 deg cw */
528         return boxCreate(ydist, bx, bh, bw);
529     else if (rotation == 2)  /* 180 deg cw */
530         return boxCreate(xdist, ydist, bw, bh);
531     else  /*  rotation == 3, 270 deg cw */
532         return boxCreate(by, xdist, bh, bw);
533 }
534 
535 
536 /*---------------------------------------------------------------------*
537  *                              Boxa sort                              *
538  *---------------------------------------------------------------------*/
539 /*!
540  * \brief   boxaSort()
541  *
542  * \param[in]    boxas
543  * \param[in]    sorttype L_SORT_BY_X, L_SORT_BY_Y,
544  *                        L_SORT_BY_RIGHT, L_SORT_BY_BOT,
545  *                        L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT,
546  *                        L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION,
547  *                        L_SORT_BY_PERIMETER, L_SORT_BY_AREA,
548  *                        L_SORT_BY_ASPECT_RATIO
549  * \param[in]    sortorder  L_SORT_INCREASING, L_SORT_DECREASING
550  * \param[out]   pnaindex [optional] index of sorted order into
551  *                        original array
552  * \return  boxad sorted version of boxas, or NULL on error
553  *
554  * <pre>
555  * Notes:
556  *      (1) An empty boxa returns a copy, with a warning.
557  * </pre>
558  */
559 BOXA *
boxaSort(BOXA * boxas,l_int32 sorttype,l_int32 sortorder,NUMA ** pnaindex)560 boxaSort(BOXA    *boxas,
561          l_int32  sorttype,
562          l_int32  sortorder,
563          NUMA   **pnaindex)
564 {
565 l_int32    i, n, x, y, w, h, size;
566 BOXA      *boxad;
567 NUMA      *na, *naindex;
568 
569     PROCNAME("boxaSort");
570 
571     if (pnaindex) *pnaindex = NULL;
572     if (!boxas)
573         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
574     if ((n = boxaGetCount(boxas)) == 0) {
575         L_WARNING("boxas is empty\n", procName);
576         return boxaCopy(boxas, L_COPY);
577     }
578     if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
579         sorttype != L_SORT_BY_RIGHT && sorttype != L_SORT_BY_BOT &&
580         sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
581         sorttype != L_SORT_BY_MIN_DIMENSION &&
582         sorttype != L_SORT_BY_MAX_DIMENSION &&
583         sorttype != L_SORT_BY_PERIMETER &&
584         sorttype != L_SORT_BY_AREA &&
585         sorttype != L_SORT_BY_ASPECT_RATIO)
586         return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL);
587     if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
588         return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL);
589 
590         /* Use O(n) binsort if possible */
591     if (n > MIN_COMPS_FOR_BIN_SORT &&
592         ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) ||
593          (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) ||
594          (sorttype == L_SORT_BY_PERIMETER)))
595         return boxaBinSort(boxas, sorttype, sortorder, pnaindex);
596 
597         /* Build up numa of specific data */
598     if ((na = numaCreate(n)) == NULL)
599         return (BOXA *)ERROR_PTR("na not made", procName, NULL);
600     for (i = 0; i < n; i++) {
601         boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
602         switch (sorttype)
603         {
604         case L_SORT_BY_X:
605             numaAddNumber(na, x);
606             break;
607         case L_SORT_BY_Y:
608             numaAddNumber(na, y);
609             break;
610         case L_SORT_BY_RIGHT:
611             numaAddNumber(na, x + w - 1);
612             break;
613         case L_SORT_BY_BOT:
614             numaAddNumber(na, y + h - 1);
615             break;
616         case L_SORT_BY_WIDTH:
617             numaAddNumber(na, w);
618             break;
619         case L_SORT_BY_HEIGHT:
620             numaAddNumber(na, h);
621             break;
622         case L_SORT_BY_MIN_DIMENSION:
623             size = L_MIN(w, h);
624             numaAddNumber(na, size);
625             break;
626         case L_SORT_BY_MAX_DIMENSION:
627             size = L_MAX(w, h);
628             numaAddNumber(na, size);
629             break;
630         case L_SORT_BY_PERIMETER:
631             size = w + h;
632             numaAddNumber(na, size);
633             break;
634         case L_SORT_BY_AREA:
635             size = w * h;
636             numaAddNumber(na, size);
637             break;
638         case L_SORT_BY_ASPECT_RATIO:
639             numaAddNumber(na, (l_float32)w / (l_float32)h);
640             break;
641         default:
642             L_WARNING("invalid sort type\n", procName);
643         }
644     }
645 
646         /* Get the sort index for data array */
647     naindex = numaGetSortIndex(na, sortorder);
648     numaDestroy(&na);
649     if (!naindex)
650         return (BOXA *)ERROR_PTR("naindex not made", procName, NULL);
651 
652         /* Build up sorted boxa using sort index */
653     boxad = boxaSortByIndex(boxas, naindex);
654 
655     if (pnaindex)
656         *pnaindex = naindex;
657     else
658         numaDestroy(&naindex);
659     return boxad;
660 }
661 
662 
663 /*!
664  * \brief   boxaBinSort()
665  *
666  * \param[in]    boxas
667  * \param[in]    sorttype L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
668  *                        L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER
669  * \param[in]    sortorder  L_SORT_INCREASING, L_SORT_DECREASING
670  * \param[out]   pnaindex [optional] index of sorted order into
671  *                        original array
672  * \return  boxad sorted version of boxas, or NULL on error
673  *
674  * <pre>
675  * Notes:
676  *      (1) For a large number of boxes (say, greater than 1000), this
677  *          O(n) binsort is much faster than the O(nlogn) shellsort.
678  *          For 5000 components, this is over 20x faster than boxaSort().
679  *      (2) Consequently, boxaSort() calls this function if it will
680  *          likely go much faster.
681  * </pre>
682  */
683 BOXA *
boxaBinSort(BOXA * boxas,l_int32 sorttype,l_int32 sortorder,NUMA ** pnaindex)684 boxaBinSort(BOXA    *boxas,
685             l_int32  sorttype,
686             l_int32  sortorder,
687             NUMA   **pnaindex)
688 {
689 l_int32  i, n, x, y, w, h;
690 BOXA    *boxad;
691 NUMA    *na, *naindex;
692 
693     PROCNAME("boxaBinSort");
694 
695     if (pnaindex) *pnaindex = NULL;
696     if (!boxas)
697         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
698     if ((n = boxaGetCount(boxas)) == 0) {
699         L_WARNING("boxas is empty\n", procName);
700         return boxaCopy(boxas, L_COPY);
701     }
702     if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
703         sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
704         sorttype != L_SORT_BY_PERIMETER)
705         return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL);
706     if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
707         return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL);
708 
709         /* Generate Numa of appropriate box dimensions */
710     if ((na = numaCreate(n)) == NULL)
711         return (BOXA *)ERROR_PTR("na not made", procName, NULL);
712     for (i = 0; i < n; i++) {
713         boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
714         switch (sorttype)
715         {
716         case L_SORT_BY_X:
717             numaAddNumber(na, x);
718             break;
719         case L_SORT_BY_Y:
720             numaAddNumber(na, y);
721             break;
722         case L_SORT_BY_WIDTH:
723             numaAddNumber(na, w);
724             break;
725         case L_SORT_BY_HEIGHT:
726             numaAddNumber(na, h);
727             break;
728         case L_SORT_BY_PERIMETER:
729             numaAddNumber(na, w + h);
730             break;
731         default:
732             L_WARNING("invalid sort type\n", procName);
733         }
734     }
735 
736         /* Get the sort index for data array */
737     naindex = numaGetBinSortIndex(na, sortorder);
738     numaDestroy(&na);
739     if (!naindex)
740         return (BOXA *)ERROR_PTR("naindex not made", procName, NULL);
741 
742         /* Build up sorted boxa using the sort index */
743     boxad = boxaSortByIndex(boxas, naindex);
744 
745     if (pnaindex)
746         *pnaindex = naindex;
747     else
748         numaDestroy(&naindex);
749     return boxad;
750 }
751 
752 
753 /*!
754  * \brief   boxaSortByIndex()
755  *
756  * \param[in]    boxas
757  * \param[in]    naindex na that maps from the new boxa to the input boxa
758  * \return  boxad sorted, or NULL on error
759  */
760 BOXA *
boxaSortByIndex(BOXA * boxas,NUMA * naindex)761 boxaSortByIndex(BOXA  *boxas,
762                 NUMA  *naindex)
763 {
764 l_int32  i, n, index;
765 BOX     *box;
766 BOXA    *boxad;
767 
768     PROCNAME("boxaSortByIndex");
769 
770     if (!boxas)
771         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
772     if ((n = boxaGetCount(boxas)) == 0) {
773         L_WARNING("boxas is empty\n", procName);
774         return boxaCopy(boxas, L_COPY);
775     }
776     if (!naindex)
777         return (BOXA *)ERROR_PTR("naindex not defined", procName, NULL);
778 
779     boxad = boxaCreate(n);
780     for (i = 0; i < n; i++) {
781         numaGetIValue(naindex, i, &index);
782         box = boxaGetBox(boxas, index, L_COPY);
783         boxaAddBox(boxad, box, L_INSERT);
784     }
785 
786     return boxad;
787 }
788 
789 
790 /*!
791  * \brief   boxaSort2d()
792  *
793  * \param[in]    boxas
794  * \param[out]   pnaad [optional] numaa with sorted indices
795  *                    whose values are the indices of the input array
796  * \param[in]    delta1 min overlap that permits aggregation of a box
797  *                      onto a boxa of horizontally-aligned boxes; pass 1
798  * \param[in]    delta2 min overlap that permits aggregation of a box
799  *                      onto a boxa of horizontally-aligned boxes; pass 2
800  * \param[in]    minh1 components less than this height either join an
801  *                     existing boxa or are set aside for pass 2
802  * \return  baa 2d sorted version of boxa, or NULL on error
803  *
804  * <pre>
805  * Notes:
806  *      (1) The final result is a sort where the 'fast scan' direction is
807  *          left to right, and the 'slow scan' direction is from top
808  *          to bottom.  Each boxa in the baa represents a sorted set
809  *          of boxes from left to right.
810  *      (2) Three passes are used to aggregate the boxas, which can correspond
811  *          to characters or words in a line of text.  In pass 1, only
812  *          taller components, which correspond to xheight or larger,
813  *          are permitted to start a new boxa.  In pass 2, the remaining
814  *          vertically-challenged components are allowed to join an
815  *          existing boxa or start a new one.  In pass 3, boxa whose extent
816  *          is overlapping are joined.  After that, the boxes in each
817  *          boxa are sorted horizontally, and finally the boxa are
818  *          sorted vertically.
819  *      (3) If delta1 < 0, the first pass allows aggregation when
820  *          boxes in the same boxa do not overlap vertically.
821  *          The distance by which they can miss and still be aggregated
822  *          is the absolute value |delta1|.   Similar for delta2 on
823  *          the second pass.
824  *      (4) On the first pass, any component of height less than minh1
825  *          cannot start a new boxa; it's put aside for later insertion.
826  *      (5) On the second pass, any small component that doesn't align
827  *          with an existing boxa can start a new one.
828  *      (6) This can be used to identify lines of text from
829  *          character or word bounding boxes.
830  *      (7) Typical values for the input parameters on 300 ppi text are:
831  *                 delta1 ~ 0
832  *                 delta2 ~ 0
833  *                 minh1 ~ 5
834  * </pre>
835  */
836 BOXAA *
boxaSort2d(BOXA * boxas,NUMAA ** pnaad,l_int32 delta1,l_int32 delta2,l_int32 minh1)837 boxaSort2d(BOXA    *boxas,
838            NUMAA  **pnaad,
839            l_int32  delta1,
840            l_int32  delta2,
841            l_int32  minh1)
842 {
843 l_int32  i, index, h, nt, ne, n, m, ival;
844 BOX     *box;
845 BOXA    *boxa, *boxae, *boxan, *boxa1, *boxa2, *boxa3, *boxav, *boxavs;
846 BOXAA   *baa, *baa1, *baad;
847 NUMA    *naindex, *nae, *nan, *nah, *nav, *na1, *na2, *nad, *namap;
848 NUMAA   *naa, *naa1, *naad;
849 
850     PROCNAME("boxaSort2d");
851 
852     if (pnaad) *pnaad = NULL;
853     if (!boxas)
854         return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL);
855     if (boxaGetCount(boxas) == 0)
856         return (BOXAA *)ERROR_PTR("boxas is empty", procName, NULL);
857 
858         /* Sort from left to right */
859     if ((boxa = boxaSort(boxas, L_SORT_BY_X, L_SORT_INCREASING, &naindex))
860                     == NULL)
861         return (BOXAA *)ERROR_PTR("boxa not made", procName, NULL);
862 
863         /* First pass: assign taller boxes to boxa by row */
864     nt = boxaGetCount(boxa);
865     baa = boxaaCreate(0);
866     naa = numaaCreate(0);
867     boxae = boxaCreate(0);  /* save small height boxes here */
868     nae = numaCreate(0);  /* keep track of small height boxes */
869     for (i = 0; i < nt; i++) {
870         box = boxaGetBox(boxa, i, L_CLONE);
871         boxGetGeometry(box, NULL, NULL, NULL, &h);
872         if (h < minh1) {  /* save for 2nd pass */
873             boxaAddBox(boxae, box, L_INSERT);
874             numaAddNumber(nae, i);
875         } else {
876             n = boxaaGetCount(baa);
877             boxaaAlignBox(baa, box, delta1, &index);
878             if (index < n) {  /* append to an existing boxa */
879                 boxaaAddBox(baa, index, box, L_INSERT);
880             } else {  /* doesn't align, need new boxa */
881                 boxan = boxaCreate(0);
882                 boxaAddBox(boxan, box, L_INSERT);
883                 boxaaAddBoxa(baa, boxan, L_INSERT);
884                 nan = numaCreate(0);
885                 numaaAddNuma(naa, nan, L_INSERT);
886             }
887             numaGetIValue(naindex, i, &ival);
888             numaaAddNumber(naa, index, ival);
889         }
890     }
891     boxaDestroy(&boxa);
892     numaDestroy(&naindex);
893 
894         /* Second pass: feed in small height boxes */
895     ne = boxaGetCount(boxae);
896     for (i = 0; i < ne; i++) {
897         box = boxaGetBox(boxae, i, L_CLONE);
898         n = boxaaGetCount(baa);
899         boxaaAlignBox(baa, box, delta2, &index);
900         if (index < n) {  /* append to an existing boxa */
901             boxaaAddBox(baa, index, box, L_INSERT);
902         } else {  /* doesn't align, need new boxa */
903             boxan = boxaCreate(0);
904             boxaAddBox(boxan, box, L_INSERT);
905             boxaaAddBoxa(baa, boxan, L_INSERT);
906             nan = numaCreate(0);
907             numaaAddNuma(naa, nan, L_INSERT);
908         }
909         numaGetIValue(nae, i, &ival);  /* location in original boxas */
910         numaaAddNumber(naa, index, ival);
911     }
912 
913         /* Third pass: merge some boxa whose extent is overlapping.
914          * Think of these boxa as text lines, where the bounding boxes
915          * of the text lines can overlap, but likely won't have
916          * a huge overlap.
917          * First do a greedy find of pairs of overlapping boxa, where
918          * the two boxa overlap by at least 50% of the smaller, and
919          * the smaller is not more than half the area of the larger.
920          * For such pairs, call the larger one the primary boxa.  The
921          * boxes in the smaller one are appended to those in the primary
922          * in pass 3a, and the primaries are extracted in pass 3b.
923          * In this way, all boxes in the original baa are saved. */
924     n = boxaaGetCount(baa);
925     boxaaGetExtent(baa, NULL, NULL, NULL, &boxa3);
926     boxa1 = boxaHandleOverlaps(boxa3, L_REMOVE_SMALL, 1000, 0.5, 0.5, &namap);
927     boxaDestroy(&boxa1);
928     boxaDestroy(&boxa3);
929     for (i = 0; i < n; i++) {  /* Pass 3a: join selected copies of boxa */
930         numaGetIValue(namap, i, &ival);
931         if (ival >= 0) {  /* join current to primary boxa[ival] */
932             boxa1 = boxaaGetBoxa(baa, i, L_COPY);
933             boxa2 = boxaaGetBoxa(baa, ival, L_CLONE);
934             boxaJoin(boxa2, boxa1, 0, -1);
935             boxaDestroy(&boxa2);
936             boxaDestroy(&boxa1);
937             na1 = numaaGetNuma(naa, i, L_COPY);
938             na2 = numaaGetNuma(naa, ival, L_CLONE);
939             numaJoin(na2, na1, 0, -1);
940             numaDestroy(&na1);
941             numaDestroy(&na2);
942         }
943     }
944     baa1 = boxaaCreate(n);
945     naa1 = numaaCreate(n);
946     for (i = 0; i < n; i++) {  /* Pass 3b: save primary boxa */
947         numaGetIValue(namap, i, &ival);
948         if (ival == -1) {
949             boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
950             boxaaAddBoxa(baa1, boxa1, L_INSERT);
951             na1 = numaaGetNuma(naa, i, L_CLONE);
952             numaaAddNuma(naa1, na1, L_INSERT);
953         }
954     }
955     numaDestroy(&namap);
956     boxaaDestroy(&baa);
957     baa = baa1;
958     numaaDestroy(&naa);
959     naa = naa1;
960 
961         /* Sort the boxes in each boxa horizontally */
962     m = boxaaGetCount(baa);
963     for (i = 0; i < m; i++) {
964         boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
965         boxa2 = boxaSort(boxa1, L_SORT_BY_X, L_SORT_INCREASING, &nah);
966         boxaaReplaceBoxa(baa, i, boxa2);
967         na1 = numaaGetNuma(naa, i, L_CLONE);
968         na2 = numaSortByIndex(na1, nah);
969         numaaReplaceNuma(naa, i, na2);
970         boxaDestroy(&boxa1);
971         numaDestroy(&na1);
972         numaDestroy(&nah);
973     }
974 
975         /* Sort the boxa vertically within boxaa, using the first box
976          * in each boxa. */
977     m = boxaaGetCount(baa);
978     boxav = boxaCreate(m);  /* holds first box in each boxa in baa */
979     naad = numaaCreate(m);
980     if (pnaad)
981         *pnaad = naad;
982     baad = boxaaCreate(m);
983     for (i = 0; i < m; i++) {
984         boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
985         box = boxaGetBox(boxa1, 0, L_CLONE);
986         boxaAddBox(boxav, box, L_INSERT);
987         boxaDestroy(&boxa1);
988     }
989     boxavs = boxaSort(boxav, L_SORT_BY_Y, L_SORT_INCREASING, &nav);
990     for (i = 0; i < m; i++) {
991         numaGetIValue(nav, i, &index);
992         boxa = boxaaGetBoxa(baa, index, L_CLONE);
993         boxaaAddBoxa(baad, boxa, L_INSERT);
994         nad = numaaGetNuma(naa, index, L_CLONE);
995         numaaAddNuma(naad, nad, L_INSERT);
996     }
997 
998 
999 /*    fprintf(stderr, "box count = %d, numaa count = %d\n", nt,
1000             numaaGetNumberCount(naad)); */
1001 
1002     boxaaDestroy(&baa);
1003     boxaDestroy(&boxav);
1004     boxaDestroy(&boxavs);
1005     boxaDestroy(&boxae);
1006     numaDestroy(&nav);
1007     numaDestroy(&nae);
1008     numaaDestroy(&naa);
1009     if (!pnaad)
1010         numaaDestroy(&naad);
1011 
1012     return baad;
1013 }
1014 
1015 
1016 /*!
1017  * \brief   boxaSort2dByIndex()
1018  *
1019  * \param[in]    boxas
1020  * \param[in]    naa numaa that maps from the new baa to the input boxa
1021  * \return  baa sorted boxaa, or NULL on error
1022  */
1023 BOXAA *
boxaSort2dByIndex(BOXA * boxas,NUMAA * naa)1024 boxaSort2dByIndex(BOXA   *boxas,
1025                   NUMAA  *naa)
1026 {
1027 l_int32  ntot, boxtot, i, j, n, nn, index;
1028 BOX     *box;
1029 BOXA    *boxa;
1030 BOXAA   *baa;
1031 NUMA    *na;
1032 
1033     PROCNAME("boxaSort2dByIndex");
1034 
1035     if (!boxas)
1036         return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL);
1037     if ((boxtot = boxaGetCount(boxas)) == 0)
1038         return (BOXAA *)ERROR_PTR("boxas is empty", procName, NULL);
1039     if (!naa)
1040         return (BOXAA *)ERROR_PTR("naindex not defined", procName, NULL);
1041 
1042         /* Check counts */
1043     ntot = numaaGetNumberCount(naa);
1044     if (ntot != boxtot)
1045         return (BOXAA *)ERROR_PTR("element count mismatch", procName, NULL);
1046 
1047     n = numaaGetCount(naa);
1048     baa = boxaaCreate(n);
1049     for (i = 0; i < n; i++) {
1050         na = numaaGetNuma(naa, i, L_CLONE);
1051         nn = numaGetCount(na);
1052         boxa = boxaCreate(nn);
1053         for (j = 0; j < nn; j++) {
1054             numaGetIValue(na, i, &index);
1055             box = boxaGetBox(boxas, index, L_COPY);
1056             boxaAddBox(boxa, box, L_INSERT);
1057         }
1058         boxaaAddBoxa(baa, boxa, L_INSERT);
1059         numaDestroy(&na);
1060     }
1061 
1062     return baa;
1063 }
1064 
1065 
1066 /*---------------------------------------------------------------------*
1067  *                        Boxa array extraction                        *
1068  *---------------------------------------------------------------------*/
1069 /*!
1070  * \brief   boxaExtractAsNuma()
1071  *
1072  * \param[in]    boxa
1073  * \param[out]   pnal [optional] array of left locations
1074  * \param[out]   pnat [optional] array of top locations
1075  * \param[out]   pnar [optional] array of right locations
1076  * \param[out]   pnab [optional] array of bottom locations
1077  * \param[out]   pnaw [optional] array of widths
1078  * \param[out]   pnah [optional] array of heights
1079  * \param[in]    keepinvalid 1 to keep invalid boxes; 0 to remove them
1080  * \return  0 if OK, 1 on error
1081  *
1082  * <pre>
1083  * Notes:
1084  *      (1) If you are counting or sorting values, such as determining
1085  *          rank order, you must remove invalid boxes.
1086  *      (2) If you are parametrizing the values, or doing an evaluation
1087  *          where the position in the boxa sequence is important, you
1088  *          must replace the invalid boxes with valid ones before
1089  *          doing the extraction. This is easily done with boxaFillSequence().
1090  * </pre>
1091  */
1092 l_int32
boxaExtractAsNuma(BOXA * boxa,NUMA ** pnal,NUMA ** pnat,NUMA ** pnar,NUMA ** pnab,NUMA ** pnaw,NUMA ** pnah,l_int32 keepinvalid)1093 boxaExtractAsNuma(BOXA    *boxa,
1094                   NUMA   **pnal,
1095                   NUMA   **pnat,
1096                   NUMA   **pnar,
1097                   NUMA   **pnab,
1098                   NUMA   **pnaw,
1099                   NUMA   **pnah,
1100                   l_int32  keepinvalid)
1101 {
1102 l_int32  i, n, left, top, right, bot, w, h;
1103 
1104     PROCNAME("boxaExtractAsNuma");
1105 
1106     if (!pnal && !pnat && !pnar && !pnab && !pnaw && !pnah)
1107         return ERROR_INT("no output requested", procName, 1);
1108     if (pnal) *pnal = NULL;
1109     if (pnat) *pnat = NULL;
1110     if (pnar) *pnar = NULL;
1111     if (pnab) *pnab = NULL;
1112     if (pnaw) *pnaw = NULL;
1113     if (pnah) *pnah = NULL;
1114     if (!boxa)
1115         return ERROR_INT("boxa not defined", procName, 1);
1116     if (!keepinvalid && boxaGetValidCount(boxa) == 0)
1117         return ERROR_INT("no valid boxes", procName, 1);
1118 
1119     n = boxaGetCount(boxa);
1120     if (pnal) *pnal = numaCreate(n);
1121     if (pnat) *pnat = numaCreate(n);
1122     if (pnar) *pnar = numaCreate(n);
1123     if (pnab) *pnab = numaCreate(n);
1124     if (pnaw) *pnaw = numaCreate(n);
1125     if (pnah) *pnah = numaCreate(n);
1126     for (i = 0; i < n; i++) {
1127         boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h);
1128         if (!keepinvalid && (w <= 0 || h <= 0))
1129             continue;
1130         right = left + w - 1;
1131         bot = top + h - 1;
1132         if (pnal) numaAddNumber(*pnal, left);
1133         if (pnat) numaAddNumber(*pnat, top);
1134         if (pnar) numaAddNumber(*pnar, right);
1135         if (pnab) numaAddNumber(*pnab, bot);
1136         if (pnaw) numaAddNumber(*pnaw, w);
1137         if (pnah) numaAddNumber(*pnah, h);
1138     }
1139 
1140     return 0;
1141 }
1142 
1143 
1144 /*!
1145  * \brief   boxaExtractAsPta()
1146  *
1147  * \param[in]    boxa
1148  * \param[out]   pptal [optional] array of left locations vs. index
1149  * \param[out]   pptat [optional] array of top locations vs. index
1150  * \param[out]   pptar [optional] array of right locations vs. index
1151  * \param[out]   pptab [optional] array of bottom locations vs. index
1152  * \param[out]   pptaw [optional] array of widths vs. index
1153  * \param[out]   pptah [optional] array of heights vs. index
1154  * \param[in]    keepinvalid 1 to keep invalid boxes; 0 to remove them
1155  * \return  0 if OK, 1 on error
1156  *
1157  * <pre>
1158  * Notes:
1159  *      (1) For most applications, such as counting, sorting, fitting
1160  *          to some parametrized form, plotting or filtering in general,
1161  *          you should remove the invalid boxes.  Each pta saves the
1162  *          box index in the x array, so replacing invalid boxes by
1163  *          filling with boxaFillSequence(), which is required for
1164  *          boxaExtractAsNuma(), is not necessary.
1165  *      (2) If invalid boxes are retained, each one will result in
1166  *          entries (typically 0) in all selected output pta.
1167  * </pre>
1168  */
1169 l_int32
boxaExtractAsPta(BOXA * boxa,PTA ** pptal,PTA ** pptat,PTA ** pptar,PTA ** pptab,PTA ** pptaw,PTA ** pptah,l_int32 keepinvalid)1170 boxaExtractAsPta(BOXA    *boxa,
1171                  PTA    **pptal,
1172                  PTA    **pptat,
1173                  PTA    **pptar,
1174                  PTA    **pptab,
1175                  PTA    **pptaw,
1176                  PTA    **pptah,
1177                  l_int32  keepinvalid)
1178 {
1179 l_int32  i, n, left, top, right, bot, w, h;
1180 
1181     PROCNAME("boxaExtractAsPta");
1182 
1183     if (!pptal && !pptar && !pptat && !pptab && !pptaw && !pptah)
1184         return ERROR_INT("no output requested", procName, 1);
1185     if (pptal) *pptal = NULL;
1186     if (pptat) *pptat = NULL;
1187     if (pptar) *pptar = NULL;
1188     if (pptab) *pptab = NULL;
1189     if (pptaw) *pptaw = NULL;
1190     if (pptah) *pptah = NULL;
1191     if (!boxa)
1192         return ERROR_INT("boxa not defined", procName, 1);
1193     if (!keepinvalid && boxaGetValidCount(boxa) == 0)
1194         return ERROR_INT("no valid boxes", procName, 1);
1195 
1196     n = boxaGetCount(boxa);
1197     if (pptal) *pptal = ptaCreate(n);
1198     if (pptat) *pptat = ptaCreate(n);
1199     if (pptar) *pptar = ptaCreate(n);
1200     if (pptab) *pptab = ptaCreate(n);
1201     if (pptaw) *pptaw = ptaCreate(n);
1202     if (pptah) *pptah = ptaCreate(n);
1203     for (i = 0; i < n; i++) {
1204         boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h);
1205         if (!keepinvalid && (w <= 0 || h <= 0))
1206             continue;
1207         right = left + w - 1;
1208         bot = top + h - 1;
1209         if (pptal) ptaAddPt(*pptal, i, left);
1210         if (pptat) ptaAddPt(*pptat, i, top);
1211         if (pptar) ptaAddPt(*pptar, i, right);
1212         if (pptab) ptaAddPt(*pptab, i, bot);
1213         if (pptaw) ptaAddPt(*pptaw, i, w);
1214         if (pptah) ptaAddPt(*pptah, i, h);
1215     }
1216 
1217     return 0;
1218 }
1219 
1220 
1221 /*---------------------------------------------------------------------*
1222  *                            Boxa statistics                          *
1223  *---------------------------------------------------------------------*/
1224 /*!
1225  * \brief   boxaGetRankVals()
1226  *
1227  * \param[in]    boxa
1228  * \param[in]    fract use 0.0 for smallest, 1.0 for largest width and height
1229  * \param[out]   px  [optional] rank value of x
1230  * \param[out]   py  [optional] rank value of y
1231  * \param[out]   pw  [optional] rank value of width
1232  * \param[out]   ph  [optional] rank value of height
1233  * \return  0 if OK, 1 on error or if the boxa is empty or has no valid boxes
1234  *
1235  * <pre>
1236  * Notes:
1237  *      (1) This function does not assume that all boxes in the boxa are valid
1238  *      (2) The four box parameters are sorted independently.
1239  *          For rank order, the width and height are sorted in increasing
1240  *          order.  But what does it mean to sort x and y in "rank order"?
1241  *          If the boxes are of comparable size and somewhat
1242  *          aligned (e.g., from multiple images), it makes some sense
1243  *          to give a "rank order" for x and y by sorting them in
1244  *          decreasing order.  But in general, the interpretation of a rank
1245  *          order on x and y is highly application dependent.  In summary:
1246  *             ~ x and y are sorted in decreasing order
1247  *             ~ w and h are sorted in increasing order
1248  * </pre>
1249  */
1250 l_int32
boxaGetRankVals(BOXA * boxa,l_float32 fract,l_int32 * px,l_int32 * py,l_int32 * pw,l_int32 * ph)1251 boxaGetRankVals(BOXA      *boxa,
1252                 l_float32  fract,
1253                 l_int32   *px,
1254                 l_int32   *py,
1255                 l_int32   *pw,
1256                 l_int32   *ph)
1257 {
1258 l_float32  xval, yval, wval, hval;
1259 NUMA      *nax, *nay, *naw, *nah;
1260 
1261     PROCNAME("boxaGetRankVals");
1262 
1263     if (px) *px = 0;
1264     if (py) *py = 0;
1265     if (pw) *pw = 0;
1266     if (ph) *ph = 0;
1267     if (!boxa)
1268         return ERROR_INT("boxa not defined", procName, 1);
1269     if (fract < 0.0 || fract > 1.0)
1270         return ERROR_INT("fract not in [0.0 ... 1.0]", procName, 1);
1271     if (boxaGetValidCount(boxa) == 0)
1272         return ERROR_INT("no valid boxes in boxa", procName, 1);
1273 
1274         /* Use only the valid boxes */
1275     boxaExtractAsNuma(boxa, &nax, &nay, NULL, NULL, &naw, &nah, 0);
1276 
1277     if (px) {
1278         numaGetRankValue(nax, 1.0 - fract, NULL, 1, &xval);
1279         *px = (l_int32)xval;
1280     }
1281     if (py) {
1282         numaGetRankValue(nay, 1.0 - fract, NULL, 1, &yval);
1283         *py = (l_int32)yval;
1284     }
1285     if (pw) {
1286         numaGetRankValue(naw, fract, NULL, 1, &wval);
1287         *pw = (l_int32)wval;
1288     }
1289     if (ph) {
1290         numaGetRankValue(nah, fract, NULL, 1, &hval);
1291         *ph = (l_int32)hval;
1292     }
1293     numaDestroy(&nax);
1294     numaDestroy(&nay);
1295     numaDestroy(&naw);
1296     numaDestroy(&nah);
1297     return 0;
1298 }
1299 
1300 
1301 /*!
1302  * \brief   boxaGetMedianVals()
1303  *
1304  * \param[in]    boxa
1305  * \param[out]   px  [optional] median value of x
1306  * \param[out]   py  [optional] median value of y
1307  * \param[out]   pw  [optional] median value of width
1308  * \param[out]   ph  [optional] median value of height
1309  * \return  0 if OK, 1 on error or if the boxa is empty or has no valid boxes
1310  *
1311  * <pre>
1312  * Notes:
1313  *      (1) See boxaGetRankVals()
1314  * </pre>
1315  */
1316 l_int32
boxaGetMedianVals(BOXA * boxa,l_int32 * px,l_int32 * py,l_int32 * pw,l_int32 * ph)1317 boxaGetMedianVals(BOXA     *boxa,
1318                   l_int32  *px,
1319                   l_int32  *py,
1320                   l_int32  *pw,
1321                   l_int32  *ph)
1322 {
1323     PROCNAME("boxaGetMedianVals");
1324 
1325     if (!boxa)
1326         return ERROR_INT("boxa not defined", procName, 1);
1327     if (boxaGetValidCount(boxa) == 0)
1328         return ERROR_INT("no valid boxes in boxa", procName, 1);
1329 
1330     return boxaGetRankVals(boxa, 0.5, px, py, pw, ph);
1331 }
1332 
1333 
1334 /*!
1335  * \brief   boxaGetAverageSize()
1336  *
1337  * \param[in]    boxa
1338  * \param[out]   pw  [optional] average width
1339  * \param[out]   ph  [optional] average height
1340  * \return  0 if OK, 1 on error or if the boxa is empty
1341  */
1342 l_int32
boxaGetAverageSize(BOXA * boxa,l_float32 * pw,l_float32 * ph)1343 boxaGetAverageSize(BOXA       *boxa,
1344                    l_float32  *pw,
1345                    l_float32  *ph)
1346 {
1347 l_int32    i, n, bw, bh;
1348 l_float32  sumw, sumh;
1349 
1350     PROCNAME("boxaGetAverageSize");
1351 
1352     if (pw) *pw = 0.0;
1353     if (ph) *ph = 0.0;
1354     if (!boxa)
1355         return ERROR_INT("boxa not defined", procName, 1);
1356     if ((n = boxaGetCount(boxa)) == 0)
1357         return ERROR_INT("boxa is empty", procName, 1);
1358 
1359     sumw = sumh = 0.0;
1360     for (i = 0; i < n; i++) {
1361         boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh);
1362         sumw += bw;
1363         sumh += bh;
1364     }
1365 
1366     if (pw) *pw = sumw / n;
1367     if (ph) *ph = sumh / n;
1368     return 0;
1369 }
1370 
1371 
1372 /*---------------------------------------------------------------------*
1373  *                        Other Boxaa functions                        *
1374  *---------------------------------------------------------------------*/
1375 /*!
1376  * \brief   boxaaGetExtent()
1377  *
1378  * \param[in]    baa
1379  * \param[out]   pw  [optional] width
1380  * \param[out]   ph  [optional] height
1381  * \param[out]   pbox [optional]  minimum box containing all boxa
1382  *                    in boxaa
1383  * \param[out]   pboxa [optional]  boxa containing all boxes in each
1384  *                     boxa in the boxaa
1385  * \return  0 if OK, 1 on error
1386  *
1387  * <pre>
1388  * Notes:
1389  *      (1) The returned w and h are the minimum size image
1390  *          that would contain all boxes untranslated.
1391  *      (2) Each box in the returned boxa is the minimum box required to
1392  *          hold all the boxes in the respective boxa of baa.
1393  *      (3) If there are no valid boxes in a boxa, the box corresponding
1394  *          to its extent has all fields set to 0 (an invalid box).
1395  * </pre>
1396  */
1397 l_int32
boxaaGetExtent(BOXAA * baa,l_int32 * pw,l_int32 * ph,BOX ** pbox,BOXA ** pboxa)1398 boxaaGetExtent(BOXAA    *baa,
1399                l_int32  *pw,
1400                l_int32  *ph,
1401                BOX     **pbox,
1402                BOXA    **pboxa)
1403 {
1404 l_int32  i, n, x, y, w, h, xmax, ymax, xmin, ymin, found;
1405 BOX     *box1;
1406 BOXA    *boxa, *boxa1;
1407 
1408     PROCNAME("boxaaGetExtent");
1409 
1410     if (!pw && !ph && !pbox && !pboxa)
1411         return ERROR_INT("no ptrs defined", procName, 1);
1412     if (pw) *pw = 0;
1413     if (ph) *ph = 0;
1414     if (pbox) *pbox = NULL;
1415     if (pboxa) *pboxa = NULL;
1416     if (!baa)
1417         return ERROR_INT("baa not defined", procName, 1);
1418 
1419     n = boxaaGetCount(baa);
1420     if (n == 0)
1421         return ERROR_INT("no boxa in baa", procName, 1);
1422 
1423     boxa = boxaCreate(n);
1424     xmax = ymax = 0;
1425     xmin = ymin = 100000000;
1426     found = FALSE;
1427     for (i = 0; i < n; i++) {
1428         boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
1429         boxaGetExtent(boxa1, NULL, NULL, &box1);
1430         boxaDestroy(&boxa1);
1431         boxGetGeometry(box1, &x, &y, &w, &h);
1432         if (w > 0 && h > 0) {  /* a valid extent box */
1433             found = TRUE;  /* found at least one valid extent box */
1434             xmin = L_MIN(xmin, x);
1435             ymin = L_MIN(ymin, y);
1436             xmax = L_MAX(xmax, x + w);
1437             ymax = L_MAX(ymax, y + h);
1438         }
1439         boxaAddBox(boxa, box1, L_INSERT);
1440     }
1441     if (found == FALSE)  /* no valid extent boxes */
1442         xmin = ymin = 0;
1443 
1444     if (pw) *pw = xmax;
1445     if (ph) *ph = ymax;
1446     if (pbox)
1447         *pbox = boxCreate(xmin, ymin, xmax - xmin, ymax - ymin);
1448     if (pboxa)
1449         *pboxa = boxa;
1450     else
1451         boxaDestroy(&boxa);
1452     return 0;
1453 }
1454 
1455 
1456 /*!
1457  * \brief   boxaaFlattenToBoxa()
1458  *
1459  * \param[in]    baa
1460  * \param[out]   pnaindex  [optional] the boxa index in the baa
1461  * \param[in]    copyflag  L_COPY or L_CLONE
1462  * \return  boxa, or NULL on error
1463  *
1464  * <pre>
1465  * Notes:
1466  *      (1) This 'flattens' the baa to a boxa, taking the boxes in
1467  *          order in the first boxa, then the second, etc.
1468  *      (2) If a boxa is empty, we generate an invalid, placeholder box
1469  *          of zero size.  This is useful when converting from a baa
1470  *          where each boxa has either 0 or 1 boxes, and it is necessary
1471  *          to maintain a 1:1 correspondence between the initial
1472  *          boxa array and the resulting box array.
1473  *      (3) If &naindex is defined, we generate a Numa that gives, for
1474  *          each box in the baa, the index of the boxa to which it belongs.
1475  * </pre>
1476  */
1477 BOXA *
boxaaFlattenToBoxa(BOXAA * baa,NUMA ** pnaindex,l_int32 copyflag)1478 boxaaFlattenToBoxa(BOXAA   *baa,
1479                    NUMA   **pnaindex,
1480                    l_int32  copyflag)
1481 {
1482 l_int32  i, j, m, n;
1483 BOXA    *boxa, *boxat;
1484 BOX     *box;
1485 NUMA    *naindex;
1486 
1487     PROCNAME("boxaaFlattenToBoxa");
1488 
1489     if (pnaindex) *pnaindex = NULL;
1490     if (!baa)
1491         return (BOXA *)ERROR_PTR("baa not defined", procName, NULL);
1492     if (copyflag != L_COPY && copyflag != L_CLONE)
1493         return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL);
1494     if (pnaindex) {
1495         naindex = numaCreate(0);
1496         *pnaindex = naindex;
1497     }
1498 
1499     n = boxaaGetCount(baa);
1500     boxa = boxaCreate(n);
1501     for (i = 0; i < n; i++) {
1502         boxat = boxaaGetBoxa(baa, i, L_CLONE);
1503         m = boxaGetCount(boxat);
1504         if (m == 0) {  /* placeholder box */
1505             box = boxCreate(0, 0, 0, 0);
1506             boxaAddBox(boxa, box, L_INSERT);
1507             if (pnaindex)
1508                 numaAddNumber(naindex, i);  /* save 'row' number */
1509         } else {
1510             for (j = 0; j < m; j++) {
1511                 box = boxaGetBox(boxat, j, copyflag);
1512                 boxaAddBox(boxa, box, L_INSERT);
1513                 if (pnaindex)
1514                     numaAddNumber(naindex, i);  /* save 'row' number */
1515             }
1516         }
1517         boxaDestroy(&boxat);
1518     }
1519 
1520     return boxa;
1521 }
1522 
1523 
1524 /*!
1525  * \brief   boxaaFlattenAligned()
1526  *
1527  * \param[in]    baa
1528  * \param[in]    num number extracted from each
1529  * \param[in]    fillerbox [optional] that fills if necessary
1530  * \param[in]    copyflag  L_COPY or L_CLONE
1531  * \return  boxa, or NULL on error
1532  *
1533  * <pre>
1534  * Notes:
1535  *      (1) This 'flattens' the baa to a boxa, taking the first %num
1536  *          boxes from each boxa.
1537  *      (2) In each boxa, if there are less than %num boxes, we preserve
1538  *          the alignment between the input baa and the output boxa
1539  *          by inserting one or more fillerbox(es) or, if %fillerbox == NULL,
1540  *          one or more invalid placeholder boxes.
1541  * </pre>
1542  */
1543 BOXA *
boxaaFlattenAligned(BOXAA * baa,l_int32 num,BOX * fillerbox,l_int32 copyflag)1544 boxaaFlattenAligned(BOXAA   *baa,
1545                     l_int32  num,
1546                     BOX     *fillerbox,
1547                     l_int32  copyflag)
1548 {
1549 l_int32  i, j, m, n, mval, nshort;
1550 BOXA    *boxat, *boxad;
1551 BOX     *box;
1552 
1553     PROCNAME("boxaaFlattenAligned");
1554 
1555     if (!baa)
1556         return (BOXA *)ERROR_PTR("baa not defined", procName, NULL);
1557     if (copyflag != L_COPY && copyflag != L_CLONE)
1558         return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL);
1559 
1560     n = boxaaGetCount(baa);
1561     boxad = boxaCreate(n);
1562     for (i = 0; i < n; i++) {
1563         boxat = boxaaGetBoxa(baa, i, L_CLONE);
1564         m = boxaGetCount(boxat);
1565         mval = L_MIN(m, num);
1566         nshort = num - mval;
1567         for (j = 0; j < mval; j++) {  /* take the first %num if possible */
1568             box = boxaGetBox(boxat, j, copyflag);
1569             boxaAddBox(boxad, box, L_INSERT);
1570         }
1571         for (j = 0; j < nshort; j++) {  /* add fillers if necessary */
1572             if (fillerbox) {
1573                 boxaAddBox(boxad, fillerbox, L_COPY);
1574             } else {
1575                 box = boxCreate(0, 0, 0, 0);  /* invalid placeholder box */
1576                 boxaAddBox(boxad, box, L_INSERT);
1577             }
1578         }
1579         boxaDestroy(&boxat);
1580     }
1581 
1582     return boxad;
1583 }
1584 
1585 
1586 /*!
1587  * \brief   boxaEncapsulateAligned()
1588  *
1589  * \param[in]    boxa
1590  * \param[in]    num number put into each boxa in the baa
1591  * \param[in]    copyflag  L_COPY or L_CLONE
1592  * \return  baa, or NULL on error
1593  *
1594  * <pre>
1595  * Notes:
1596  *      (1) This puts %num boxes from the input %boxa into each of a
1597  *          set of boxa within an output baa.
1598  *      (2) This assumes that the boxes in %boxa are in sets of %num each.
1599  * </pre>
1600  */
1601 BOXAA *
boxaEncapsulateAligned(BOXA * boxa,l_int32 num,l_int32 copyflag)1602 boxaEncapsulateAligned(BOXA    *boxa,
1603                        l_int32  num,
1604                        l_int32  copyflag)
1605 {
1606 l_int32  i, j, n, nbaa, index;
1607 BOX     *box;
1608 BOXA    *boxat;
1609 BOXAA   *baa;
1610 
1611     PROCNAME("boxaEncapsulateAligned");
1612 
1613     if (!boxa)
1614         return (BOXAA *)ERROR_PTR("boxa not defined", procName, NULL);
1615     if (copyflag != L_COPY && copyflag != L_CLONE)
1616         return (BOXAA *)ERROR_PTR("invalid copyflag", procName, NULL);
1617 
1618     n = boxaGetCount(boxa);
1619     nbaa = n / num;
1620     if (num * nbaa != n)
1621         L_ERROR("inconsistent alignment: num doesn't divide n\n", procName);
1622     baa = boxaaCreate(nbaa);
1623     for (i = 0, index = 0; i < nbaa; i++) {
1624         boxat = boxaCreate(num);
1625         for (j = 0; j < num; j++, index++) {
1626             box = boxaGetBox(boxa, index, copyflag);
1627             boxaAddBox(boxat, box, L_INSERT);
1628         }
1629         boxaaAddBoxa(baa, boxat, L_INSERT);
1630     }
1631 
1632     return baa;
1633 }
1634 
1635 
1636 /*!
1637  * \brief   boxaaTranspose()
1638  *
1639  * \param[in]    baas
1640  * \return  baad, or NULL on error
1641  *
1642  * <pre>
1643  * Notes:
1644  *      (1) If you think of a boxaa as a 2D array of boxes that is accessed
1645  *          row major, then each row is represented by one of the boxa.
1646  *          This function creates a new boxaa related to the input boxaa
1647  *          as a column major traversal of the input boxaa.
1648  *      (2) For example, if %baas has 2 boxa, each with 10 boxes, then
1649  *          %baad will have 10 boxa, each with 2 boxes.
1650  *      (3) Require for this transpose operation that each boxa in
1651  *          %baas has the same number of boxes.  This operation is useful
1652  *          when the i-th boxes in each boxa are meaningfully related.
1653  * </pre>
1654  */
1655 BOXAA *
boxaaTranspose(BOXAA * baas)1656 boxaaTranspose(BOXAA  *baas)
1657 {
1658 l_int32   i, j, ny, nb, nbox;
1659 BOX      *box;
1660 BOXA     *boxa;
1661 BOXAA    *baad;
1662 
1663     PROCNAME("boxaaTranspose");
1664 
1665     if (!baas)
1666         return (BOXAA *)ERROR_PTR("baas not defined", procName, NULL);
1667     if ((ny = boxaaGetCount(baas)) == 0)
1668         return (BOXAA *)ERROR_PTR("baas empty", procName, NULL);
1669 
1670         /* Make sure that each boxa in baas has the same number of boxes */
1671     for (i = 0; i < ny; i++) {
1672         if ((boxa = boxaaGetBoxa(baas, i, L_CLONE)) == NULL)
1673             return (BOXAA *)ERROR_PTR("baas is missing a boxa", procName, NULL);
1674         nb = boxaGetCount(boxa);
1675         boxaDestroy(&boxa);
1676         if (i == 0)
1677             nbox = nb;
1678         else if (nb != nbox)
1679             return (BOXAA *)ERROR_PTR("boxa are not all the same size",
1680                                       procName, NULL);
1681     }
1682 
1683         /* baad[i][j] = baas[j][i] */
1684     baad = boxaaCreate(nbox);
1685     for (i = 0; i < nbox; i++) {
1686         boxa = boxaCreate(ny);
1687         for (j = 0; j < ny; j++) {
1688             box = boxaaGetBox(baas, j, i, L_COPY);
1689             boxaAddBox(boxa, box, L_INSERT);
1690         }
1691         boxaaAddBoxa(baad, boxa, L_INSERT);
1692     }
1693     return baad;
1694 }
1695 
1696 
1697 /*!
1698  * \brief   boxaaAlignBox()
1699  *
1700  * \param[in]    baa
1701  * \param[in]    box to be aligned with the bext boxa in the baa, if possible
1702  * \param[in]    delta amount by which consecutive components can miss
1703  *                     in overlap and still be included in the array
1704  * \param[out]   pindex index of boxa with best overlap, or if none match,
1705  *                      this is the index of the next boxa to be generated
1706  * \return  0 if OK, 1 on error
1707  *
1708  * <pre>
1709  * Notes:
1710  *      (1) This is not greedy.  It finds the boxa whose vertical
1711  *          extent has the closest overlap with the input box.
1712  * </pre>
1713  */
1714 l_int32
boxaaAlignBox(BOXAA * baa,BOX * box,l_int32 delta,l_int32 * pindex)1715 boxaaAlignBox(BOXAA    *baa,
1716               BOX      *box,
1717               l_int32   delta,
1718               l_int32  *pindex)
1719 {
1720 l_int32  i, n, m, y, yt, h, ht, ovlp, maxovlp, maxindex;
1721 BOX     *boxt;
1722 BOXA    *boxa;
1723 
1724     PROCNAME("boxaaAlignBox");
1725 
1726     if (pindex) *pindex = 0;
1727     if (!baa)
1728         return ERROR_INT("baa not defined", procName, 1);
1729     if (!box)
1730         return ERROR_INT("box not defined", procName, 1);
1731     if (!pindex)
1732         return ERROR_INT("&index not defined", procName, 1);
1733 
1734     n = boxaaGetCount(baa);
1735     boxGetGeometry(box, NULL, &y, NULL, &h);
1736     maxovlp = -10000000;
1737     for (i = 0; i < n; i++) {
1738         boxa = boxaaGetBoxa(baa, i, L_CLONE);
1739         if ((m = boxaGetCount(boxa)) == 0) {
1740             boxaDestroy(&boxa);
1741             L_WARNING("no boxes in boxa\n", procName);
1742             continue;
1743         }
1744         boxaGetExtent(boxa, NULL, NULL, &boxt);
1745         boxGetGeometry(boxt, NULL, &yt, NULL, &ht);
1746         boxDestroy(&boxt);
1747         boxaDestroy(&boxa);
1748 
1749             /* Overlap < 0 means the components do not overlap vertically */
1750         if (yt >= y)
1751             ovlp = y + h - 1 - yt;
1752         else
1753             ovlp = yt + ht - 1 - y;
1754         if (ovlp > maxovlp) {
1755             maxovlp = ovlp;
1756             maxindex = i;
1757         }
1758     }
1759 
1760     if (maxovlp + delta >= 0)
1761         *pindex = maxindex;
1762     else
1763         *pindex = n;
1764     return 0;
1765 }
1766