1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -
4  -  Redistribution and use in source and binary forms, with or without
5  -  modification, are permitted provided that the following conditions
6  -  are met:
7  -  1. Redistributions of source code must retain the above copyright
8  -     notice, this list of conditions and the following disclaimer.
9  -  2. Redistributions in binary form must reproduce the above
10  -     copyright notice, this list of conditions and the following
11  -     disclaimer in the documentation and/or other materials
12  -     provided with the distribution.
13  -
14  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18  -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*!
28  * \file  bytearray.c
29  * <pre>
30  *
31  *   Functions for handling byte arrays, in analogy with C++ 'strings'
32  *
33  *      Creation, copy, clone, destruction
34  *           L_BYTEA      *l_byteaCreate()
35  *           L_BYTEA      *l_byteaInitFromMem()
36  *           L_BYTEA      *l_byteaInitFromFile()
37  *           L_BYTEA      *l_byteaInitFromStream()
38  *           L_BYTEA      *l_byteaCopy()
39  *           L_BYTEA      *l_byteaClone()
40  *           void          l_byteaDestroy()
41  *
42  *      Accessors
43  *           size_t        l_byteaGetSize()
44  *           l_uint8      *l_byteaGetData()
45  *           l_uint8      *l_byteaCopyData()
46  *
47  *      Appending
48  *           l_int32       l_byteaAppendData()
49  *           l_int32       l_byteaAppendString()
50  *           static l_int32  l_byteaExtendArrayToSize()
51  *
52  *      Join/Split
53  *           l_int32       l_byteaJoin()
54  *           l_int32       l_byteaSplit()
55  *
56  *      Search
57  *           l_int32       l_byteaFindEachSequence()
58  *
59  *      Output to file
60  *           l_int32       l_byteaWrite()
61  *           l_int32       l_byteaWriteStream()
62  *
63  *   The internal data array is always null-terminated, for ease of use
64  *   in the event that it is an ascii string without null bytes.
65  * </pre>
66  */
67 
68 #include <string.h>
69 #include "allheaders.h"
70 
71 static const l_int32  INITIAL_ARRAYSIZE = 200;   /* arbitrary default capacity in bytes; used by l_byteaCreate() when nbytes is 0 */
72 
73     /* Static helper, defined below: grows the data array of an L_BYTEA */
74 static l_int32 l_byteaExtendArrayToSize(L_BYTEA *ba, size_t size);
75 
76 
77 /*---------------------------------------------------------------------*
78  *                  Creation, copy, clone, destruction                 *
79  *---------------------------------------------------------------------*/
80 /*!
81  * \brief   l_byteaCreate()
82  *
83  * \param[in]    nbytes determines initial size of data array
84  * \return  l_bytea, or NULL on error
85  *
86  * <pre>
87  * Notes:
88  *      (1) The allocated array is n + 1 bytes.  This allows room
89  *          for null termination.
90  * </pre>
91  */
92 L_BYTEA *
l_byteaCreate(size_t nbytes)93 l_byteaCreate(size_t  nbytes)
94 {
95 L_BYTEA  *ba;
96 
97     PROCNAME("l_byteaCreate");
98 
99     if (nbytes <= 0)
100         nbytes = INITIAL_ARRAYSIZE;
101 
102     ba = (L_BYTEA *)LEPT_CALLOC(1, sizeof(L_BYTEA));
103     ba->data = (l_uint8 *)LEPT_CALLOC(nbytes + 1, sizeof(l_uint8));
104     if (!ba->data) {
105         l_byteaDestroy(&ba);
106         return (L_BYTEA *)ERROR_PTR("ba array not made", procName, NULL);
107     }
108     ba->nalloc = nbytes + 1;
109     ba->refcount = 1;
110     return ba;
111 }
112 
113 
114 /*!
115  * \brief   l_byteaInitFromMem()
116  *
117  * \param[in]    data to be copied to the array
118  * \param[in]    size amount of data
119  * \return  l_bytea, or NULL on error
120  */
121 L_BYTEA *
l_byteaInitFromMem(l_uint8 * data,size_t size)122 l_byteaInitFromMem(l_uint8  *data,
123                    size_t    size)
124 {
125 L_BYTEA  *ba;
126 
127     PROCNAME("l_byteaInitFromMem");
128 
129     if (!data)
130         return (L_BYTEA *)ERROR_PTR("data not defined", procName, NULL);
131     if (size <= 0)
132         return (L_BYTEA *)ERROR_PTR("no bytes to initialize", procName, NULL);
133 
134     if ((ba = l_byteaCreate(size)) == NULL)
135         return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL);
136     memcpy(ba->data, data, size);
137     ba->size = size;
138     return ba;
139 }
140 
141 
142 /*!
143  * \brief   l_byteaInitFromFile()
144  *
145  * \param[in]    fname
146  * \return  l_bytea, or NULL on error
147  */
148 L_BYTEA *
l_byteaInitFromFile(const char * fname)149 l_byteaInitFromFile(const char  *fname)
150 {
151 FILE     *fp;
152 L_BYTEA  *ba;
153 
154     PROCNAME("l_byteaInitFromFile");
155 
156     if (!fname)
157         return (L_BYTEA *)ERROR_PTR("fname not defined", procName, NULL);
158 
159     if ((fp = fopenReadStream(fname)) == NULL)
160         return (L_BYTEA *)ERROR_PTR("file stream not opened", procName, NULL);
161     ba = l_byteaInitFromStream(fp);
162     fclose(fp);
163     if (!ba)
164         return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL);
165     return ba;
166 }
167 
168 
169 /*!
170  * \brief   l_byteaInitFromStream()
171  *
172  * \param[in]    fp file stream
173  * \return  l_bytea, or NULL on error
174  */
175 L_BYTEA *
l_byteaInitFromStream(FILE * fp)176 l_byteaInitFromStream(FILE  *fp)
177 {
178 l_uint8  *data;
179 size_t    nbytes;
180 L_BYTEA  *ba;
181 
182     PROCNAME("l_byteaInitFromStream");
183 
184     if (!fp)
185         return (L_BYTEA *)ERROR_PTR("stream not defined", procName, NULL);
186 
187     if ((data = l_binaryReadStream(fp, &nbytes)) == NULL)
188         return (L_BYTEA *)ERROR_PTR("data not read", procName, NULL);
189     if ((ba = l_byteaCreate(nbytes)) == NULL) {
190         LEPT_FREE(data);
191         return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL);
192     }
193     memcpy(ba->data, data, nbytes);
194     ba->size = nbytes;
195     LEPT_FREE(data);
196     return ba;
197 }
198 
199 
200 /*!
201  * \brief   l_byteaCopy()
202  *
203  * \param[in]    bas  source lba
204  * \param[in]    copyflag L_COPY, L_CLONE
205  * \return  clone or copy of bas, or NULL on error
206  *
207  * <pre>
208  * Notes:
209  *      (1) If cloning, up the refcount and return a ptr to %bas.
210  * </pre>
211  */
212 L_BYTEA *
l_byteaCopy(L_BYTEA * bas,l_int32 copyflag)213 l_byteaCopy(L_BYTEA  *bas,
214             l_int32   copyflag)
215 {
216     PROCNAME("l_byteaCopy");
217 
218     if (!bas)
219         return (L_BYTEA *)ERROR_PTR("bas not defined", procName, NULL);
220 
221     if (copyflag == L_CLONE) {
222         bas->refcount++;
223         return bas;
224     }
225 
226     return l_byteaInitFromMem(bas->data, bas->size);
227 }
228 
229 
230 /*!
231  * \brief   l_byteaDestroy()
232  *
233  * \param[in,out]   pba will be set to null before returning
234  * \return  void
235  *
236  * <pre>
237  * Notes:
238  *      (1) Decrements the ref count and, if 0, destroys the lba.
239  *      (2) Always nulls the input ptr.
240  *      (3) If the data has been previously removed, the lba will
241  *          have been nulled, so this will do nothing.
242  * </pre>
243  */
244 void
l_byteaDestroy(L_BYTEA ** pba)245 l_byteaDestroy(L_BYTEA  **pba)
246 {
247 L_BYTEA  *ba;
248 
249     PROCNAME("l_byteaDestroy");
250 
251     if (pba == NULL) {
252         L_WARNING("ptr address is null!\n", procName);
253         return;
254     }
255 
256     if ((ba = *pba) == NULL)
257         return;
258 
259         /* Decrement the ref count.  If it is 0, destroy the lba. */
260     ba->refcount--;
261     if (ba->refcount <= 0) {
262         if (ba->data) LEPT_FREE(ba->data);
263         LEPT_FREE(ba);
264     }
265 
266     *pba = NULL;
267     return;
268 }
269 
270 
271 /*---------------------------------------------------------------------*
272  *                               Accessors                             *
273  *---------------------------------------------------------------------*/
274 /*!
275  * \brief   l_byteaGetSize()
276  *
277  * \param[in]    ba
278  * \return  size of stored byte array, or 0 on error
279  */
280 size_t
l_byteaGetSize(L_BYTEA * ba)281 l_byteaGetSize(L_BYTEA  *ba)
282 {
283     PROCNAME("l_byteaGetSize");
284 
285     if (!ba)
286         return ERROR_INT("ba not defined", procName, 0);
287     return ba->size;
288 }
289 
290 
291 /*!
292  * \brief   l_byteaGetData()
293  *
294  * \param[in]    ba
295  * \param[out]   psize size of data in lba
296  * \return  ptr to existing data array, or NULL on error
297  *
298  * <pre>
299  * Notes:
300  *      (1) The returned ptr is owned by %ba.  Do not free it!
301  * </pre>
302  */
303 l_uint8 *
l_byteaGetData(L_BYTEA * ba,size_t * psize)304 l_byteaGetData(L_BYTEA  *ba,
305                size_t   *psize)
306 {
307     PROCNAME("l_byteaGetData");
308 
309     if (!ba)
310         return (l_uint8 *)ERROR_PTR("ba not defined", procName, NULL);
311     if (!psize)
312         return (l_uint8 *)ERROR_PTR("&size not defined", procName, NULL);
313 
314     *psize = ba->size;
315     return ba->data;
316 }
317 
318 
319 /*!
320  * \brief   l_byteaCopyData()
321  *
322  * \param[in]    ba
323  * \param[out]   psize size of data in lba
324  * \return  copy of data in use in the data array, or NULL on error.
325  *
326  * <pre>
327  * Notes:
328  *      (1) The returned data is owned by the caller.  The input %ba
329  *          still owns the original data array.
330  * </pre>
331  */
332 l_uint8 *
l_byteaCopyData(L_BYTEA * ba,size_t * psize)333 l_byteaCopyData(L_BYTEA  *ba,
334                 size_t   *psize)
335 {
336 l_uint8  *data;
337 
338     PROCNAME("l_byteaCopyData");
339 
340     if (!psize)
341         return (l_uint8 *)ERROR_PTR("&size not defined", procName, NULL);
342     *psize = 0;
343     if (!ba)
344         return (l_uint8 *)ERROR_PTR("ba not defined", procName, NULL);
345 
346     data = l_byteaGetData(ba, psize);
347     return l_binaryCopy(data, *psize);
348 }
349 
350 
351 /*---------------------------------------------------------------------*
352  *                               Appending                             *
353  *---------------------------------------------------------------------*/
354 /*!
355  * \brief   l_byteaAppendData()
356  *
357  * \param[in]    ba
358  * \param[in]    newdata byte array to be appended
359  * \param[in]    newbytes size of data array
360  * \return  0 if OK, 1 on error
361  */
362 l_int32
l_byteaAppendData(L_BYTEA * ba,l_uint8 * newdata,size_t newbytes)363 l_byteaAppendData(L_BYTEA  *ba,
364                   l_uint8  *newdata,
365                   size_t    newbytes)
366 {
367 size_t  size, nalloc, reqsize;
368 
369     PROCNAME("l_byteaAppendData");
370 
371     if (!ba)
372         return ERROR_INT("ba not defined", procName, 1);
373     if (!newdata)
374         return ERROR_INT("newdata not defined", procName, 1);
375 
376     size = l_byteaGetSize(ba);
377     reqsize = size + newbytes + 1;
378     nalloc = ba->nalloc;
379     if (nalloc < reqsize)
380         l_byteaExtendArrayToSize(ba, 2 * reqsize);
381 
382     memcpy((char *)(ba->data + size), (char *)newdata, newbytes);
383     ba->size += newbytes;
384     return 0;
385 }
386 
387 
388 /*!
389  * \brief   l_byteaAppendString()
390  *
391  * \param[in]    ba
392  * \param[in]    str null-terminated string to be appended
393  * \return  0 if OK, 1 on error
394  */
395 l_int32
l_byteaAppendString(L_BYTEA * ba,char * str)396 l_byteaAppendString(L_BYTEA  *ba,
397                     char     *str)
398 {
399 size_t  size, len, nalloc, reqsize;
400 
401     PROCNAME("l_byteaAppendString");
402 
403     if (!ba)
404         return ERROR_INT("ba not defined", procName, 1);
405     if (!str)
406         return ERROR_INT("str not defined", procName, 1);
407 
408     size = l_byteaGetSize(ba);
409     len = strlen(str);
410     reqsize = size + len + 1;
411     nalloc = ba->nalloc;
412     if (nalloc < reqsize)
413         l_byteaExtendArrayToSize(ba, 2 * reqsize);
414 
415     memcpy(ba->data + size, str, len);
416     ba->size += len;
417     return 0;
418 }
419 
420 
421 /*!
422  * \brief   l_byteaExtendArrayToSize()
423  *
424  * \param[in]    ba
425  * \param[in]    size new size of lba data array
426  * \return  0 if OK; 1 on error
427  */
428 static l_int32
l_byteaExtendArrayToSize(L_BYTEA * ba,size_t size)429 l_byteaExtendArrayToSize(L_BYTEA  *ba,
430                          size_t    size)
431 {
432     PROCNAME("l_byteaExtendArrayToSize");
433 
434     if (!ba)
435         return ERROR_INT("ba not defined", procName, 1);
436 
437     if (size > ba->nalloc) {
438         if ((ba->data =
439             (l_uint8 *)reallocNew((void **)&ba->data, ba->nalloc, size))
440                  == NULL)
441             return ERROR_INT("new array not returned", procName, 1);
442         ba->nalloc = size;
443     }
444     return 0;
445 }
446 
447 
448 /*---------------------------------------------------------------------*
449  *                        String join/split                            *
450  *---------------------------------------------------------------------*/
451 /*!
452  * \brief   l_byteaJoin()
453  *
454  * \param[in]       ba1
455  * \param[in,out]   pba2 data array is added to the one in ba1, and
456  *                       then ba2 is destroyed
457  * \return  0 if OK, 1 on error
458  *
459  * <pre>
460  * Notes:
461  *      (1) It is a no-op, not an error, for %ba2 to be null.
462  * </pre>
463  */
464 l_int32
l_byteaJoin(L_BYTEA * ba1,L_BYTEA ** pba2)465 l_byteaJoin(L_BYTEA   *ba1,
466             L_BYTEA  **pba2)
467 {
468 l_uint8  *data2;
469 size_t    nbytes2;
470 L_BYTEA  *ba2;
471 
472     PROCNAME("l_byteaJoin");
473 
474     if (!ba1)
475         return ERROR_INT("ba1 not defined", procName, 1);
476     if (!pba2)
477         return ERROR_INT("&ba2 not defined", procName, 1);
478     if ((ba2 = *pba2) == NULL) return 0;
479 
480     data2 = l_byteaGetData(ba2, &nbytes2);
481     l_byteaAppendData(ba1, data2, nbytes2);
482 
483     l_byteaDestroy(pba2);
484     return 0;
485 }
486 
487 
488 /*!
489  * \brief   l_byteaSplit()
490  *
491  * \param[in]    ba1 lba to split; array bytes nulled beyond the split loc
492  * \param[in]    splitloc location in ba1 to split; ba2 begins there
493  * \param[out]   pba2 with data starting at splitloc
494  * \return  0 if OK, 1 on error
495  */
496 l_int32
l_byteaSplit(L_BYTEA * ba1,size_t splitloc,L_BYTEA ** pba2)497 l_byteaSplit(L_BYTEA   *ba1,
498              size_t     splitloc,
499              L_BYTEA  **pba2)
500 {
501 l_uint8  *data1;
502 size_t    nbytes1, nbytes2;
503 
504     PROCNAME("l_byteaSplit");
505 
506     if (!pba2)
507         return ERROR_INT("&ba2 not defined", procName, 1);
508     *pba2 = NULL;
509     if (!ba1)
510         return ERROR_INT("ba1 not defined", procName, 1);
511 
512     data1 = l_byteaGetData(ba1, &nbytes1);
513     if (splitloc >= nbytes1)
514         return ERROR_INT("splitloc invalid", procName, 1);
515     nbytes2 = nbytes1 - splitloc;
516 
517         /* Make the new lba */
518     *pba2 = l_byteaInitFromMem(data1 + splitloc, nbytes2);
519 
520         /* Null the removed bytes in the input lba */
521     memset(data1 + splitloc, 0, nbytes2);
522     ba1->size = splitloc;
523     return 0;
524 }
525 
526 
527 /*---------------------------------------------------------------------*
528  *                                Search                               *
529  *---------------------------------------------------------------------*/
530 /*!
531  * \brief   l_byteaFindEachSequence()
532  *
533  * \param[in]    ba
534  * \param[in]    sequence subarray of bytes to find in data
535  * \param[in]    seqlen length of sequence, in bytes
536  * \param[out]   pda byte positions of each occurrence of %sequence
537  * \return  0 if OK, 1 on error
538  */
539 l_int32
l_byteaFindEachSequence(L_BYTEA * ba,l_uint8 * sequence,l_int32 seqlen,L_DNA ** pda)540 l_byteaFindEachSequence(L_BYTEA   *ba,
541                         l_uint8   *sequence,
542                         l_int32    seqlen,
543                         L_DNA    **pda)
544 {
545 l_uint8  *data;
546 size_t    size;
547 
548     PROCNAME("l_byteaFindEachSequence");
549 
550     if (!pda)
551         return ERROR_INT("&da not defined", procName, 1);
552     *pda = NULL;
553     if (!ba)
554         return ERROR_INT("ba not defined", procName, 1);
555     if (!sequence)
556         return ERROR_INT("sequence not defined", procName, 1);
557 
558     data = l_byteaGetData(ba, &size);
559     *pda = arrayFindEachSequence(data, size, sequence, seqlen);
560     return 0;
561 }
562 
563 
564 /*---------------------------------------------------------------------*
565  *                              Output to file                         *
566  *---------------------------------------------------------------------*/
567 /*!
568  * \brief   l_byteaWrite()
569  *
570  * \param[in]    fname output file
571  * \param[in]    ba
572  * \param[in]    startloc first byte to output
573  * \param[in]    endloc last byte to output; use 0 to write to the
574  *                      end of the data array
575  * \return  0 if OK, 1 on error
576  */
577 l_int32
l_byteaWrite(const char * fname,L_BYTEA * ba,size_t startloc,size_t endloc)578 l_byteaWrite(const char  *fname,
579              L_BYTEA     *ba,
580              size_t       startloc,
581              size_t       endloc)
582 {
583 l_int32  ret;
584 FILE    *fp;
585 
586     PROCNAME("l_byteaWrite");
587 
588     if (!fname)
589         return ERROR_INT("fname not defined", procName, 1);
590     if (!ba)
591         return ERROR_INT("ba not defined", procName, 1);
592 
593     if ((fp = fopenWriteStream(fname, "wb")) == NULL)
594         return ERROR_INT("stream not opened", procName, 1);
595     ret = l_byteaWriteStream(fp, ba, startloc, endloc);
596     fclose(fp);
597     return ret;
598 }
599 
600 
601 /*!
602  * \brief   l_byteaWriteStream()
603  *
604  * \param[in]    fp file stream opened for binary write
605  * \param[in]    ba
606  * \param[in]    startloc first byte to output
607  * \param[in]    endloc last byte to output; use 0 to write to the
608  *                      end of the data array
609  * \return  0 if OK, 1 on error
610  */
611 l_int32
l_byteaWriteStream(FILE * fp,L_BYTEA * ba,size_t startloc,size_t endloc)612 l_byteaWriteStream(FILE     *fp,
613                    L_BYTEA  *ba,
614                    size_t    startloc,
615                    size_t    endloc)
616 {
617 l_uint8  *data;
618 size_t    size, nbytes;
619 
620     PROCNAME("l_byteaWriteStream");
621 
622     if (!fp)
623         return ERROR_INT("stream not defined", procName, 1);
624     if (!ba)
625         return ERROR_INT("ba not defined", procName, 1);
626 
627     data = l_byteaGetData(ba, &size);
628     if (startloc >= size)
629         return ERROR_INT("invalid startloc", procName, 1);
630     if (endloc == 0) endloc = size - 1;
631     nbytes = endloc - startloc + 1;
632     if (nbytes < 1)
633         return ERROR_INT("endloc must be >= startloc", procName, 1);
634 
635     fwrite(data + startloc, 1, nbytes, fp);
636     return 0;
637 }
638