1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file bytearray.c
29 * <pre>
30 *
31 * Functions for handling byte arrays, in analogy with C++ 'strings'
32 *
33 * Creation, copy, clone, destruction
34 * L_BYTEA *l_byteaCreate()
35 * L_BYTEA *l_byteaInitFromMem()
36 * L_BYTEA *l_byteaInitFromFile()
37 * L_BYTEA *l_byteaInitFromStream()
38 * L_BYTEA *l_byteaCopy()
39 * L_BYTEA *l_byteaClone()
40 * void l_byteaDestroy()
41 *
42 * Accessors
43 * size_t l_byteaGetSize()
44 * l_uint8 *l_byteaGetData()
45 * l_uint8 *l_byteaCopyData()
46 *
47 * Appending
48 * l_int32 l_byteaAppendData()
49 * l_int32 l_byteaAppendString()
50 * static l_int32 l_byteaExtendArrayToSize()
51 *
52 * Join/Split
53 * l_int32 l_byteaJoin()
54 * l_int32 l_byteaSplit()
55 *
56 * Search
57 * l_int32 l_byteaFindEachSequence()
58 *
59 * Output to file
60 * l_int32 l_byteaWrite()
61 * l_int32 l_byteaWriteStream()
62 *
63 * The internal data array is always null-terminated, for ease of use
64 * in the event that it is an ascii string without null bytes.
65 * </pre>
66 */
67
68 #include <string.h>
69 #include "allheaders.h"
70
71 static const l_int32 INITIAL_ARRAYSIZE = 200; /* arbitrary default; l_byteaCreate() uses it when asked for 0 bytes */
72
73 /* Static function (forward declaration) */
74 static l_int32 l_byteaExtendArrayToSize(L_BYTEA *ba, size_t size);
75
76
77 /*---------------------------------------------------------------------*
78 * Creation, copy, clone, destruction *
79 *---------------------------------------------------------------------*/
80 /*!
81 * \brief l_byteaCreate()
82 *
83 * \param[in] nbytes determines initial size of data array
84 * \return l_bytea, or NULL on error
85 *
86 * <pre>
87 * Notes:
88 * (1) The allocated array is n + 1 bytes. This allows room
89 * for null termination.
90 * </pre>
91 */
92 L_BYTEA *
l_byteaCreate(size_t nbytes)93 l_byteaCreate(size_t nbytes)
94 {
95 L_BYTEA *ba;
96
97 PROCNAME("l_byteaCreate");
98
99 if (nbytes <= 0)
100 nbytes = INITIAL_ARRAYSIZE;
101
102 ba = (L_BYTEA *)LEPT_CALLOC(1, sizeof(L_BYTEA));
103 ba->data = (l_uint8 *)LEPT_CALLOC(nbytes + 1, sizeof(l_uint8));
104 if (!ba->data) {
105 l_byteaDestroy(&ba);
106 return (L_BYTEA *)ERROR_PTR("ba array not made", procName, NULL);
107 }
108 ba->nalloc = nbytes + 1;
109 ba->refcount = 1;
110 return ba;
111 }
112
113
114 /*!
115 * \brief l_byteaInitFromMem()
116 *
117 * \param[in] data to be copied to the array
118 * \param[in] size amount of data
119 * \return l_bytea, or NULL on error
120 */
121 L_BYTEA *
l_byteaInitFromMem(l_uint8 * data,size_t size)122 l_byteaInitFromMem(l_uint8 *data,
123 size_t size)
124 {
125 L_BYTEA *ba;
126
127 PROCNAME("l_byteaInitFromMem");
128
129 if (!data)
130 return (L_BYTEA *)ERROR_PTR("data not defined", procName, NULL);
131 if (size <= 0)
132 return (L_BYTEA *)ERROR_PTR("no bytes to initialize", procName, NULL);
133
134 if ((ba = l_byteaCreate(size)) == NULL)
135 return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL);
136 memcpy(ba->data, data, size);
137 ba->size = size;
138 return ba;
139 }
140
141
142 /*!
143 * \brief l_byteaInitFromFile()
144 *
145 * \param[in] fname
146 * \return l_bytea, or NULL on error
147 */
148 L_BYTEA *
l_byteaInitFromFile(const char * fname)149 l_byteaInitFromFile(const char *fname)
150 {
151 FILE *fp;
152 L_BYTEA *ba;
153
154 PROCNAME("l_byteaInitFromFile");
155
156 if (!fname)
157 return (L_BYTEA *)ERROR_PTR("fname not defined", procName, NULL);
158
159 if ((fp = fopenReadStream(fname)) == NULL)
160 return (L_BYTEA *)ERROR_PTR("file stream not opened", procName, NULL);
161 ba = l_byteaInitFromStream(fp);
162 fclose(fp);
163 if (!ba)
164 return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL);
165 return ba;
166 }
167
168
169 /*!
170 * \brief l_byteaInitFromStream()
171 *
172 * \param[in] fp file stream
173 * \return l_bytea, or NULL on error
174 */
175 L_BYTEA *
l_byteaInitFromStream(FILE * fp)176 l_byteaInitFromStream(FILE *fp)
177 {
178 l_uint8 *data;
179 size_t nbytes;
180 L_BYTEA *ba;
181
182 PROCNAME("l_byteaInitFromStream");
183
184 if (!fp)
185 return (L_BYTEA *)ERROR_PTR("stream not defined", procName, NULL);
186
187 if ((data = l_binaryReadStream(fp, &nbytes)) == NULL)
188 return (L_BYTEA *)ERROR_PTR("data not read", procName, NULL);
189 if ((ba = l_byteaCreate(nbytes)) == NULL) {
190 LEPT_FREE(data);
191 return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL);
192 }
193 memcpy(ba->data, data, nbytes);
194 ba->size = nbytes;
195 LEPT_FREE(data);
196 return ba;
197 }
198
199
200 /*!
201 * \brief l_byteaCopy()
202 *
203 * \param[in] bas source lba
204 * \param[in] copyflag L_COPY, L_CLONE
205 * \return clone or copy of bas, or NULL on error
206 *
207 * <pre>
208 * Notes:
209 * (1) If cloning, up the refcount and return a ptr to %bas.
210 * </pre>
211 */
212 L_BYTEA *
l_byteaCopy(L_BYTEA * bas,l_int32 copyflag)213 l_byteaCopy(L_BYTEA *bas,
214 l_int32 copyflag)
215 {
216 PROCNAME("l_byteaCopy");
217
218 if (!bas)
219 return (L_BYTEA *)ERROR_PTR("bas not defined", procName, NULL);
220
221 if (copyflag == L_CLONE) {
222 bas->refcount++;
223 return bas;
224 }
225
226 return l_byteaInitFromMem(bas->data, bas->size);
227 }
228
229
230 /*!
231 * \brief l_byteaDestroy()
232 *
233 * \param[in,out] pba will be set to null before returning
234 * \return void
235 *
236 * <pre>
237 * Notes:
238 * (1) Decrements the ref count and, if 0, destroys the lba.
239 * (2) Always nulls the input ptr.
240 * (3) If the data has been previously removed, the lba will
241 * have been nulled, so this will do nothing.
242 * </pre>
243 */
244 void
l_byteaDestroy(L_BYTEA ** pba)245 l_byteaDestroy(L_BYTEA **pba)
246 {
247 L_BYTEA *ba;
248
249 PROCNAME("l_byteaDestroy");
250
251 if (pba == NULL) {
252 L_WARNING("ptr address is null!\n", procName);
253 return;
254 }
255
256 if ((ba = *pba) == NULL)
257 return;
258
259 /* Decrement the ref count. If it is 0, destroy the lba. */
260 ba->refcount--;
261 if (ba->refcount <= 0) {
262 if (ba->data) LEPT_FREE(ba->data);
263 LEPT_FREE(ba);
264 }
265
266 *pba = NULL;
267 return;
268 }
269
270
271 /*---------------------------------------------------------------------*
272 * Accessors *
273 *---------------------------------------------------------------------*/
274 /*!
275 * \brief l_byteaGetSize()
276 *
277 * \param[in] ba
278 * \return size of stored byte array, or 0 on error
279 */
280 size_t
l_byteaGetSize(L_BYTEA * ba)281 l_byteaGetSize(L_BYTEA *ba)
282 {
283 PROCNAME("l_byteaGetSize");
284
285 if (!ba)
286 return ERROR_INT("ba not defined", procName, 0);
287 return ba->size;
288 }
289
290
291 /*!
292 * \brief l_byteaGetData()
293 *
294 * \param[in] ba
295 * \param[out] psize size of data in lba
296 * \return ptr to existing data array, or NULL on error
297 *
298 * <pre>
299 * Notes:
300 * (1) The returned ptr is owned by %ba. Do not free it!
301 * </pre>
302 */
303 l_uint8 *
l_byteaGetData(L_BYTEA * ba,size_t * psize)304 l_byteaGetData(L_BYTEA *ba,
305 size_t *psize)
306 {
307 PROCNAME("l_byteaGetData");
308
309 if (!ba)
310 return (l_uint8 *)ERROR_PTR("ba not defined", procName, NULL);
311 if (!psize)
312 return (l_uint8 *)ERROR_PTR("&size not defined", procName, NULL);
313
314 *psize = ba->size;
315 return ba->data;
316 }
317
318
319 /*!
320 * \brief l_byteaCopyData()
321 *
322 * \param[in] ba
323 * \param[out] psize size of data in lba
324 * \return copy of data in use in the data array, or NULL on error.
325 *
326 * <pre>
327 * Notes:
328 * (1) The returned data is owned by the caller. The input %ba
329 * still owns the original data array.
330 * </pre>
331 */
332 l_uint8 *
l_byteaCopyData(L_BYTEA * ba,size_t * psize)333 l_byteaCopyData(L_BYTEA *ba,
334 size_t *psize)
335 {
336 l_uint8 *data;
337
338 PROCNAME("l_byteaCopyData");
339
340 if (!psize)
341 return (l_uint8 *)ERROR_PTR("&size not defined", procName, NULL);
342 *psize = 0;
343 if (!ba)
344 return (l_uint8 *)ERROR_PTR("ba not defined", procName, NULL);
345
346 data = l_byteaGetData(ba, psize);
347 return l_binaryCopy(data, *psize);
348 }
349
350
351 /*---------------------------------------------------------------------*
352 * Appending *
353 *---------------------------------------------------------------------*/
354 /*!
355 * \brief l_byteaAppendData()
356 *
357 * \param[in] ba
358 * \param[in] newdata byte array to be appended
359 * \param[in] newbytes size of data array
360 * \return 0 if OK, 1 on error
361 */
362 l_int32
l_byteaAppendData(L_BYTEA * ba,l_uint8 * newdata,size_t newbytes)363 l_byteaAppendData(L_BYTEA *ba,
364 l_uint8 *newdata,
365 size_t newbytes)
366 {
367 size_t size, nalloc, reqsize;
368
369 PROCNAME("l_byteaAppendData");
370
371 if (!ba)
372 return ERROR_INT("ba not defined", procName, 1);
373 if (!newdata)
374 return ERROR_INT("newdata not defined", procName, 1);
375
376 size = l_byteaGetSize(ba);
377 reqsize = size + newbytes + 1;
378 nalloc = ba->nalloc;
379 if (nalloc < reqsize)
380 l_byteaExtendArrayToSize(ba, 2 * reqsize);
381
382 memcpy((char *)(ba->data + size), (char *)newdata, newbytes);
383 ba->size += newbytes;
384 return 0;
385 }
386
387
388 /*!
389 * \brief l_byteaAppendString()
390 *
391 * \param[in] ba
392 * \param[in] str null-terminated string to be appended
393 * \return 0 if OK, 1 on error
394 */
395 l_int32
l_byteaAppendString(L_BYTEA * ba,char * str)396 l_byteaAppendString(L_BYTEA *ba,
397 char *str)
398 {
399 size_t size, len, nalloc, reqsize;
400
401 PROCNAME("l_byteaAppendString");
402
403 if (!ba)
404 return ERROR_INT("ba not defined", procName, 1);
405 if (!str)
406 return ERROR_INT("str not defined", procName, 1);
407
408 size = l_byteaGetSize(ba);
409 len = strlen(str);
410 reqsize = size + len + 1;
411 nalloc = ba->nalloc;
412 if (nalloc < reqsize)
413 l_byteaExtendArrayToSize(ba, 2 * reqsize);
414
415 memcpy(ba->data + size, str, len);
416 ba->size += len;
417 return 0;
418 }
419
420
421 /*!
422 * \brief l_byteaExtendArrayToSize()
423 *
424 * \param[in] ba
425 * \param[in] size new size of lba data array
426 * \return 0 if OK; 1 on error
427 */
428 static l_int32
l_byteaExtendArrayToSize(L_BYTEA * ba,size_t size)429 l_byteaExtendArrayToSize(L_BYTEA *ba,
430 size_t size)
431 {
432 PROCNAME("l_byteaExtendArrayToSize");
433
434 if (!ba)
435 return ERROR_INT("ba not defined", procName, 1);
436
437 if (size > ba->nalloc) {
438 if ((ba->data =
439 (l_uint8 *)reallocNew((void **)&ba->data, ba->nalloc, size))
440 == NULL)
441 return ERROR_INT("new array not returned", procName, 1);
442 ba->nalloc = size;
443 }
444 return 0;
445 }
446
447
448 /*---------------------------------------------------------------------*
449 * String join/split *
450 *---------------------------------------------------------------------*/
451 /*!
452 * \brief l_byteaJoin()
453 *
454 * \param[in] ba1
455 * \param[in,out] pba2 data array is added to the one in ba1, and
456 * then ba2 is destroyed
457 * \return 0 if OK, 1 on error
458 *
459 * <pre>
460 * Notes:
461 * (1) It is a no-op, not an error, for %ba2 to be null.
462 * </pre>
463 */
464 l_int32
l_byteaJoin(L_BYTEA * ba1,L_BYTEA ** pba2)465 l_byteaJoin(L_BYTEA *ba1,
466 L_BYTEA **pba2)
467 {
468 l_uint8 *data2;
469 size_t nbytes2;
470 L_BYTEA *ba2;
471
472 PROCNAME("l_byteaJoin");
473
474 if (!ba1)
475 return ERROR_INT("ba1 not defined", procName, 1);
476 if (!pba2)
477 return ERROR_INT("&ba2 not defined", procName, 1);
478 if ((ba2 = *pba2) == NULL) return 0;
479
480 data2 = l_byteaGetData(ba2, &nbytes2);
481 l_byteaAppendData(ba1, data2, nbytes2);
482
483 l_byteaDestroy(pba2);
484 return 0;
485 }
486
487
488 /*!
489 * \brief l_byteaSplit()
490 *
491 * \param[in] ba1 lba to split; array bytes nulled beyond the split loc
492 * \param[in] splitloc location in ba1 to split; ba2 begins there
493 * \param[out] pba2 with data starting at splitloc
494 * \return 0 if OK, 1 on error
495 */
496 l_int32
l_byteaSplit(L_BYTEA * ba1,size_t splitloc,L_BYTEA ** pba2)497 l_byteaSplit(L_BYTEA *ba1,
498 size_t splitloc,
499 L_BYTEA **pba2)
500 {
501 l_uint8 *data1;
502 size_t nbytes1, nbytes2;
503
504 PROCNAME("l_byteaSplit");
505
506 if (!pba2)
507 return ERROR_INT("&ba2 not defined", procName, 1);
508 *pba2 = NULL;
509 if (!ba1)
510 return ERROR_INT("ba1 not defined", procName, 1);
511
512 data1 = l_byteaGetData(ba1, &nbytes1);
513 if (splitloc >= nbytes1)
514 return ERROR_INT("splitloc invalid", procName, 1);
515 nbytes2 = nbytes1 - splitloc;
516
517 /* Make the new lba */
518 *pba2 = l_byteaInitFromMem(data1 + splitloc, nbytes2);
519
520 /* Null the removed bytes in the input lba */
521 memset(data1 + splitloc, 0, nbytes2);
522 ba1->size = splitloc;
523 return 0;
524 }
525
526
527 /*---------------------------------------------------------------------*
528 * Search *
529 *---------------------------------------------------------------------*/
530 /*!
531 * \brief l_byteaFindEachSequence()
532 *
533 * \param[in] ba
534 * \param[in] sequence subarray of bytes to find in data
535 * \param[in] seqlen length of sequence, in bytes
536 * \param[out] pda byte positions of each occurrence of %sequence
537 * \return 0 if OK, 1 on error
538 */
539 l_int32
l_byteaFindEachSequence(L_BYTEA * ba,l_uint8 * sequence,l_int32 seqlen,L_DNA ** pda)540 l_byteaFindEachSequence(L_BYTEA *ba,
541 l_uint8 *sequence,
542 l_int32 seqlen,
543 L_DNA **pda)
544 {
545 l_uint8 *data;
546 size_t size;
547
548 PROCNAME("l_byteaFindEachSequence");
549
550 if (!pda)
551 return ERROR_INT("&da not defined", procName, 1);
552 *pda = NULL;
553 if (!ba)
554 return ERROR_INT("ba not defined", procName, 1);
555 if (!sequence)
556 return ERROR_INT("sequence not defined", procName, 1);
557
558 data = l_byteaGetData(ba, &size);
559 *pda = arrayFindEachSequence(data, size, sequence, seqlen);
560 return 0;
561 }
562
563
564 /*---------------------------------------------------------------------*
565 * Output to file *
566 *---------------------------------------------------------------------*/
567 /*!
568 * \brief l_byteaWrite()
569 *
570 * \param[in] fname output file
571 * \param[in] ba
572 * \param[in] startloc first byte to output
573 * \param[in] endloc last byte to output; use 0 to write to the
574 * end of the data array
575 * \return 0 if OK, 1 on error
576 */
577 l_int32
l_byteaWrite(const char * fname,L_BYTEA * ba,size_t startloc,size_t endloc)578 l_byteaWrite(const char *fname,
579 L_BYTEA *ba,
580 size_t startloc,
581 size_t endloc)
582 {
583 l_int32 ret;
584 FILE *fp;
585
586 PROCNAME("l_byteaWrite");
587
588 if (!fname)
589 return ERROR_INT("fname not defined", procName, 1);
590 if (!ba)
591 return ERROR_INT("ba not defined", procName, 1);
592
593 if ((fp = fopenWriteStream(fname, "wb")) == NULL)
594 return ERROR_INT("stream not opened", procName, 1);
595 ret = l_byteaWriteStream(fp, ba, startloc, endloc);
596 fclose(fp);
597 return ret;
598 }
599
600
601 /*!
602 * \brief l_byteaWriteStream()
603 *
604 * \param[in] fp file stream opened for binary write
605 * \param[in] ba
606 * \param[in] startloc first byte to output
607 * \param[in] endloc last byte to output; use 0 to write to the
608 * end of the data array
609 * \return 0 if OK, 1 on error
610 */
611 l_int32
l_byteaWriteStream(FILE * fp,L_BYTEA * ba,size_t startloc,size_t endloc)612 l_byteaWriteStream(FILE *fp,
613 L_BYTEA *ba,
614 size_t startloc,
615 size_t endloc)
616 {
617 l_uint8 *data;
618 size_t size, nbytes;
619
620 PROCNAME("l_byteaWriteStream");
621
622 if (!fp)
623 return ERROR_INT("stream not defined", procName, 1);
624 if (!ba)
625 return ERROR_INT("ba not defined", procName, 1);
626
627 data = l_byteaGetData(ba, &size);
628 if (startloc >= size)
629 return ERROR_INT("invalid startloc", procName, 1);
630 if (endloc == 0) endloc = size - 1;
631 nbytes = endloc - startloc + 1;
632 if (nbytes < 1)
633 return ERROR_INT("endloc must be >= startloc", procName, 1);
634
635 fwrite(data + startloc, 1, nbytes, fp);
636 return 0;
637 }
638