1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file utils2.c
29 * <pre>
30 *
31 * Safe string procs
32 * char *stringNew()
33 * l_int32 stringCopy()
34 * l_int32 stringReplace()
35 * l_int32 stringLength()
36 * l_int32 stringCat()
37 * char *stringConcatNew()
38 * char *stringJoin()
39 * l_int32 stringJoinIP()
40 * char *stringReverse()
41 * char *strtokSafe()
42 * l_int32 stringSplitOnToken()
43 *
44 * Find and replace string and array procs
45 * l_int32 stringCheckForChars()
46 * char *stringRemoveChars()
47 * l_int32 stringFindSubstr()
48 * char *stringReplaceSubstr()
49 * char *stringReplaceEachSubstr()
50 * L_DNA *arrayFindEachSequence()
51 * l_int32 arrayFindSequence()
52 *
53 * Safe realloc
54 * void *reallocNew()
55 *
56 * Read and write between file and memory
57 * l_uint8 *l_binaryRead()
58 * l_uint8 *l_binaryReadStream()
59 * l_uint8 *l_binaryReadSelect()
60 * l_uint8 *l_binaryReadSelectStream()
61 * l_int32 l_binaryWrite()
62 * l_int32 nbytesInFile()
63 * l_int32 fnbytesInFile()
64 *
65 * Copy in memory
66 * l_uint8 *l_binaryCopy()
67 *
68 * File copy operations
69 * l_int32 fileCopy()
70 * l_int32 fileConcatenate()
71 * l_int32 fileAppendString()
72 *
73 * Multi-platform functions for opening file streams
74 * FILE *fopenReadStream()
75 * FILE *fopenWriteStream()
76 * FILE *fopenReadFromMemory()
77 *
78 * Opening a windows tmpfile for writing
79 * FILE *fopenWriteWinTempfile()
80 *
81 * Multi-platform functions that avoid C-runtime boundary crossing
82 * with Windows DLLs
83 * FILE *lept_fopen()
84 * l_int32 lept_fclose()
85 * void lept_calloc()
86 * void lept_free()
87 *
88 * Multi-platform file system operations in temp directories
89 * l_int32 lept_mkdir()
90 * l_int32 lept_rmdir()
91 * l_int32 lept_direxists()
92 * l_int32 lept_mv()
93 * l_int32 lept_rm_match()
94 * l_int32 lept_rm()
95 * l_int32 lept_rmfile()
96 * l_int32 lept_cp()
97 *
98 * General file name operations
99 * l_int32 splitPathAtDirectory()
100 * l_int32 splitPathAtExtension()
101 * char *pathJoin()
102 * char *appendSubdirs()
103 *
104 * Special file name operations
105 * l_int32 convertSepCharsInPath()
106 * char *genPathname()
107 * l_int32 makeTempDirname()
108 * l_int32 modifyTrailingSlash()
109 * char *l_makeTempFilename()
110 * l_int32 extractNumberFromFilename()
111 *
112 *
113 * Notes on multi-platform development
114 * -----------------------------------
115 * This is important:
116 * (1) With the exception of splitPathAtDirectory(), splitPathAtExtension()
117 * and genPathname(), all input pathnames must have unix separators.
118 * (2) On Windows, when you specify a read or write to "/tmp/...",
119 * the filename is rewritten to use the Windows temp directory:
120 * /tmp ==> <Temp>... (windows)
121 * (3) This filename rewrite, along with the conversion from unix
122 * to windows pathnames, happens in genPathname().
123 * (4) Use fopenReadStream() and fopenWriteStream() to open files,
124 * because these use genPathname() to find the platform-dependent
125 * filenames. Likewise for l_binaryRead() and l_binaryWrite().
126 * (5) For moving, copying and removing files and directories that are in
127 * subdirectories of /tmp, use the lept_*() file system shell wrappers:
128 * lept_mkdir(), lept_rmdir(), lept_mv(), lept_rm() and lept_cp().
129 * (6) Use the lept_*() C library wrappers. These work properly on
130 * Windows, where the same DLL must perform complementary operations
131 * on file streams (open/close) and heap memory (malloc/free):
132 * lept_fopen(), lept_fclose(), lept_calloc() and lept_free().
133 * (7) Why read and write files to temp directories?
134 * The library needs the ability to read and write ephemeral
135 * files to default places, both for generating debugging output
136 * and for supporting regression tests. Applications also need
137 * this ability for debugging.
138 * (8) Why do the pathname rewrite on Windows?
139 * The goal is to have the library, and programs using the library,
140 * run on multiple platforms without changes. The location of
141 * temporary files depends on the platform as well as the user's
142 * configuration. Temp files on Windows are in some directory
143 * not known a priori. To make everything work seamlessly on
144 * Windows, every time you open a file for reading or writing,
145 * use a special function such as fopenReadStream() or
146 * fopenWriteStream(); these call genPathname() to ensure that
147 * if it is a temp file, the correct path is used. To indicate
148 * that this is a temp file, the application is written with the
149 * root directory of the path in a canonical form: "/tmp".
150 * (9) Why is it that multi-platform directory functions like lept_mkdir()
151 * and lept_rmdir(), as well as associated file functions like
152 * lept_rm(), lept_mv() and lept_cp(), only work in the temp dir?
153 * These functions were designed to provide easy manipulation of
154 * temp files. The restriction to temp files is for safety -- to
155 * prevent an accidental deletion of important files. For example,
156 * lept_rmdir() first deletes all files in a specified subdirectory
157 * of temp, and then removes the directory.
158 *
159 * </pre>
160 */
161
162 #ifdef HAVE_CONFIG_H
163 #include "config_auto.h"
164 #endif /* HAVE_CONFIG_H */
165
166 #ifdef _MSC_VER
167 #include <process.h>
168 #include <direct.h>
169 #else
170 #include <unistd.h>
171 #endif /* _MSC_VER */
172
173 #ifdef _WIN32
174 #include <windows.h>
175 #include <fcntl.h> /* _O_CREAT, ... */
176 #include <io.h> /* _open */
177 #include <sys/stat.h> /* _S_IREAD, _S_IWRITE */
178 #else
179 #include <sys/stat.h> /* for stat, mkdir(2) */
180 #include <sys/types.h>
181 #endif
182
183 #include <string.h>
184 #include <stddef.h>
185 #include "allheaders.h"
186
187 /* Debug-only switch.  It is only used to test the "/tmp" --> TMPDIR
188 * pathname rewriting that is done on Windows, by emulating that rewriting
 * in unix.  It must be left at 0 in production. */
189 #define DEBUG_REWRITE 0
190
191
192 /*--------------------------------------------------------------------*
193 * Safe string operations *
194 *--------------------------------------------------------------------*/
195 /*!
196 * \brief stringNew()
197 *
198 * \param[in] src string
199 * \return dest copy of src string, or NULL on error
200 */
201 char *
stringNew(const char * src)202 stringNew(const char *src)
203 {
204 l_int32 len;
205 char *dest;
206
207 PROCNAME("stringNew");
208
209 if (!src) {
210 L_WARNING("src not defined\n", procName);
211 return NULL;
212 }
213
214 len = strlen(src);
215 if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL)
216 return (char *)ERROR_PTR("dest not made", procName, NULL);
217
218 stringCopy(dest, src, len);
219 return dest;
220 }
221
222
223 /*!
224 * \brief stringCopy()
225 *
226 * \param[in] dest existing byte buffer
227 * \param[in] src string [optional] can be null
228 * \param[in] n max number of characters to copy
229 * \return 0 if OK, 1 on error
230 *
231 * <pre>
232 * Notes:
233 * (1) Relatively safe wrapper for strncpy, that checks the input,
234 * and does not complain if %src is null or %n < 1.
235 * If %n < 1, this is a no-op.
236 * (2) %dest needs to be at least %n bytes in size.
237 * (3) We don't call strncpy() because valgrind complains about
238 * use of uninitialized values.
239 * </pre>
240 */
241 l_int32
stringCopy(char * dest,const char * src,l_int32 n)242 stringCopy(char *dest,
243 const char *src,
244 l_int32 n)
245 {
246 l_int32 i;
247
248 PROCNAME("stringCopy");
249
250 if (!dest)
251 return ERROR_INT("dest not defined", procName, 1);
252 if (!src || n < 1)
253 return 0;
254
255 /* Implementation of strncpy that valgrind doesn't complain about */
256 for (i = 0; i < n && src[i] != '\0'; i++)
257 dest[i] = src[i];
258 for (; i < n; i++)
259 dest[i] = '\0';
260 return 0;
261 }
262
263
264 /*!
265 * \brief stringReplace()
266 *
267 * \param[out] pdest string copy
268 * \param[in] src string [optional] can be null
269 * \return 0 if OK; 1 on error
270 *
271 * <pre>
272 * Notes:
273 * (1) Frees any existing dest string
274 * (2) Puts a copy of src string in the dest
275 * (3) If either or both strings are null, does something reasonable.
276 * </pre>
277 */
278 l_int32
stringReplace(char ** pdest,const char * src)279 stringReplace(char **pdest,
280 const char *src)
281 {
282 PROCNAME("stringReplace");
283
284 if (!pdest)
285 return ERROR_INT("pdest not defined", procName, 1);
286
287 if (*pdest)
288 LEPT_FREE(*pdest);
289
290 if (src)
291 *pdest = stringNew(src);
292 else
293 *pdest = NULL;
294 return 0;
295 }
296
297
298 /*!
299 * \brief stringLength()
300 *
301 * \param[in] src string can be null or NULL-terminated string
302 * \param[in] size size of src buffer
303 * \return length of src in bytes.
304 *
305 * <pre>
306 * Notes:
307 * (1) Safe implementation of strlen that only checks size bytes
308 * for trailing NUL.
309 * (2) Valid returned string lengths are between 0 and size - 1.
310 * If size bytes are checked without finding a NUL byte, then
311 * an error is indicated by returning size.
312 * </pre>
313 */
314 l_int32
stringLength(const char * src,size_t size)315 stringLength(const char *src,
316 size_t size)
317 {
318 l_int32 i;
319
320 PROCNAME("stringLength");
321
322 if (!src)
323 return ERROR_INT("src not defined", procName, 0);
324 if (size < 1)
325 return 0;
326
327 for (i = 0; i < size; i++) {
328 if (src[i] == '\0')
329 return i;
330 }
331 return size; /* didn't find a NUL byte */
332 }
333
334
335 /*!
336 * \brief stringCat()
337 *
338 * \param[in] dest null-terminated byte buffer
339 * \param[in] size size of dest
340 * \param[in] src string can be null or NULL-terminated string
341 * \return number of bytes added to dest; -1 on error
342 *
343 * <pre>
344 * Notes:
345 * (1) Alternative implementation of strncat, that checks the input,
346 * is easier to use (since the size of the dest buffer is specified
347 * rather than the number of bytes to copy), and does not complain
348 * if %src is null.
349 * (2) Never writes past end of dest.
350 * (3) If it can't append src (an error), it does nothing.
351 * (4) N.B. The order of 2nd and 3rd args is reversed from that in
352 * strncat, as in the Windows function strcat_s().
353 * </pre>
354 */
355 l_int32
stringCat(char * dest,size_t size,const char * src)356 stringCat(char *dest,
357 size_t size,
358 const char *src)
359 {
360 l_int32 i, n;
361 l_int32 lendest, lensrc;
362
363 PROCNAME("stringCat");
364
365 if (!dest)
366 return ERROR_INT("dest not defined", procName, -1);
367 if (size < 1)
368 return ERROR_INT("size < 1; too small", procName, -1);
369 if (!src)
370 return 0;
371
372 lendest = stringLength(dest, size);
373 if (lendest == size)
374 return ERROR_INT("no terminating nul byte", procName, -1);
375 lensrc = stringLength(src, size);
376 if (lensrc == 0)
377 return 0;
378 n = (lendest + lensrc > size - 1 ? size - lendest - 1 : lensrc);
379 if (n < 1)
380 return ERROR_INT("dest too small for append", procName, -1);
381
382 for (i = 0; i < n; i++)
383 dest[lendest + i] = src[i];
384 dest[lendest + n] = '\0';
385 return n;
386 }
387
388
389 /*!
390 * \brief stringConcatNew()
391 *
392 * \param[in] first first string in list
393 * \param[in] ... NULL-terminated list of strings
394 * \return result new string concatenating the input strings, or
395 * NULL if first == NULL
396 *
397 * <pre>
398 * Notes:
399 * (1) The last arg in the list of strings must be NULL.
400 * (2) Caller must free the returned string.
401 * </pre>
402 */
403 char *
stringConcatNew(const char * first,...)404 stringConcatNew(const char *first, ...)
405 {
406 size_t len;
407 char *result, *ptr;
408 const char *arg;
409 va_list args;
410
411 if (!first) return NULL;
412
413 /* Find the length of the output string */
414 va_start(args, first);
415 len = strlen(first);
416 while ((arg = va_arg(args, const char *)) != NULL)
417 len += strlen(arg);
418 va_end(args);
419 result = (char *)LEPT_CALLOC(len + 1, sizeof(char));
420
421 /* Concatenate the args */
422 va_start(args, first);
423 ptr = result;
424 arg = first;
425 while (*arg)
426 *ptr++ = *arg++;
427 while ((arg = va_arg(args, const char *)) != NULL) {
428 while (*arg)
429 *ptr++ = *arg++;
430 }
431 va_end(args);
432 return result;
433 }
434
435
436 /*!
437 * \brief stringJoin()
438 *
439 * \param[in] src1 string [optional] can be null
440 * \param[in] src2 string [optional] can be null
441 * \return concatenated string, or NULL on error
442 *
443 * <pre>
444 * Notes:
445 * (1) This is a safe version of strcat; it makes a new string.
446 * (2) It is not an error if either or both of the strings
447 * are empty, or if either or both of the pointers are null.
448 * </pre>
449 */
450 char *
stringJoin(const char * src1,const char * src2)451 stringJoin(const char *src1,
452 const char *src2)
453 {
454 char *dest;
455 l_int32 srclen1, srclen2, destlen;
456
457 PROCNAME("stringJoin");
458
459 srclen1 = (src1) ? strlen(src1) : 0;
460 srclen2 = (src2) ? strlen(src2) : 0;
461 destlen = srclen1 + srclen2 + 3;
462
463 if ((dest = (char *)LEPT_CALLOC(destlen, sizeof(char))) == NULL)
464 return (char *)ERROR_PTR("calloc fail for dest", procName, NULL);
465
466 if (src1)
467 stringCopy(dest, src1, srclen1);
468 if (src2)
469 strncat(dest, src2, srclen2);
470 return dest;
471 }
472
473
474 /*!
475 * \brief stringJoinIP()
476 *
477 * \param[in,out] psrc1 string address of src1; cannot be on the stack
478 * \param[in] src2 string [optional] can be null
479 * \return 0 if OK, 1 on error
480 *
481 * <pre>
482 * Notes:
483 * (1) This is a safe in-place version of strcat. The contents of
484 * src1 is replaced by the concatenation of src1 and src2.
485 * (2) It is not an error if either or both of the strings
486 * are empty (""), or if the pointers to the strings (*psrc1, src2)
487 * are null.
488 * (3) src1 should be initialized to null or an empty string
489 * before the first call. Use one of these:
490 * char *src1 = NULL;
491 * char *src1 = stringNew("");
492 * Then call with:
493 * stringJoinIP(&src1, src2);
494 * (4) This can also be implemented as a macro:
495 * \code
496 * #define stringJoinIP(src1, src2) \
497 * {tmpstr = stringJoin((src1),(src2)); \
498 * LEPT_FREE(src1); \
499 * (src1) = tmpstr;}
500 * \endcode
501 * (5) Another function to consider for joining many strings is
502 * stringConcatNew().
503 * </pre>
504 */
505 l_int32
stringJoinIP(char ** psrc1,const char * src2)506 stringJoinIP(char **psrc1,
507 const char *src2)
508 {
509 char *tmpstr;
510
511 PROCNAME("stringJoinIP");
512
513 if (!psrc1)
514 return ERROR_INT("&src1 not defined", procName, 1);
515
516 tmpstr = stringJoin(*psrc1, src2);
517 LEPT_FREE(*psrc1);
518 *psrc1 = tmpstr;
519 return 0;
520 }
521
522
523 /*!
524 * \brief stringReverse()
525 *
526 * \param[in] src string
527 * \return dest newly-allocated reversed string
528 */
529 char *
stringReverse(const char * src)530 stringReverse(const char *src)
531 {
532 char *dest;
533 l_int32 i, len;
534
535 PROCNAME("stringReverse");
536
537 if (!src)
538 return (char *)ERROR_PTR("src not defined", procName, NULL);
539 len = strlen(src);
540 if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL)
541 return (char *)ERROR_PTR("calloc fail for dest", procName, NULL);
542 for (i = 0; i < len; i++)
543 dest[i] = src[len - 1 - i];
544
545 return dest;
546 }
547
548
549 /*!
550 * \brief strtokSafe()
551 *
552 * \param[in] cstr input string to be sequentially parsed;
553 * use NULL after the first call
554 * \param[in] seps a string of character separators
555 * \param[out] psaveptr ptr to the next char after
556 * the last encountered separator
557 * \return substr a new string that is copied from the previous
558 * saveptr up to but not including the next
559 * separator character, or NULL if end of cstr.
560 *
561 * <pre>
562 * Notes:
563 * (1) This is a thread-safe implementation of strtok.
564 * (2) It has the same interface as strtok_r.
565 * (3) It differs from strtok_r in usage in two respects:
566 * (a) the input string is not altered
567 * (b) each returned substring is newly allocated and must
568 * be freed after use.
569 * (4) Let me repeat that. This is "safe" because the input
570 * string is not altered and because each returned string
571 * is newly allocated on the heap.
572 * (5) It is here because, surprisingly, some C libraries don't
573 * include strtok_r.
574 * (6) Important usage points:
575 * ~ Input the string to be parsed on the first invocation.
576 * ~ Then input NULL after that; the value returned in saveptr
577 * is used in all subsequent calls.
578 * (7) This is only slightly slower than strtok_k.
579 * </pre>
580 */
581 char *
strtokSafe(char * cstr,const char * seps,char ** psaveptr)582 strtokSafe(char *cstr,
583 const char *seps,
584 char **psaveptr)
585 {
586 char nextc;
587 char *start, *substr;
588 l_int32 istart, i, j, nchars;
589
590 PROCNAME("strtokSafe");
591
592 if (!seps)
593 return (char *)ERROR_PTR("seps not defined", procName, NULL);
594 if (!psaveptr)
595 return (char *)ERROR_PTR("&saveptr not defined", procName, NULL);
596
597 if (!cstr) {
598 start = *psaveptr;
599 } else {
600 start = cstr;
601 *psaveptr = NULL;
602 }
603 if (!start) /* nothing to do */
604 return NULL;
605
606 /* First time, scan for the first non-sep character */
607 istart = 0;
608 if (cstr) {
609 for (istart = 0;; istart++) {
610 if ((nextc = start[istart]) == '\0') {
611 *psaveptr = NULL; /* in case caller doesn't check ret value */
612 return NULL;
613 }
614 if (!strchr(seps, nextc))
615 break;
616 }
617 }
618
619 /* Scan through, looking for a sep character; if none is
620 * found, 'i' will be at the end of the string. */
621 for (i = istart;; i++) {
622 if ((nextc = start[i]) == '\0')
623 break;
624 if (strchr(seps, nextc))
625 break;
626 }
627
628 /* Save the substring */
629 nchars = i - istart;
630 substr = (char *)LEPT_CALLOC(nchars + 1, sizeof(char));
631 stringCopy(substr, start + istart, nchars);
632
633 /* Look for the next non-sep character.
634 * If this is the last substring, return a null saveptr. */
635 for (j = i;; j++) {
636 if ((nextc = start[j]) == '\0') {
637 *psaveptr = NULL; /* no more non-sep characters */
638 break;
639 }
640 if (!strchr(seps, nextc)) {
641 *psaveptr = start + j; /* start here on next call */
642 break;
643 }
644 }
645
646 return substr;
647 }
648
649
650 /*!
651 * \brief stringSplitOnToken()
652 *
653 * \param[in] cstr input string to be split; not altered
654 * \param[in] seps a string of character separators
655 * \param[out] phead ptr to copy of the input string, up to
656 * the first separator token encountered
657 * \param[out] ptail ptr to copy of the part of the input string
658 * starting with the first non-separator character
659 * that occurs after the first separator is found
660 * \return 0 if OK, 1 on error
661 *
662 * <pre>
663 * Notes:
664 * (1) The input string is not altered; all split parts are new strings.
665 * (2) The split occurs around the first consecutive sequence of
666 * tokens encountered.
667 * (3) The head goes from the beginning of the string up to
668 * but not including the first token found.
669 * (4) The tail contains the second part of the string, starting
670 * with the first char in that part that is NOT a token.
671 * (5) If no separator token is found, 'head' contains a copy
672 * of the input string and 'tail' is null.
673 * </pre>
674 */
675 l_int32
stringSplitOnToken(char * cstr,const char * seps,char ** phead,char ** ptail)676 stringSplitOnToken(char *cstr,
677 const char *seps,
678 char **phead,
679 char **ptail)
680 {
681 char *saveptr;
682
683 PROCNAME("stringSplitOnToken");
684
685 if (!phead)
686 return ERROR_INT("&head not defined", procName, 1);
687 if (!ptail)
688 return ERROR_INT("&tail not defined", procName, 1);
689 *phead = *ptail = NULL;
690 if (!cstr)
691 return ERROR_INT("cstr not defined", procName, 1);
692 if (!seps)
693 return ERROR_INT("seps not defined", procName, 1);
694
695 *phead = strtokSafe(cstr, seps, &saveptr);
696 if (saveptr)
697 *ptail = stringNew(saveptr);
698 return 0;
699 }
700
701
702 /*--------------------------------------------------------------------*
703 * Find and replace procs *
704 *--------------------------------------------------------------------*/
705 /*!
706 * \brief stringCheckForChars()
707 *
708 * \param[in] src input string; can be of zero length
709 * \param[in] chars string of chars to be searched for in %src
710 * \param[out] pfound 1 if any characters are found; 0 otherwise
711 * \return 0 if OK, 1 on error
712 *
713 * <pre>
714 * Notes:
715 * (1) This can be used to sanitize an operation by checking for
716 * special characters that don't belong in a string.
717 * </pre>
718 */
719 l_int32
stringCheckForChars(const char * src,const char * chars,l_int32 * pfound)720 stringCheckForChars(const char *src,
721 const char *chars,
722 l_int32 *pfound)
723 {
724 char ch;
725 l_int32 i, n;
726
727 PROCNAME("stringCheckForChars");
728
729 if (!pfound)
730 return ERROR_INT("&found not defined", procName, 1);
731 *pfound = FALSE;
732 if (!src || !chars)
733 return ERROR_INT("src and chars not both defined", procName, 1);
734
735 n = strlen(src);
736 for (i = 0; i < n; i++) {
737 ch = src[i];
738 if (strchr(chars, ch)) {
739 *pfound = TRUE;
740 break;
741 }
742 }
743 return 0;
744 }
745
746
747 /*!
748 * \brief stringRemoveChars()
749 *
750 * \param[in] src input string; can be of zero length
751 * \param[in] remchars string of chars to be removed from src
752 * \return dest string with specified chars removed, or NULL on error
753 */
754 char *
stringRemoveChars(const char * src,const char * remchars)755 stringRemoveChars(const char *src,
756 const char *remchars)
757 {
758 char ch;
759 char *dest;
760 l_int32 nsrc, i, k;
761
762 PROCNAME("stringRemoveChars");
763
764 if (!src)
765 return (char *)ERROR_PTR("src not defined", procName, NULL);
766 if (!remchars)
767 return stringNew(src);
768
769 if ((dest = (char *)LEPT_CALLOC(strlen(src) + 1, sizeof(char))) == NULL)
770 return (char *)ERROR_PTR("dest not made", procName, NULL);
771 nsrc = strlen(src);
772 for (i = 0, k = 0; i < nsrc; i++) {
773 ch = src[i];
774 if (!strchr(remchars, ch))
775 dest[k++] = ch;
776 }
777
778 return dest;
779 }
780
781
782 /*!
783 * \brief stringFindSubstr()
784 *
785 * \param[in] src input string; can be of zero length
786 * \param[in] sub substring to be searched for
787 * \param[out] ploc [optional] location of substring in src
788 * \return 1 if found; 0 if not found or on error
789 *
790 * <pre>
791 * Notes:
792 * (1) This is a wrapper around strstr().
793 * (2) Both %src and %sub must be defined, and %sub must have
794 * length of at least 1.
795 * (3) If the substring is not found and loc is returned, it has
796 * the value -1.
797 * </pre>
798 */
799 l_int32
stringFindSubstr(const char * src,const char * sub,l_int32 * ploc)800 stringFindSubstr(const char *src,
801 const char *sub,
802 l_int32 *ploc)
803 {
804 char *ptr;
805
806 PROCNAME("stringFindSubstr");
807
808 if (!src)
809 return ERROR_INT("src not defined", procName, 0);
810 if (!sub)
811 return ERROR_INT("sub not defined", procName, 0);
812 if (ploc) *ploc = -1;
813 if (strlen(sub) == 0)
814 return ERROR_INT("substring length 0", procName, 0);
815 if (strlen(src) == 0)
816 return 0;
817
818 if ((ptr = (char *)strstr(src, sub)) == NULL) /* not found */
819 return 0;
820
821 if (ploc)
822 *ploc = ptr - src;
823 return 1;
824 }
825
826
827 /*!
828 * \brief stringReplaceSubstr()
829 *
830 * \param[in] src input string; can be of zero length
831 * \param[in] sub1 substring to be replaced
832 * \param[in] sub2 substring to put in; can be ""
833 * \param[out] pfound [optional] 1 if sub1 is found; 0 otherwise
834 * \param[out] ploc [optional] location of ptr after replacement
835 * \return dest string with substring replaced, or NULL if the
836 * substring not found or on error.
837 *
838 * <pre>
839 * Notes:
840 * (1) Replaces the first instance.
841 * (2) To only remove sub1, use "" for sub2
842 * (3) Returns a new string if sub1 and sub2 are the same.
843 * (4) The optional loc is input as the byte offset within the src
844 * from which the search starts, and after the search it is the
845 * char position in the string of the next character after
846 * the substituted string.
847 * (5) N.B. If ploc is not null, loc must always be initialized.
848 * To search the string from the beginning, set loc = 0.
849 * </pre>
850 */
851 char *
stringReplaceSubstr(const char * src,const char * sub1,const char * sub2,l_int32 * pfound,l_int32 * ploc)852 stringReplaceSubstr(const char *src,
853 const char *sub1,
854 const char *sub2,
855 l_int32 *pfound,
856 l_int32 *ploc)
857 {
858 char *ptr, *dest;
859 l_int32 nsrc, nsub1, nsub2, len, npre, loc;
860
861 PROCNAME("stringReplaceSubstr");
862
863 if (!src)
864 return (char *)ERROR_PTR("src not defined", procName, NULL);
865 if (!sub1)
866 return (char *)ERROR_PTR("sub1 not defined", procName, NULL);
867 if (!sub2)
868 return (char *)ERROR_PTR("sub2 not defined", procName, NULL);
869
870 if (pfound)
871 *pfound = 0;
872 if (ploc)
873 loc = *ploc;
874 else
875 loc = 0;
876 if ((ptr = (char *)strstr(src + loc, sub1)) == NULL) {
877 return NULL;
878 }
879
880 if (pfound)
881 *pfound = 1;
882 nsrc = strlen(src);
883 nsub1 = strlen(sub1);
884 nsub2 = strlen(sub2);
885 len = nsrc + nsub2 - nsub1;
886 if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL)
887 return (char *)ERROR_PTR("dest not made", procName, NULL);
888 npre = ptr - src;
889 memcpy(dest, src, npre);
890 strcpy(dest + npre, sub2);
891 strcpy(dest + npre + nsub2, ptr + nsub1);
892 if (ploc)
893 *ploc = npre + nsub2;
894
895 return dest;
896 }
897
898
899 /*!
900 * \brief stringReplaceEachSubstr()
901 *
902 * \param[in] src input string; can be of zero length
903 * \param[in] sub1 substring to be replaced
904 * \param[in] sub2 substring to put in; can be ""
905 * \param[out] pcount [optional] the number of times that sub1
906 * is found in src; 0 if not found
907 * \return dest string with substring replaced, or NULL if the
908 * substring not found or on error.
909 *
910 * <pre>
911 * Notes:
912 * (1) Replaces every instance.
913 * (2) To only remove each instance of sub1, use "" for sub2
914 * (3) Returns NULL if sub1 and sub2 are the same.
915 * </pre>
916 */
917 char *
stringReplaceEachSubstr(const char * src,const char * sub1,const char * sub2,l_int32 * pcount)918 stringReplaceEachSubstr(const char *src,
919 const char *sub1,
920 const char *sub2,
921 l_int32 *pcount)
922 {
923 char *currstr, *newstr;
924 l_int32 loc;
925
926 PROCNAME("stringReplaceEachSubstr");
927
928 if (pcount) *pcount = 0;
929 if (!src)
930 return (char *)ERROR_PTR("src not defined", procName, NULL);
931 if (!sub1)
932 return (char *)ERROR_PTR("sub1 not defined", procName, NULL);
933 if (!sub2)
934 return (char *)ERROR_PTR("sub2 not defined", procName, NULL);
935
936 loc = 0;
937 if ((newstr = stringReplaceSubstr(src, sub1, sub2, NULL, &loc)) == NULL)
938 return NULL;
939
940 if (pcount)
941 (*pcount)++;
942 while (1) {
943 currstr = newstr;
944 newstr = stringReplaceSubstr(currstr, sub1, sub2, NULL, &loc);
945 if (!newstr)
946 return currstr;
947 LEPT_FREE(currstr);
948 if (pcount)
949 (*pcount)++;
950 }
951 }
952
953
954 /*!
955 * \brief arrayFindEachSequence()
956 *
957 * \param[in] data byte array
958 * \param[in] datalen length of data, in bytes
959 * \param[in] sequence subarray of bytes to find in data
960 * \param[in] seqlen length of sequence, in bytes
961 * \return dna of offsets where the sequence is found, or NULL if
962 * none are found or on error
963 *
964 * <pre>
965 * Notes:
966 * (1) The byte arrays %data and %sequence are not C strings,
967 * as they can contain null bytes. Therefore, for each
968 * we must give the length of the array.
969 * (2) This finds every occurrence in %data of %sequence.
970 * </pre>
971 */
972 L_DNA *
arrayFindEachSequence(const l_uint8 * data,size_t datalen,const l_uint8 * sequence,size_t seqlen)973 arrayFindEachSequence(const l_uint8 *data,
974 size_t datalen,
975 const l_uint8 *sequence,
976 size_t seqlen)
977 {
978 l_int32 start, offset, realoffset, found;
979 L_DNA *da;
980
981 PROCNAME("arrayFindEachSequence");
982
983 if (!data || !sequence)
984 return (L_DNA *)ERROR_PTR("data & sequence not both defined",
985 procName, NULL);
986
987 da = l_dnaCreate(0);
988 start = 0;
989 while (1) {
990 arrayFindSequence(data + start, datalen - start, sequence, seqlen,
991 &offset, &found);
992 if (found == FALSE)
993 break;
994
995 realoffset = start + offset;
996 l_dnaAddNumber(da, realoffset);
997 start = realoffset + seqlen;
998 if (start >= datalen)
999 break;
1000 }
1001
1002 if (l_dnaGetCount(da) == 0)
1003 l_dnaDestroy(&da);
1004 return da;
1005 }
1006
1007
1008 /*!
1009 * \brief arrayFindSequence()
1010 *
1011 * \param[in] data byte array
1012 * \param[in] datalen length of data, in bytes
1013 * \param[in] sequence subarray of bytes to find in data
1014 * \param[in] seqlen length of sequence, in bytes
1015 * \param[out] poffset offset from beginning of
1016 * data where the sequence begins
1017 * \param[out] pfound 1 if sequence is found; 0 otherwise
1018 * \return 0 if OK, 1 on error
1019 *
1020 * <pre>
1021 * Notes:
1022 * (1) The byte arrays 'data' and 'sequence' are not C strings,
1023 * as they can contain null bytes. Therefore, for each
1024 * we must give the length of the array.
1025 * (2) This searches for the first occurrence in %data of %sequence,
1026 * which consists of %seqlen bytes. The parameter %seqlen
1027 * must not exceed the actual length of the %sequence byte array.
1028 * (3) If the sequence is not found, the offset will be 0, so you
1029 * must check %found.
1030 * </pre>
1031 */
1032 l_int32
arrayFindSequence(const l_uint8 * data,size_t datalen,const l_uint8 * sequence,size_t seqlen,l_int32 * poffset,l_int32 * pfound)1033 arrayFindSequence(const l_uint8 *data,
1034 size_t datalen,
1035 const l_uint8 *sequence,
1036 size_t seqlen,
1037 l_int32 *poffset,
1038 l_int32 *pfound)
1039 {
1040 l_int32 i, j, found, lastpos;
1041
1042 PROCNAME("arrayFindSequence");
1043
1044 if (poffset) *poffset = 0;
1045 if (pfound) *pfound = FALSE;
1046 if (!data || !sequence)
1047 return ERROR_INT("data & sequence not both defined", procName, 1);
1048 if (!poffset || !pfound)
1049 return ERROR_INT("&offset and &found not defined", procName, 1);
1050
1051 lastpos = datalen - seqlen + 1;
1052 found = FALSE;
1053 for (i = 0; i < lastpos; i++) {
1054 for (j = 0; j < seqlen; j++) {
1055 if (data[i + j] != sequence[j])
1056 break;
1057 if (j == seqlen - 1)
1058 found = TRUE;
1059 }
1060 if (found == TRUE)
1061 break;
1062 }
1063
1064 if (found == TRUE) {
1065 *poffset = i;
1066 *pfound = TRUE;
1067 }
1068 return 0;
1069 }
1070
1071
1072 /*--------------------------------------------------------------------*
1073 * Safe realloc *
1074 *--------------------------------------------------------------------*/
1075 /*!
1076 * \brief reallocNew()
1077 *
1078 * \param[in,out] pindata [optional]; nulls indata
1079 * \param[in] oldsize size of input data to be copied, in bytes
1080 * \param[in] newsize size of data to be reallocated in bytes
1081 * \return ptr to new data, or NULL on error
1082 *
1083 * Action: !N.B. 3) and (4!
1084 * 1 Allocates memory, initialized to 0
1085 * 2 Copies as much of the input data as possible
1086 * to the new block, truncating the copy if necessary
1087 * 3 Frees the input data
1088 * 4 Zeroes the input data ptr
1089 *
1090 * <pre>
1091 * Notes:
1092 * (1) If newsize <=0, just frees input data and nulls ptr
1093 * (2) If input ptr is null, just callocs new memory
1094 * (3) This differs from realloc in that it always allocates
1095 * new memory (if newsize > 0) and initializes it to 0,
1096 * it requires the amount of old data to be copied,
1097 * and it takes the address of the input ptr and
1098 * nulls the handle.
1099 * </pre>
1100 */
1101 void *
reallocNew(void ** pindata,l_int32 oldsize,l_int32 newsize)1102 reallocNew(void **pindata,
1103 l_int32 oldsize,
1104 l_int32 newsize)
1105 {
1106 l_int32 minsize;
1107 void *indata;
1108 void *newdata;
1109
1110 PROCNAME("reallocNew");
1111
1112 if (!pindata)
1113 return ERROR_PTR("input data not defined", procName, NULL);
1114 indata = *pindata;
1115
1116 if (newsize <= 0) { /* nonstandard usage */
1117 if (indata) {
1118 LEPT_FREE(indata);
1119 *pindata = NULL;
1120 }
1121 return NULL;
1122 }
1123
1124 if (!indata) { /* nonstandard usage */
1125 if ((newdata = (void *)LEPT_CALLOC(1, newsize)) == NULL)
1126 return ERROR_PTR("newdata not made", procName, NULL);
1127 return newdata;
1128 }
1129
1130 /* Standard usage */
1131 if ((newdata = (void *)LEPT_CALLOC(1, newsize)) == NULL)
1132 return ERROR_PTR("newdata not made", procName, NULL);
1133 minsize = L_MIN(oldsize, newsize);
1134 memcpy((char *)newdata, (char *)indata, minsize);
1135
1136 LEPT_FREE(indata);
1137 *pindata = NULL;
1138
1139 return newdata;
1140 }
1141
1142
1143 /*--------------------------------------------------------------------*
1144 * Read and write between file and memory *
1145 *--------------------------------------------------------------------*/
1146 /*!
1147 * \brief l_binaryRead()
1148 *
1149 * \param[in] filename
1150 * \param[out] pnbytes number of bytes read
1151 * \return data, or NULL on error
1152 */
1153 l_uint8 *
l_binaryRead(const char * filename,size_t * pnbytes)1154 l_binaryRead(const char *filename,
1155 size_t *pnbytes)
1156 {
1157 l_uint8 *data;
1158 FILE *fp;
1159
1160 PROCNAME("l_binaryRead");
1161
1162 if (!pnbytes)
1163 return (l_uint8 *)ERROR_PTR("pnbytes not defined", procName, NULL);
1164 *pnbytes = 0;
1165 if (!filename)
1166 return (l_uint8 *)ERROR_PTR("filename not defined", procName, NULL);
1167
1168 if ((fp = fopenReadStream(filename)) == NULL)
1169 return (l_uint8 *)ERROR_PTR("file stream not opened", procName, NULL);
1170 data = l_binaryReadStream(fp, pnbytes);
1171 fclose(fp);
1172 return data;
1173 }
1174
1175
1176 /*!
1177 * \brief l_binaryReadStream()
1178 *
1179 * \param[in] fp file stream opened to read; can be stdin
1180 * \param[out] pnbytes number of bytes read
1181 * \return null-terminated array, or NULL on error
1182 * reading 0 bytes is not an error
1183 *
1184 * <pre>
1185 * Notes:
1186 * (1) The returned array is terminated with a null byte so that it can
1187 * be used to read ascii data from a file into a proper C string.
1188 * (2) This can be used to capture data that is piped in via stdin,
1189 * because it does not require seeking within the file.
1190 * (3) For example, you can read an image from stdin into memory
1191 * using shell redirection, with one of these shell commands:
1192 * cat <imagefile> | readprog
1193 * readprog < <imagefile>
1194 * where readprog is:
1195 * l_uint8 *data = l_binaryReadStream(stdin, &nbytes);
1196 * Pix *pix = pixReadMem(data, nbytes);
1197 * </pre>
1198 */
1199 l_uint8 *
l_binaryReadStream(FILE * fp,size_t * pnbytes)1200 l_binaryReadStream(FILE *fp,
1201 size_t *pnbytes)
1202 {
1203 l_uint8 *data;
1204 l_int32 seekable, navail, nadd, nread;
1205 L_BBUFFER *bb;
1206
1207 PROCNAME("l_binaryReadStream");
1208
1209 if (!pnbytes)
1210 return (l_uint8 *)ERROR_PTR("&nbytes not defined", procName, NULL);
1211 *pnbytes = 0;
1212 if (!fp)
1213 return (l_uint8 *)ERROR_PTR("fp not defined", procName, NULL);
1214
1215 /* Test if the stream is seekable, by attempting to seek to
1216 * the start of data. This is a no-op. If it is seekable, use
1217 * l_binaryReadSelectStream() to determine the size of the
1218 * data to be read in advance. */
1219 seekable = (ftell(fp) == 0) ? 1 : 0;
1220 if (seekable)
1221 return l_binaryReadSelectStream(fp, 0, 0, pnbytes);
1222
1223 /* If it is not seekable, use the bbuffer to realloc memory
1224 * as needed during reading. */
1225 bb = bbufferCreate(NULL, 4096);
1226 while (1) {
1227 navail = bb->nalloc - bb->n;
1228 if (navail < 4096) {
1229 nadd = L_MAX(bb->nalloc, 4096);
1230 bbufferExtendArray(bb, nadd);
1231 }
1232 nread = fread((void *)(bb->array + bb->n), 1, 4096, fp);
1233 bb->n += nread;
1234 if (nread != 4096) break;
1235 }
1236
1237 /* Copy the data to a new array sized for the data, because
1238 * the bbuffer array can be nearly twice the size we need. */
1239 if ((data = (l_uint8 *)LEPT_CALLOC(bb->n + 1, sizeof(l_uint8))) != NULL) {
1240 memcpy(data, bb->array, bb->n);
1241 *pnbytes = bb->n;
1242 } else {
1243 L_ERROR("calloc fail for data\n", procName);
1244 }
1245
1246 bbufferDestroy(&bb);
1247 return data;
1248 }
1249
1250
1251 /*!
1252 * \brief l_binaryReadSelect()
1253 *
1254 * \param[in] filename
1255 * \param[in] start first byte to read
1256 * \param[in] nbytes number of bytes to read; use 0 to read to end of file
1257 * \param[out] pnread number of bytes actually read
1258 * \return data, or NULL on error
1259 *
1260 * <pre>
1261 * Notes:
1262 * (1) The returned array is terminated with a null byte so that it can
1263 * be used to read ascii data from a file into a proper C string.
1264 * </pre>
1265 */
1266 l_uint8 *
l_binaryReadSelect(const char * filename,size_t start,size_t nbytes,size_t * pnread)1267 l_binaryReadSelect(const char *filename,
1268 size_t start,
1269 size_t nbytes,
1270 size_t *pnread)
1271 {
1272 l_uint8 *data;
1273 FILE *fp;
1274
1275 PROCNAME("l_binaryReadSelect");
1276
1277 if (!pnread)
1278 return (l_uint8 *)ERROR_PTR("pnread not defined", procName, NULL);
1279 *pnread = 0;
1280 if (!filename)
1281 return (l_uint8 *)ERROR_PTR("filename not defined", procName, NULL);
1282
1283 if ((fp = fopenReadStream(filename)) == NULL)
1284 return (l_uint8 *)ERROR_PTR("file stream not opened", procName, NULL);
1285 data = l_binaryReadSelectStream(fp, start, nbytes, pnread);
1286 fclose(fp);
1287 return data;
1288 }
1289
1290
1291 /*!
1292 * \brief l_binaryReadSelectStream()
1293 *
1294 * \param[in] fp file stream
1295 * \param[in] start first byte to read
1296 * \param[in] nbytes number of bytes to read; use 0 to read to end of file
1297 * \param[out] pnread number of bytes actually read
1298 * \return null-terminated array, or NULL on error
1299 * reading 0 bytes is not an error
1300 *
1301 * <pre>
1302 * Notes:
1303 * (1) The returned array is terminated with a null byte so that it can
1304 * be used to read ascii data from a file into a proper C string.
1305 * If the file to be read is empty and %start == 0, an array
1306 * with a single null byte is returned.
1307 * (2) Side effect: the stream pointer is re-positioned to the
1308 * beginning of the file.
1309 * </pre>
1310 */
1311 l_uint8 *
l_binaryReadSelectStream(FILE * fp,size_t start,size_t nbytes,size_t * pnread)1312 l_binaryReadSelectStream(FILE *fp,
1313 size_t start,
1314 size_t nbytes,
1315 size_t *pnread)
1316 {
1317 l_uint8 *data;
1318 size_t bytesleft, bytestoread, nread, filebytes;
1319
1320 PROCNAME("l_binaryReadSelectStream");
1321
1322 if (!pnread)
1323 return (l_uint8 *)ERROR_PTR("&nread not defined", procName, NULL);
1324 *pnread = 0;
1325 if (!fp)
1326 return (l_uint8 *)ERROR_PTR("stream not defined", procName, NULL);
1327
1328 /* Verify and adjust the parameters if necessary */
1329 fseek(fp, 0, SEEK_END); /* EOF */
1330 filebytes = ftell(fp);
1331 fseek(fp, 0, SEEK_SET);
1332 if (start > filebytes) {
1333 L_ERROR("start = %lu but filebytes = %lu\n", procName,
1334 (unsigned long)start, (unsigned long)filebytes);
1335 return NULL;
1336 }
1337 if (filebytes == 0) /* start == 0; nothing to read; return null byte */
1338 return (l_uint8 *)LEPT_CALLOC(1, 1);
1339 bytesleft = filebytes - start; /* greater than 0 */
1340 if (nbytes == 0) nbytes = bytesleft;
1341 bytestoread = (bytesleft >= nbytes) ? nbytes : bytesleft;
1342
1343 /* Read the data */
1344 if ((data = (l_uint8 *)LEPT_CALLOC(1, bytestoread + 1)) == NULL)
1345 return (l_uint8 *)ERROR_PTR("calloc fail for data", procName, NULL);
1346 fseek(fp, start, SEEK_SET);
1347 nread = fread(data, 1, bytestoread, fp);
1348 if (nbytes != nread)
1349 L_INFO("%lu bytes requested; %lu bytes read\n", procName,
1350 (unsigned long)nbytes, (unsigned long)nread);
1351 *pnread = nread;
1352 fseek(fp, 0, SEEK_SET);
1353 return data;
1354 }
1355
1356
1357 /*!
1358 * \brief l_binaryWrite()
1359 *
1360 * \param[in] filename output
1361 * \param[in] operation "w" for write; "a" for append
1362 * \param[in] data binary data to be written
1363 * \param[in] nbytes size of data array
1364 * \return 0 if OK; 1 on error
1365 */
1366 l_int32
l_binaryWrite(const char * filename,const char * operation,void * data,size_t nbytes)1367 l_binaryWrite(const char *filename,
1368 const char *operation,
1369 void *data,
1370 size_t nbytes)
1371 {
1372 char actualOperation[20];
1373 FILE *fp;
1374
1375 PROCNAME("l_binaryWrite");
1376
1377 if (!filename)
1378 return ERROR_INT("filename not defined", procName, 1);
1379 if (!operation)
1380 return ERROR_INT("operation not defined", procName, 1);
1381 if (!data)
1382 return ERROR_INT("data not defined", procName, 1);
1383 if (nbytes <= 0)
1384 return ERROR_INT("nbytes must be > 0", procName, 1);
1385
1386 if (strcmp(operation, "w") && strcmp(operation, "a"))
1387 return ERROR_INT("operation not one of {'w','a'}", procName, 1);
1388
1389 /* The 'b' flag to fopen() is ignored for all POSIX
1390 * conforming systems. However, Windows needs the 'b' flag. */
1391 stringCopy(actualOperation, operation, 2);
1392 strncat(actualOperation, "b", 2);
1393
1394 if ((fp = fopenWriteStream(filename, actualOperation)) == NULL)
1395 return ERROR_INT("stream not opened", procName, 1);
1396 fwrite(data, 1, nbytes, fp);
1397 fclose(fp);
1398 return 0;
1399 }
1400
1401
1402 /*!
1403 * \brief nbytesInFile()
1404 *
1405 * \param[in] filename
1406 * \return nbytes in file; 0 on error
1407 */
1408 size_t
nbytesInFile(const char * filename)1409 nbytesInFile(const char *filename)
1410 {
1411 size_t nbytes;
1412 FILE *fp;
1413
1414 PROCNAME("nbytesInFile");
1415
1416 if (!filename)
1417 return ERROR_INT("filename not defined", procName, 0);
1418 if ((fp = fopenReadStream(filename)) == NULL)
1419 return ERROR_INT("stream not opened", procName, 0);
1420 nbytes = fnbytesInFile(fp);
1421 fclose(fp);
1422 return nbytes;
1423 }
1424
1425
1426 /*!
1427 * \brief fnbytesInFile()
1428 *
1429 * \param[in] fp file stream
1430 * \return nbytes in file; 0 on error
1431 */
1432 size_t
fnbytesInFile(FILE * fp)1433 fnbytesInFile(FILE *fp)
1434 {
1435 l_int64 pos, nbytes;
1436
1437 PROCNAME("fnbytesInFile");
1438
1439 if (!fp)
1440 return ERROR_INT("stream not open", procName, 0);
1441
1442 pos = ftell(fp); /* initial position */
1443 fseek(fp, 0, SEEK_END); /* EOF */
1444 nbytes = ftell(fp);
1445 fseek(fp, pos, SEEK_SET); /* back to initial position */
1446 return nbytes;
1447 }
1448
1449
1450 /*--------------------------------------------------------------------*
1451 * Copy in memory *
1452 *--------------------------------------------------------------------*/
1453 /*!
1454 * \brief l_binaryCopy()
1455 *
1456 * \param[in] datas
1457 * \param[in] size of data array
1458 * \return datad on heap, or NULL on error
1459 *
1460 * <pre>
1461 * Notes:
1462 * (1) We add 4 bytes to the zeroed output because in some cases
1463 * (e.g., string handling) it is important to have the data
1464 * be null terminated. This guarantees that after the memcpy,
1465 * the result is automatically null terminated.
1466 * </pre>
1467 */
1468 l_uint8 *
l_binaryCopy(l_uint8 * datas,size_t size)1469 l_binaryCopy(l_uint8 *datas,
1470 size_t size)
1471 {
1472 l_uint8 *datad;
1473
1474 PROCNAME("l_binaryCopy");
1475
1476 if (!datas)
1477 return (l_uint8 *)ERROR_PTR("datas not defined", procName, NULL);
1478
1479 if ((datad = (l_uint8 *)LEPT_CALLOC(size + 4, sizeof(l_uint8))) == NULL)
1480 return (l_uint8 *)ERROR_PTR("datad not made", procName, NULL);
1481 memcpy(datad, datas, size);
1482 return datad;
1483 }
1484
1485
1486 /*--------------------------------------------------------------------*
1487 * File copy operations *
1488 *--------------------------------------------------------------------*/
1489 /*!
1490 * \brief fileCopy()
1491 *
1492 * \param[in] srcfile copy this file
1493 * \param[in] newfile to this file
1494 * \return 0 if OK, 1 on error
1495 */
1496 l_int32
fileCopy(const char * srcfile,const char * newfile)1497 fileCopy(const char *srcfile,
1498 const char *newfile)
1499 {
1500 l_int32 ret;
1501 size_t nbytes;
1502 l_uint8 *data;
1503
1504 PROCNAME("fileCopy");
1505
1506 if (!srcfile)
1507 return ERROR_INT("srcfile not defined", procName, 1);
1508 if (!newfile)
1509 return ERROR_INT("newfile not defined", procName, 1);
1510
1511 if ((data = l_binaryRead(srcfile, &nbytes)) == NULL)
1512 return ERROR_INT("data not returned", procName, 1);
1513 ret = l_binaryWrite(newfile, "w", data, nbytes);
1514 LEPT_FREE(data);
1515 return ret;
1516 }
1517
1518
1519 /*!
1520 * \brief fileConcatenate()
1521 *
1522 * \param[in] srcfile file to append
1523 * \param[in] destfile file to add to
1524 * \return 0 if OK, 1 on error
1525 */
1526 l_int32
fileConcatenate(const char * srcfile,const char * destfile)1527 fileConcatenate(const char *srcfile,
1528 const char *destfile)
1529 {
1530 size_t nbytes;
1531 l_uint8 *data;
1532
1533 PROCNAME("fileConcatenate");
1534
1535 if (!srcfile)
1536 return ERROR_INT("srcfile not defined", procName, 1);
1537 if (!destfile)
1538 return ERROR_INT("destfile not defined", procName, 1);
1539
1540 data = l_binaryRead(srcfile, &nbytes);
1541 l_binaryWrite(destfile, "a", data, nbytes);
1542 LEPT_FREE(data);
1543 return 0;
1544 }
1545
1546
1547 /*!
1548 * \brief fileAppendString()
1549 *
1550 * \param[in] filename
1551 * \param[in] str string to append to file
1552 * \return 0 if OK, 1 on error
1553 */
1554 l_int32
fileAppendString(const char * filename,const char * str)1555 fileAppendString(const char *filename,
1556 const char *str)
1557 {
1558 FILE *fp;
1559
1560 PROCNAME("fileAppendString");
1561
1562 if (!filename)
1563 return ERROR_INT("filename not defined", procName, 1);
1564 if (!str)
1565 return ERROR_INT("str not defined", procName, 1);
1566
1567 if ((fp = fopenWriteStream(filename, "a")) == NULL)
1568 return ERROR_INT("stream not opened", procName, 1);
1569 fprintf(fp, "%s", str);
1570 fclose(fp);
1571 return 0;
1572 }
1573
1574
1575 /*--------------------------------------------------------------------*
1576 * Multi-platform functions for opening file streams *
1577 *--------------------------------------------------------------------*/
1578 /*!
1579 * \brief fopenReadStream()
1580 *
1581 * \param[in] filename
1582 * \return stream, or NULL on error
1583 *
1584 * <pre>
1585 * Notes:
1586 * (1) This should be used whenever you want to run fopen() to
1587 * read from a stream. Never call fopen() directory.
1588 * (2) This handles the temp directory pathname conversion on windows:
1589 * /tmp ==> <Windows Temp directory>
1590 * </pre>
1591 */
1592 FILE *
fopenReadStream(const char * filename)1593 fopenReadStream(const char *filename)
1594 {
1595 char *fname, *tail;
1596 FILE *fp;
1597
1598 PROCNAME("fopenReadStream");
1599
1600 if (!filename)
1601 return (FILE *)ERROR_PTR("filename not defined", procName, NULL);
1602
1603 /* Try input filename */
1604 fname = genPathname(filename, NULL);
1605 fp = fopen(fname, "rb");
1606 LEPT_FREE(fname);
1607 if (fp) return fp;
1608
1609 /* Else, strip directory and try locally */
1610 splitPathAtDirectory(filename, NULL, &tail);
1611 fp = fopen(tail, "rb");
1612 LEPT_FREE(tail);
1613
1614 if (!fp)
1615 return (FILE *)ERROR_PTR("file not found", procName, NULL);
1616 return fp;
1617 }
1618
1619
1620 /*!
1621 * \brief fopenWriteStream()
1622 *
1623 * \param[in] filename
1624 * \param[in] modestring
1625 * \return stream, or NULL on error
1626 *
1627 * <pre>
1628 * Notes:
1629 * (1) This should be used whenever you want to run fopen() to
1630 * write or append to a stream. Never call fopen() directory.
1631 * (2) This handles the temp directory pathname conversion on windows:
1632 * /tmp ==> <Windows Temp directory>
1633 * </pre>
1634 */
1635 FILE *
fopenWriteStream(const char * filename,const char * modestring)1636 fopenWriteStream(const char *filename,
1637 const char *modestring)
1638 {
1639 char *fname;
1640 FILE *fp;
1641
1642 PROCNAME("fopenWriteStream");
1643
1644 if (!filename)
1645 return (FILE *)ERROR_PTR("filename not defined", procName, NULL);
1646
1647 fname = genPathname(filename, NULL);
1648 fp = fopen(fname, modestring);
1649 LEPT_FREE(fname);
1650 if (!fp)
1651 return (FILE *)ERROR_PTR("stream not opened", procName, NULL);
1652 return fp;
1653 }
1654
1655
1656 /*!
1657 * \brief fopenReadFromMemory()
1658 *
1659 * \param[in] data, size
1660 * \return file stream, or NULL on error
1661 *
1662 * <pre>
1663 * Notes:
1664 * (1) Work-around if fmemopen() not available.
1665 * (2) Windows tmpfile() writes into the root C:\ directory, which
1666 * requires admin privileges. This also works around that.
1667 * </pre>
1668 */
1669 FILE *
fopenReadFromMemory(const l_uint8 * data,size_t size)1670 fopenReadFromMemory(const l_uint8 *data,
1671 size_t size)
1672 {
1673 FILE *fp;
1674
1675 PROCNAME("fopenReadFromMemory");
1676
1677 if (!data)
1678 return (FILE *)ERROR_PTR("data not defined", procName, NULL);
1679
1680 #if HAVE_FMEMOPEN
1681 if ((fp = fmemopen((void *)data, size, "rb")) == NULL)
1682 return (FILE *)ERROR_PTR("stream not opened", procName, NULL);
1683 #else /* write to tmp file */
1684 L_INFO("work-around: writing to a temp file\n", procName);
1685 #ifdef _WIN32
1686 if ((fp = fopenWriteWinTempfile()) == NULL)
1687 return (FILE *)ERROR_PTR("tmpfile stream not opened", procName, NULL);
1688 #else
1689 if ((fp = tmpfile()) == NULL)
1690 return (FILE *)ERROR_PTR("tmpfile stream not opened", procName, NULL);
1691 #endif /* _WIN32 */
1692 fwrite(data, 1, size, fp);
1693 rewind(fp);
1694 #endif /* HAVE_FMEMOPEN */
1695
1696 return fp;
1697 }
1698
1699
1700 /*--------------------------------------------------------------------*
1701 * Opening a windows tmpfile for writing *
1702 *--------------------------------------------------------------------*/
/*!
 * \brief   fopenWriteWinTempfile()
 *
 * \return  file stream, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The Windows version of tmpfile() writes into the root
 *          C:\ directory, which requires admin privileges.  This
 *          function provides an alternative implementation.
 *      (2) The file is given a name from l_makeTempFilename() and is
 *          opened for reading and writing in binary mode, with the
 *          _O_TEMPORARY and _O_SHORT_LIVED flags.
 *      (3) On platforms other than Windows this always returns NULL.
 * </pre>
 */
FILE *
fopenWriteWinTempfile()
{
#ifdef _WIN32
l_int32  handle;
FILE    *fp;
char    *filename;

    PROCNAME("fopenWriteWinTempfile");

    if ((filename = l_makeTempFilename()) == NULL) {
        L_ERROR("l_makeTempFilename failed, %s\n", procName, strerror(errno));
        return NULL;
    }

        /* Report a failure before freeing the filename, so that
         * errno still belongs to _open(). */
    handle = _open(filename, _O_CREAT | _O_RDWR | _O_SHORT_LIVED |
                   _O_TEMPORARY | _O_BINARY, _S_IREAD | _S_IWRITE);
    if (handle == -1) {
        L_ERROR("_open failed, %s\n", procName, strerror(errno));
        lept_free(filename);
        return NULL;
    }
    lept_free(filename);

        /* If no stream can be attached, the descriptor is still open
         * and must be closed here to avoid leaking it. */
    if ((fp = _fdopen(handle, "r+b")) == NULL) {
        L_ERROR("_fdopen failed, %s\n", procName, strerror(errno));
        _close(handle);
        return NULL;
    }

    return fp;
#else
    return NULL;
#endif  /*  _WIN32 */
}
1748
1749
1750 /*--------------------------------------------------------------------*
1751 * Multi-platform functions that avoid C-runtime boundary *
1752 * crossing for applications with Windows DLLs *
1753 *--------------------------------------------------------------------*/
1754 /*
1755 * Problems arise when pointers to streams and data are passed
1756 * between two Windows DLLs that have been generated with different
1757 * C runtimes. To avoid this, leptonica provides wrappers for
1758 * several C library calls.
1759 */
1760 /*!
1761 * \brief lept_fopen()
1762 *
1763 * \param[in] filename
1764 * \param[in] mode same as for fopen(); e.g., "rb"
1765 * \return stream or NULL on error
1766 *
1767 * <pre>
1768 * Notes:
1769 * (1) This must be used by any application that passes
1770 * a file handle to a leptonica Windows DLL.
1771 * </pre>
1772 */
1773 FILE *
lept_fopen(const char * filename,const char * mode)1774 lept_fopen(const char *filename,
1775 const char *mode)
1776 {
1777 PROCNAME("lept_fopen");
1778
1779 if (!filename)
1780 return (FILE *)ERROR_PTR("filename not defined", procName, NULL);
1781 if (!mode)
1782 return (FILE *)ERROR_PTR("mode not defined", procName, NULL);
1783
1784 if (stringFindSubstr(mode, "r", NULL))
1785 return fopenReadStream(filename);
1786 else
1787 return fopenWriteStream(filename, mode);
1788 }
1789
1790
1791 /*!
1792 * \brief lept_fclose()
1793 *
1794 * \param[in] fp file stream
1795 * \return 0 if OK, 1 on error
1796 *
1797 * <pre>
1798 * Notes:
1799 * (1) This should be used by any application that accepts
1800 * a file handle generated by a leptonica Windows DLL.
1801 * </pre>
1802 */
1803 l_int32
lept_fclose(FILE * fp)1804 lept_fclose(FILE *fp)
1805 {
1806 PROCNAME("lept_fclose");
1807
1808 if (!fp)
1809 return ERROR_INT("stream not defined", procName, 1);
1810
1811 return fclose(fp);
1812 }
1813
1814
/*!
 * \brief   lept_calloc()
 *
 * \param[in]    nmemb   number of members
 * \param[in]    size    of each member
 * \return  void ptr, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) For safety with windows DLLs, this can be used in conjunction
 *          with lept_free() to avoid C-runtime boundary problems.
 *          Just use these two functions throughout your application.
 *      (2) NULL is returned, without allocating, if either %nmemb
 *          or %size is 0.
 * </pre>
 */
void *
lept_calloc(size_t  nmemb,
            size_t  size)
{
    if (nmemb > 0 && size > 0)
        return LEPT_CALLOC(nmemb, size);
    return NULL;
}
1837
1838
/*!
 * \brief   lept_free()
 *
 * \param[in]    ptr   heap data to be freed; a null ptr is ignored
 *
 * <pre>
 * Notes:
 *      (1) This should be used by any application that accepts
 *          heap data allocated by a leptonica Windows DLL.
 * </pre>
 */
void
lept_free(void *ptr)
{
    if (ptr != NULL)
        LEPT_FREE(ptr);
}
1857
1858
1859 /*--------------------------------------------------------------------*
1860 * Multi-platform file system operations *
1861 * [ These only write to /tmp or its subdirectories ] *
1862 *--------------------------------------------------------------------*/
1863 /*!
1864 * \brief lept_mkdir()
1865 *
1866 * \param[in] subdir of /tmp or its equivalent on Windows
1867 * \return 0 on success, non-zero on failure
1868 *
1869 * <pre>
1870 * Notes:
1871 * (1) %subdir is a partial path that can consist of one or more
1872 * directories.
1873 * (2) This makes any subdirectories of /tmp that are required.
1874 * (3) The root temp directory is:
1875 * /tmp (unix) [default]
1876 * <Temp> (windows)
1877 * </pre>
1878 */
1879 l_int32
lept_mkdir(const char * subdir)1880 lept_mkdir(const char *subdir)
1881 {
1882 char *dir, *tmpdir;
1883 l_int32 i, n;
1884 l_int32 ret = 0;
1885 SARRAY *sa;
1886 #ifdef _WIN32
1887 l_uint32 attributes;
1888 #endif /* _WIN32 */
1889
1890 PROCNAME("lept_mkdir");
1891
1892 if (!LeptDebugOK) {
1893 L_INFO("making named temp subdirectory %s is disabled\n",
1894 procName, subdir);
1895 return 0;
1896 }
1897
1898 if (!subdir)
1899 return ERROR_INT("subdir not defined", procName, 1);
1900 if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/'))
1901 return ERROR_INT("subdir not an actual subdirectory", procName, 1);
1902
1903 sa = sarrayCreate(0);
1904 sarraySplitString(sa, subdir, "/");
1905 n = sarrayGetCount(sa);
1906 dir = genPathname("/tmp", NULL);
1907 /* Make sure the tmp directory exists */
1908 #ifndef _WIN32
1909 ret = mkdir(dir, 0777);
1910 #else
1911 attributes = GetFileAttributes(dir);
1912 if (attributes == INVALID_FILE_ATTRIBUTES)
1913 ret = (CreateDirectory(dir, NULL) ? 0 : 1);
1914 #endif
1915 /* Make all the subdirectories */
1916 for (i = 0; i < n; i++) {
1917 tmpdir = pathJoin(dir, sarrayGetString(sa, i, L_NOCOPY));
1918 #ifndef _WIN32
1919 ret += mkdir(tmpdir, 0777);
1920 #else
1921 if (CreateDirectory(tmpdir, NULL) == 0)
1922 ret += (GetLastError () != ERROR_ALREADY_EXISTS);
1923 #endif
1924 LEPT_FREE(dir);
1925 dir = tmpdir;
1926 }
1927 LEPT_FREE(dir);
1928 sarrayDestroy(&sa);
1929 if (ret > 0)
1930 L_ERROR("failure to create %d directories\n", procName, ret);
1931 return ret;
1932 }
1933
1934
1935 /*!
1936 * \brief lept_rmdir()
1937 *
1938 * \param[in] subdir of /tmp or its equivalent on Windows
1939 * \return 0 on success, non-zero on failure
1940 *
1941 * <pre>
1942 * Notes:
1943 * (1) %subdir is a partial path that can consist of one or more
1944 * directories.
1945 * (2) This removes all files from the specified subdirectory of
1946 * the root temp directory:
1947 * /tmp (unix)
1948 * <Temp> (windows)
1949 * and then removes the subdirectory.
1950 * (3) The combination
1951 * lept_rmdir(subdir);
1952 * lept_mkdir(subdir);
1953 * is guaranteed to give you an empty subdirectory.
1954 * </pre>
1955 */
1956 l_int32
lept_rmdir(const char * subdir)1957 lept_rmdir(const char *subdir)
1958 {
1959 char *dir, *realdir, *fname, *fullname;
1960 l_int32 exists, ret, i, nfiles;
1961 SARRAY *sa;
1962 #ifdef _WIN32
1963 char *newpath;
1964 #endif /* _WIN32 */
1965
1966 PROCNAME("lept_rmdir");
1967
1968 if (!subdir)
1969 return ERROR_INT("subdir not defined", procName, 1);
1970 if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/'))
1971 return ERROR_INT("subdir not an actual subdirectory", procName, 1);
1972
1973 /* Find the temp subdirectory */
1974 dir = pathJoin("/tmp", subdir);
1975 if (!dir)
1976 return ERROR_INT("directory name not made", procName, 1);
1977 lept_direxists(dir, &exists);
1978 if (!exists) { /* fail silently */
1979 LEPT_FREE(dir);
1980 return 0;
1981 }
1982
1983 /* List all the files in that directory */
1984 if ((sa = getFilenamesInDirectory(dir)) == NULL) {
1985 L_ERROR("directory %s does not exist!\n", procName, dir);
1986 LEPT_FREE(dir);
1987 return 1;
1988 }
1989 nfiles = sarrayGetCount(sa);
1990
1991 for (i = 0; i < nfiles; i++) {
1992 fname = sarrayGetString(sa, i, L_NOCOPY);
1993 fullname = genPathname(dir, fname);
1994 remove(fullname);
1995 LEPT_FREE(fullname);
1996 }
1997
1998 #ifndef _WIN32
1999 realdir = genPathname("/tmp", subdir);
2000 ret = rmdir(realdir);
2001 LEPT_FREE(realdir);
2002 #else
2003 newpath = genPathname(dir, NULL);
2004 ret = (RemoveDirectory(newpath) ? 0 : 1);
2005 LEPT_FREE(newpath);
2006 #endif /* !_WIN32 */
2007
2008 sarrayDestroy(&sa);
2009 LEPT_FREE(dir);
2010 return ret;
2011 }
2012
2013
2014 /*!
2015 * \brief lept_direxists()
2016 *
2017 * \param[in] dir
2018 * \param[out] pexists 1 if it exists; 0 otherwise
2019 * \return void
2020 *
2021 * <pre>
2022 * Notes:
2023 * (1) Always use unix pathname separators.
2024 * (2) By calling genPathname(), if the pathname begins with "/tmp"
2025 * this does an automatic directory translation on windows
2026 * to a path in the windows <Temp> directory:
2027 * "/tmp" ==> <Temp> (windows)
2028 * </pre>
2029 */
2030 void
lept_direxists(const char * dir,l_int32 * pexists)2031 lept_direxists(const char *dir,
2032 l_int32 *pexists)
2033 {
2034 char *realdir;
2035
2036 if (!pexists) return;
2037 *pexists = 0;
2038 if (!dir) return;
2039 if ((realdir = genPathname(dir, NULL)) == NULL)
2040 return;
2041
2042 #ifndef _WIN32
2043 {
2044 struct stat s;
2045 l_int32 err = stat(realdir, &s);
2046 if (err != -1 && S_ISDIR(s.st_mode))
2047 *pexists = 1;
2048 }
2049 #else /* _WIN32 */
2050 l_uint32 attributes;
2051 attributes = GetFileAttributes(realdir);
2052 if (attributes != INVALID_FILE_ATTRIBUTES &&
2053 (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
2054 *pexists = 1;
2055 }
2056 #endif /* _WIN32 */
2057
2058 LEPT_FREE(realdir);
2059 return;
2060 }
2061
2062
2063 /*!
2064 * \brief lept_rm_match()
2065 *
2066 * \param[in] subdir [optional] If NULL, the removed files are in /tmp
2067 * \param[in] substr [optional] pattern to match in filename
2068 * \return 0 on success, non-zero on failure
2069 *
2070 * <pre>
2071 * Notes:
2072 * (1) This removes the matched files in /tmp or a subdirectory of /tmp.
2073 * Use NULL for %subdir if the files are in /tmp.
2074 * (2) If %substr == NULL, this removes all files in the directory.
2075 * If %substr == "" (empty), this removes no files.
2076 * If both %subdir == NULL and %substr == NULL, this removes
2077 * all files in /tmp.
2078 * (3) Use unix pathname separators.
2079 * (4) By calling genPathname(), if the pathname begins with "/tmp"
2080 * this does an automatic directory translation on windows
2081 * to a path in the windows <Temp> directory:
2082 * "/tmp" ==> <Temp> (windows)
2083 * (5) Error conditions:
2084 * * returns -1 if the directory is not found
2085 * * returns the number of files (> 0) that it was unable to remove.
2086 * </pre>
2087 */
2088 l_int32
lept_rm_match(const char * subdir,const char * substr)2089 lept_rm_match(const char *subdir,
2090 const char *substr)
2091 {
2092 char *path, *fname;
2093 char tempdir[256];
2094 l_int32 i, n, ret;
2095 SARRAY *sa;
2096
2097 PROCNAME("lept_rm_match");
2098
2099 makeTempDirname(tempdir, 256, subdir);
2100 if ((sa = getSortedPathnamesInDirectory(tempdir, substr, 0, 0)) == NULL)
2101 return ERROR_INT("sa not made", procName, -1);
2102 n = sarrayGetCount(sa);
2103 if (n == 0) {
2104 L_WARNING("no matching files found\n", procName);
2105 sarrayDestroy(&sa);
2106 return 0;
2107 }
2108
2109 ret = 0;
2110 for (i = 0; i < n; i++) {
2111 fname = sarrayGetString(sa, i, L_NOCOPY);
2112 path = genPathname(fname, NULL);
2113 if (lept_rmfile(path) != 0) {
2114 L_ERROR("failed to remove %s\n", procName, path);
2115 ret++;
2116 }
2117 LEPT_FREE(path);
2118 }
2119 sarrayDestroy(&sa);
2120 return ret;
2121 }
2122
2123
2124 /*!
2125 * \brief lept_rm()
2126 *
2127 * \param[in] subdir [optional] of '/tmp'; can be NULL
2128 * \param[in] tail filename without the directory
2129 * \return 0 on success, non-zero on failure
2130 *
2131 * <pre>
2132 * Notes:
2133 * (1) By calling genPathname(), this does an automatic directory
2134 * translation on windows to a path in the windows <Temp> directory:
2135 * "/tmp/..." ==> <Temp>/... (windows)
2136 * </pre>
2137 */
2138 l_int32
lept_rm(const char * subdir,const char * tail)2139 lept_rm(const char *subdir,
2140 const char *tail)
2141 {
2142 char *path;
2143 char newtemp[256];
2144 l_int32 ret;
2145
2146 PROCNAME("lept_rm");
2147
2148 if (!tail || strlen(tail) == 0)
2149 return ERROR_INT("tail undefined or empty", procName, 1);
2150
2151 if (makeTempDirname(newtemp, 256, subdir))
2152 return ERROR_INT("temp dirname not made", procName, 1);
2153 path = genPathname(newtemp, tail);
2154 ret = lept_rmfile(path);
2155 LEPT_FREE(path);
2156 return ret;
2157 }
2158
2159
2160 /*!
2161 * \brief
2162 *
2163 * lept_rmfile()
2164 *
2165 * \param[in] filepath full path to file including the directory
2166 * \return 0 on success, non-zero on failure
2167 *
2168 * <pre>
2169 * Notes:
2170 * (1) This removes the named file.
2171 * (2) Use unix pathname separators.
2172 * (3) There is no name translation.
2173 * (4) Unlike the other lept_* functions in this section, this can remove
2174 * any file -- it is not restricted to files that are in /tmp or a
2175 * subdirectory of it.
2176 * </pre>
2177 */
2178 l_int32
lept_rmfile(const char * filepath)2179 lept_rmfile(const char *filepath)
2180 {
2181 l_int32 ret;
2182
2183 PROCNAME("lept_rmfile");
2184
2185 if (!filepath || strlen(filepath) == 0)
2186 return ERROR_INT("filepath undefined or empty", procName, 1);
2187
2188 #ifndef _WIN32
2189 ret = remove(filepath);
2190 #else
2191 /* Set attributes to allow deletion of read-only files */
2192 SetFileAttributes(filepath, FILE_ATTRIBUTE_NORMAL);
2193 ret = DeleteFile(filepath) ? 0 : 1;
2194 #endif /* !_WIN32 */
2195
2196 return ret;
2197 }
2198
2199
2200 /*!
2201 * \brief lept_mv()
2202 *
2203 * \param[in] srcfile
2204 * \param[in] newdir [optional]; can be NULL
2205 * \param[in] newtail [optional]; can be NULL
2206 * \param[out] pnewpath [optional] of actual path; can be NULL
2207 * \return 0 on success, non-zero on failure
2208 *
2209 * <pre>
2210 * Notes:
2211 * (1) This moves %srcfile to /tmp or to a subdirectory of /tmp.
2212 * (2) %srcfile can either be a full path or relative to the
2213 * current directory.
2214 * (3) %newdir can either specify an existing subdirectory of /tmp
2215 * or can be NULL. In the latter case, the file will be written
2216 * into /tmp.
2217 * (4) %newtail can either specify a filename tail or, if NULL,
2218 * the filename is taken from src-tail, the tail of %srcfile.
2219 * (5) For debugging, the computed newpath can be returned. It must
2220 * be freed by the caller.
2221 * (6) Reminders:
2222 * (a) specify files using unix pathnames
2223 * (b) for windows, translates
2224 * /tmp ==> <Temp>
2225 * where <Temp> is the windows temp directory
2226 * (7) Examples:
2227 * * newdir = NULL, newtail = NULL ==> /tmp/src-tail
2228 * * newdir = NULL, newtail = abc ==> /tmp/abc
2229 * * newdir = def/ghi, newtail = NULL ==> /tmp/def/ghi/src-tail
2230 * * newdir = def/ghi, newtail = abc ==> /tmp/def/ghi/abc
2231 * </pre>
2232 */
2233 l_int32
lept_mv(const char * srcfile,const char * newdir,const char * newtail,char ** pnewpath)2234 lept_mv(const char *srcfile,
2235 const char *newdir,
2236 const char *newtail,
2237 char **pnewpath)
2238 {
2239 char *srcpath, *newpath, *realpath, *dir, *srctail;
2240 char newtemp[256];
2241 l_int32 ret;
2242
2243 PROCNAME("lept_mv");
2244
2245 if (!srcfile)
2246 return ERROR_INT("srcfile not defined", procName, 1);
2247
2248 /* Require output pathname to be in /tmp/ or a subdirectory */
2249 if (makeTempDirname(newtemp, 256, newdir) == 1)
2250 return ERROR_INT("newdir not NULL or a subdir of /tmp", procName, 1);
2251
2252 /* Get canonical src pathname */
2253 splitPathAtDirectory(srcfile, &dir, &srctail);
2254
2255 #ifndef _WIN32
2256 srcpath = pathJoin(dir, srctail);
2257 LEPT_FREE(dir);
2258
2259 /* Generate output pathname */
2260 if (!newtail || newtail[0] == '\0')
2261 newpath = pathJoin(newtemp, srctail);
2262 else
2263 newpath = pathJoin(newtemp, newtail);
2264 LEPT_FREE(srctail);
2265
2266 /* Overwrite any existing file at 'newpath' */
2267 ret = fileCopy(srcpath, newpath);
2268 if (!ret) {
2269 realpath = genPathname(srcpath, NULL);
2270 remove(realpath);
2271 LEPT_FREE(realpath);
2272 }
2273 #else
2274 srcpath = genPathname(dir, srctail);
2275 LEPT_FREE(dir);
2276
2277 /* Generate output pathname */
2278 if (!newtail || newtail[0] == '\0')
2279 newpath = genPathname(newtemp, srctail);
2280 else
2281 newpath = genPathname(newtemp, newtail);
2282 LEPT_FREE(srctail);
2283
2284 /* Overwrite any existing file at 'newpath' */
2285 ret = MoveFileEx(srcpath, newpath,
2286 MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING) ? 0 : 1;
2287 #endif /* ! _WIN32 */
2288
2289 LEPT_FREE(srcpath);
2290 if (pnewpath)
2291 *pnewpath = newpath;
2292 else
2293 LEPT_FREE(newpath);
2294 return ret;
2295 }
2296
2297
2298 /*!
2299 * \brief lept_cp()
2300 *
2301 * \param[in] srcfile
2302 * \param[in] newdir [optional]; can be NULL
2303 * \param[in] newtail [optional]; can be NULL
2304 * \param[out] pnewpath [optional] of actual path; can be NULL
2305 * \return 0 on success, non-zero on failure
2306 *
2307 * <pre>
2308 * Notes:
2309 * (1) This copies %srcfile to /tmp or to a subdirectory of /tmp.
2310 * (2) %srcfile can either be a full path or relative to the
2311 * current directory.
2312 * (3) %newdir can either specify an existing subdirectory of /tmp,
2313 * or can be NULL. In the latter case, the file will be written
2314 * into /tmp.
2315 * (4) %newtail can either specify a filename tail or, if NULL,
2316 * the filename is taken from src-tail, the tail of %srcfile.
2317 * (5) For debugging, the computed newpath can be returned. It must
2318 * be freed by the caller.
2319 * (6) Reminders:
2320 * (a) specify files using unix pathnames
2321 * (b) for windows, translates
2322 * /tmp ==> <Temp>
2323 * where <Temp> is the windows temp directory
2324 * (7) Examples:
2325 * * newdir = NULL, newtail = NULL ==> /tmp/src-tail
2326 * * newdir = NULL, newtail = abc ==> /tmp/abc
2327 * * newdir = def/ghi, newtail = NULL ==> /tmp/def/ghi/src-tail
2328 * * newdir = def/ghi, newtail = abc ==> /tmp/def/ghi/abc
2329 *
2330 * </pre>
2331 */
2332 l_int32
lept_cp(const char * srcfile,const char * newdir,const char * newtail,char ** pnewpath)2333 lept_cp(const char *srcfile,
2334 const char *newdir,
2335 const char *newtail,
2336 char **pnewpath)
2337 {
2338 char *srcpath, *newpath, *dir, *srctail;
2339 char newtemp[256];
2340 l_int32 ret;
2341
2342 PROCNAME("lept_cp");
2343
2344 if (!srcfile)
2345 return ERROR_INT("srcfile not defined", procName, 1);
2346
2347 /* Require output pathname to be in /tmp or a subdirectory */
2348 if (makeTempDirname(newtemp, 256, newdir) == 1)
2349 return ERROR_INT("newdir not NULL or a subdir of /tmp", procName, 1);
2350
2351 /* Get canonical src pathname */
2352 splitPathAtDirectory(srcfile, &dir, &srctail);
2353
2354 #ifndef _WIN32
2355 srcpath = pathJoin(dir, srctail);
2356 LEPT_FREE(dir);
2357
2358 /* Generate output pathname */
2359 if (!newtail || newtail[0] == '\0')
2360 newpath = pathJoin(newtemp, srctail);
2361 else
2362 newpath = pathJoin(newtemp, newtail);
2363 LEPT_FREE(srctail);
2364
2365 /* Overwrite any existing file at 'newpath' */
2366 ret = fileCopy(srcpath, newpath);
2367 #else
2368 srcpath = genPathname(dir, srctail);
2369 LEPT_FREE(dir);
2370
2371 /* Generate output pathname */
2372 if (!newtail || newtail[0] == '\0')
2373 newpath = genPathname(newtemp, srctail);
2374 else
2375 newpath = genPathname(newtemp, newtail);
2376 LEPT_FREE(srctail);
2377
2378 /* Overwrite any existing file at 'newpath' */
2379 ret = CopyFile(srcpath, newpath, FALSE) ? 0 : 1;
2380 #endif /* !_WIN32 */
2381
2382 LEPT_FREE(srcpath);
2383 if (pnewpath)
2384 *pnewpath = newpath;
2385 else
2386 LEPT_FREE(newpath);
2387 return ret;
2388 }
2389
2390
/*--------------------------------------------------------------------*
 *                    General file name operations                    *
 *--------------------------------------------------------------------*/
2394 /*!
2395 * \brief splitPathAtDirectory()
2396 *
2397 * \param[in] pathname full path; can be a directory
2398 * \param[out] pdir [optional] root directory name of
2399 * input path, including trailing '/'
2400 * \param[out] ptail [optional] path tail, which is either
2401 * the file name within the root directory or
2402 * the last sub-directory in the path
2403 * \return 0 if OK, 1 on error
2404 *
2405 * <pre>
2406 * Notes:
2407 * (1) If you only want the tail, input null for the root directory ptr.
2408 * (2) If you only want the root directory name, input null for the
2409 * tail ptr.
2410 * (3) This function makes decisions based only on the lexical
2411 * structure of the input. Examples:
2412 * /usr/tmp/abc --> dir: /usr/tmp/ tail: abc
2413 * /usr/tmp/ --> dir: /usr/tmp/ tail: [empty string]
2414 * /usr/tmp --> dir: /usr/ tail: tmp
2415 * abc --> dir: [empty string] tail: abc
2416 * (4) The input can have either forward (unix) or backward (win)
2417 * slash separators. The output has unix separators.
2418 * Note that Win32 pathname functions generally accept both
2419 * slash forms, but the windows command line interpreter
2420 * only accepts backward slashes, because forward slashes are
2421 * used to demarcate switches (vs. dashes in unix).
2422 * </pre>
2423 */
2424 l_int32
splitPathAtDirectory(const char * pathname,char ** pdir,char ** ptail)2425 splitPathAtDirectory(const char *pathname,
2426 char **pdir,
2427 char **ptail)
2428 {
2429 char *cpathname, *lastslash;
2430
2431 PROCNAME("splitPathAtDirectory");
2432
2433 if (!pdir && !ptail)
2434 return ERROR_INT("null input for both strings", procName, 1);
2435 if (pdir) *pdir = NULL;
2436 if (ptail) *ptail = NULL;
2437 if (!pathname)
2438 return ERROR_INT("pathname not defined", procName, 1);
2439
2440 cpathname = stringNew(pathname);
2441 convertSepCharsInPath(cpathname, UNIX_PATH_SEPCHAR);
2442 lastslash = strrchr(cpathname, '/');
2443 if (lastslash) {
2444 if (ptail)
2445 *ptail = stringNew(lastslash + 1);
2446 if (pdir) {
2447 *(lastslash + 1) = '\0';
2448 *pdir = cpathname;
2449 } else {
2450 LEPT_FREE(cpathname);
2451 }
2452 } else { /* no directory */
2453 if (pdir)
2454 *pdir = stringNew("");
2455 if (ptail)
2456 *ptail = cpathname;
2457 else
2458 LEPT_FREE(cpathname);
2459 }
2460
2461 return 0;
2462 }
2463
2464
2465 /*!
2466 * \brief splitPathAtExtension()
2467 *
2468 * \param[in] pathname full path; can be a directory
2469 * \param[out] pbasename [optional] pathname not including the
2470 * last dot and characters after that
2471 * \param[out] pextension [optional] path extension, which is
2472 * the last dot and the characters after it. If
2473 * there is no extension, it returns the empty string
2474 * \return 0 if OK, 1 on error
2475 *
2476 * <pre>
2477 * Notes:
2478 * (1) If you only want the extension, input null for the basename ptr.
2479 * (2) If you only want the basename without extension, input null
2480 * for the extension ptr.
2481 * (3) This function makes decisions based only on the lexical
2482 * structure of the input. Examples:
2483 * /usr/tmp/abc.jpg --> basename: /usr/tmp/abc ext: .jpg
2484 * /usr/tmp/.jpg --> basename: /usr/tmp/ ext: .jpg
2485 * /usr/tmp.jpg/ --> basename: /usr/tmp.jpg/ ext: [empty str]
2486 * ./.jpg --> basename: ./ ext: .jpg
2487 * (4) The input can have either forward (unix) or backward (win)
2488 * slash separators. The output has unix separators.
2489 * </pre>
2490 */
2491 l_int32
splitPathAtExtension(const char * pathname,char ** pbasename,char ** pextension)2492 splitPathAtExtension(const char *pathname,
2493 char **pbasename,
2494 char **pextension)
2495 {
2496 char *tail, *dir, *lastdot;
2497 char empty[4] = "";
2498
2499 PROCNAME("splitPathExtension");
2500
2501 if (!pbasename && !pextension)
2502 return ERROR_INT("null input for both strings", procName, 1);
2503 if (pbasename) *pbasename = NULL;
2504 if (pextension) *pextension = NULL;
2505 if (!pathname)
2506 return ERROR_INT("pathname not defined", procName, 1);
2507
2508 /* Split out the directory first */
2509 splitPathAtDirectory(pathname, &dir, &tail);
2510
2511 /* Then look for a "." in the tail part.
2512 * This way we ignore all "." in the directory. */
2513 if ((lastdot = strrchr(tail, '.'))) {
2514 if (pextension)
2515 *pextension = stringNew(lastdot);
2516 if (pbasename) {
2517 *lastdot = '\0';
2518 *pbasename = stringJoin(dir, tail);
2519 }
2520 } else {
2521 if (pextension)
2522 *pextension = stringNew(empty);
2523 if (pbasename)
2524 *pbasename = stringNew(pathname);
2525 }
2526 LEPT_FREE(dir);
2527 LEPT_FREE(tail);
2528 return 0;
2529 }
2530
2531
2532 /*!
2533 * \brief pathJoin()
2534 *
2535 * \param[in] dir [optional] can be null
2536 * \param[in] fname [optional] can be null
2537 * \return specially concatenated path, or NULL on error
2538 *
2539 * <pre>
2540 * Notes:
2541 * (1) Use unix-style pathname separators ('/').
2542 * (2) %fname can be the entire path, or part of the path containing
2543 * at least one directory, or a tail without a directory, or NULL.
2544 * (3) It produces a path that strips multiple slashes to a single
2545 * slash, joins %dir and %fname by a slash, and has no trailing
2546 * slashes (except in the cases where %dir == "/" and
2547 * %fname == NULL, or v.v.).
2548 * (4) If both %dir and %fname are null, produces an empty string.
2549 * (5) Neither %dir nor %fname can begin with '..'.
2550 * (6) The result is not canonicalized or tested for correctness:
2551 * garbage in (e.g., /&%), garbage out.
2552 * (7) Examples:
2553 * //tmp// + //abc/ --> /tmp/abc
2554 * tmp/ + /abc/ --> tmp/abc
2555 * tmp/ + abc/ --> tmp/abc
2556 * /tmp/ + /// --> /tmp
2557 * /tmp/ + NULL --> /tmp
2558 * // + /abc// --> /abc
2559 * // + NULL --> /
2560 * NULL + /abc/def/ --> /abc/def
2561 * NULL + abc// --> abc
2562 * NULL + // --> /
2563 * NULL + NULL --> (empty string)
2564 * "" + "" --> (empty string)
2565 * "" + / --> /
2566 * ".." + /etc/foo --> NULL
2567 * /tmp + ".." --> NULL
2568 * </pre>
2569 */
2570 char *
pathJoin(const char * dir,const char * fname)2571 pathJoin(const char *dir,
2572 const char *fname)
2573 {
2574 char *slash = (char *)"/";
2575 char *str, *dest;
2576 l_int32 i, n1, n2, emptydir;
2577 size_t size;
2578 SARRAY *sa1, *sa2;
2579 L_BYTEA *ba;
2580
2581 PROCNAME("pathJoin");
2582
2583 if (!dir && !fname)
2584 return stringNew("");
2585 if (dir && strlen(dir) >= 2 && dir[0] == '.' && dir[1] == '.')
2586 return (char *)ERROR_PTR("dir starts with '..'", procName, NULL);
2587 if (fname && strlen(fname) >= 2 && fname[0] == '.' && fname[1] == '.')
2588 return (char *)ERROR_PTR("fname starts with '..'", procName, NULL);
2589
2590 sa1 = sarrayCreate(0);
2591 sa2 = sarrayCreate(0);
2592 ba = l_byteaCreate(4);
2593
2594 /* Process %dir */
2595 if (dir && strlen(dir) > 0) {
2596 if (dir[0] == '/')
2597 l_byteaAppendString(ba, slash);
2598 sarraySplitString(sa1, dir, "/"); /* removes all slashes */
2599 n1 = sarrayGetCount(sa1);
2600 for (i = 0; i < n1; i++) {
2601 str = sarrayGetString(sa1, i, L_NOCOPY);
2602 l_byteaAppendString(ba, str);
2603 l_byteaAppendString(ba, slash);
2604 }
2605 }
2606
2607 /* Special case to add leading slash: dir NULL or empty string */
2608 emptydir = dir && strlen(dir) == 0;
2609 if ((!dir || emptydir) && fname && strlen(fname) > 0 && fname[0] == '/')
2610 l_byteaAppendString(ba, slash);
2611
2612 /* Process %fname */
2613 if (fname && strlen(fname) > 0) {
2614 sarraySplitString(sa2, fname, "/");
2615 n2 = sarrayGetCount(sa2);
2616 for (i = 0; i < n2; i++) {
2617 str = sarrayGetString(sa2, i, L_NOCOPY);
2618 l_byteaAppendString(ba, str);
2619 l_byteaAppendString(ba, slash);
2620 }
2621 }
2622
2623 /* Remove trailing slash */
2624 dest = (char *)l_byteaCopyData(ba, &size);
2625 if (size > 1 && dest[size - 1] == '/')
2626 dest[size - 1] = '\0';
2627
2628 sarrayDestroy(&sa1);
2629 sarrayDestroy(&sa2);
2630 l_byteaDestroy(&ba);
2631 return dest;
2632 }
2633
2634
2635 /*!
2636 * \brief appendSubdirs()
2637 *
2638 * \param[in] basedir
2639 * \param[in] subdirs
2640 * \return concatenated full directory path without trailing slash,
2641 * or NULL on error
2642 *
2643 * <pre>
2644 * Notes:
2645 * (1) Use unix pathname separators
2646 * (2) Allocates a new string: <basedir>/<subdirs>
2647 * </pre>
2648 */
2649 char *
appendSubdirs(const char * basedir,const char * subdirs)2650 appendSubdirs(const char *basedir,
2651 const char *subdirs)
2652 {
2653 char *newdir;
2654 size_t len1, len2, len3, len4;
2655
2656 PROCNAME("appendSubdirs");
2657
2658 if (!basedir || !subdirs)
2659 return (char *)ERROR_PTR("basedir and subdirs not both defined",
2660 procName, NULL);
2661
2662 len1 = strlen(basedir);
2663 len2 = strlen(subdirs);
2664 len3 = len1 + len2 + 6;
2665 if ((newdir = (char *)LEPT_CALLOC(len3 + 1, 1)) == NULL)
2666 return (char *)ERROR_PTR("newdir not made", procName, NULL);
2667 strncat(newdir, basedir, len3); /* add basedir */
2668 if (newdir[len1 - 1] != '/') /* add '/' if necessary */
2669 newdir[len1] = '/';
2670 if (subdirs[0] == '/') /* add subdirs, stripping leading '/' */
2671 strncat(newdir, subdirs + 1, len3);
2672 else
2673 strncat(newdir, subdirs, len3);
2674 len4 = strlen(newdir);
2675 if (newdir[len4 - 1] == '/') /* strip trailing '/' */
2676 newdir[len4 - 1] = '\0';
2677
2678 return newdir;
2679 }
2680
2681
/*--------------------------------------------------------------------*
 *                     Special file name operations                   *
 *--------------------------------------------------------------------*/
2685 /*!
2686 * \brief convertSepCharsInPath()
2687 *
2688 * \param[in] path
2689 * \param[in] type UNIX_PATH_SEPCHAR, WIN_PATH_SEPCHAR
2690 * \return 0 if OK, 1 on error
2691 *
2692 * <pre>
2693 * Notes:
2694 * (1) In-place conversion.
2695 * (2) Type is the resulting type:
2696 * * UNIX_PATH_SEPCHAR: '\\' ==> '/'
2697 * * WIN_PATH_SEPCHAR: '/' ==> '\\'
2698 * (3) Virtually all path operations in leptonica use unix separators.
2699 * </pre>
2700 */
2701 l_int32
convertSepCharsInPath(char * path,l_int32 type)2702 convertSepCharsInPath(char *path,
2703 l_int32 type)
2704 {
2705 l_int32 i;
2706 size_t len;
2707
2708 PROCNAME("convertSepCharsInPath");
2709 if (!path)
2710 return ERROR_INT("path not defined", procName, 1);
2711 if (type != UNIX_PATH_SEPCHAR && type != WIN_PATH_SEPCHAR)
2712 return ERROR_INT("invalid type", procName, 1);
2713
2714 len = strlen(path);
2715 if (type == UNIX_PATH_SEPCHAR) {
2716 for (i = 0; i < len; i++) {
2717 if (path[i] == '\\')
2718 path[i] = '/';
2719 }
2720 } else { /* WIN_PATH_SEPCHAR */
2721 for (i = 0; i < len; i++) {
2722 if (path[i] == '/')
2723 path[i] = '\\';
2724 }
2725 }
2726 return 0;
2727 }
2728
2729
2730 /*!
2731 * \brief genPathname()
2732 *
2733 * \param[in] dir [optional] directory or full path name, with or without
2734 * trailing '/'
2735 * \param[in] fname [optional] file name within a directory
2736 * \return pathname either a directory or full path, or NULL on error
2737 *
2738 * <pre>
2739 * Notes:
2740 * (1) This function generates actual paths in the following ways:
2741 * * from two sub-parts (e.g., a directory and a file name).
2742 * * from a single path full path, placed in %dir, with
2743 * %fname == NULL.
2744 * * from the name of a file in the local directory placed in
2745 * %fname, with %dir == NULL.
2746 * * if in a "/tmp" directory and on windows, the windows
2747 * temp directory is used.
2748 * (2) On windows, if the root of %dir is '/tmp', this does a name
2749 * translation:
2750 * "/tmp" ==> <Temp> (windows)
2751 * where <Temp> is the windows temp directory.
2752 * (3) On unix, the TMPDIR variable is ignored. No rewriting
2753 * of temp directories is permitted.
2754 * (4) There are four cases for the input:
2755 * (a) %dir is a directory and %fname is defined: result is a full path
2756 * (b) %dir is a directory and %fname is null: result is a directory
2757 * (c) %dir is a full path and %fname is null: result is a full path
2758 * (d) %dir is null or an empty string: start in the current dir;
2759 * result is a full path
2760 * (5) In all cases, the resulting pathname is not terminated with a slash
2761 * (6) The caller is responsible for freeing the returned pathname.
2762 * </pre>
2763 */
2764 char *
genPathname(const char * dir,const char * fname)2765 genPathname(const char *dir,
2766 const char *fname)
2767 {
2768 l_int32 is_win32 = FALSE;
2769 char *cdir, *pathout;
2770 l_int32 dirlen, namelen, size;
2771
2772 PROCNAME("genPathname");
2773
2774 if (!dir && !fname)
2775 return (char *)ERROR_PTR("no input", procName, NULL);
2776
2777 /* Handle the case where we start from the current directory */
2778 if (!dir || dir[0] == '\0') {
2779 if ((cdir = getcwd(NULL, 0)) == NULL)
2780 return (char *)ERROR_PTR("no current dir found", procName, NULL);
2781 } else {
2782 cdir = stringNew(dir);
2783 }
2784
2785 /* Convert to unix path separators, and remove the trailing
2786 * slash in the directory, except when dir == "/" */
2787 convertSepCharsInPath(cdir, UNIX_PATH_SEPCHAR);
2788 dirlen = strlen(cdir);
2789 if (cdir[dirlen - 1] == '/' && dirlen != 1) {
2790 cdir[dirlen - 1] = '\0';
2791 dirlen--;
2792 }
2793
2794 namelen = (fname) ? strlen(fname) : 0;
2795 size = dirlen + namelen + 256;
2796 if ((pathout = (char *)LEPT_CALLOC(size, sizeof(char))) == NULL) {
2797 LEPT_FREE(cdir);
2798 return (char *)ERROR_PTR("pathout not made", procName, NULL);
2799 }
2800
2801 #ifdef _WIN32
2802 is_win32 = TRUE;
2803 #endif /* _WIN32 */
2804
2805 /* First handle %dir (which may be a full pathname).
2806 * There is no path rewriting on unix, and on win32, we do not
2807 * rewrite unless the specified directory is /tmp or
2808 * a subdirectory of /tmp */
2809 if (!is_win32 || dirlen < 4 ||
2810 (dirlen == 4 && strncmp(cdir, "/tmp", 4) != 0) || /* not in "/tmp" */
2811 (dirlen > 4 && strncmp(cdir, "/tmp/", 5) != 0)) { /* not in "/tmp/" */
2812 stringCopy(pathout, cdir, dirlen);
2813 } else { /* Rewrite for win32 with "/tmp" specified for the directory. */
2814 #ifdef _WIN32
2815 l_int32 tmpdirlen;
2816 char tmpdir[MAX_PATH];
2817 GetTempPath(sizeof(tmpdir), tmpdir); /* get the windows temp dir */
2818 tmpdirlen = strlen(tmpdir);
2819 if (tmpdirlen > 0 && tmpdir[tmpdirlen - 1] == '\\') {
2820 tmpdir[tmpdirlen - 1] = '\0'; /* trim the trailing '\' */
2821 }
2822 tmpdirlen = strlen(tmpdir);
2823 stringCopy(pathout, tmpdir, tmpdirlen);
2824
2825 /* Add the rest of cdir */
2826 if (dirlen > 4)
2827 stringCat(pathout, size, cdir + 4);
2828 #endif /* _WIN32 */
2829 }
2830
2831 /* Now handle %fname */
2832 if (fname && strlen(fname) > 0) {
2833 dirlen = strlen(pathout);
2834 pathout[dirlen] = '/';
2835 strncat(pathout, fname, namelen);
2836 }
2837
2838 LEPT_FREE(cdir);
2839 return pathout;
2840 }
2841
2842
2843 /*!
2844 * \brief makeTempDirname()
2845 *
2846 * \param[in] result preallocated on stack or heap and passed in
2847 * \param[in] nbytes size of %result array, in bytes
2848 * \param[in] subdir [optional]; can be NULL or an empty string
2849 * \return 0 if OK, 1 on error
2850 *
2851 * <pre>
2852 * Notes:
2853 * (1) This generates the directory path for output temp files,
2854 * written into %result with unix separators.
2855 * (2) Caller allocates %result, large enough to hold the path,
2856 * which is:
2857 * /tmp/%subdir (unix)
2858 * <Temp>/%subdir (windows)
2859 * where <Temp> is a path on windows determined by GenTempPath()
2860 * and %subdir is in general a set of nested subdirectories:
2861 * dir1/dir2/.../dirN
2862 * which in use would not typically exceed 2 levels.
2863 * (3) Usage example:
2864 * \code
2865 * char result[256];
2866 * makeTempDirname(result, 256, "lept/golden");
2867 * \endcode
2868 * </pre>
2869 */
2870 l_int32
makeTempDirname(char * result,size_t nbytes,const char * subdir)2871 makeTempDirname(char *result,
2872 size_t nbytes,
2873 const char *subdir)
2874 {
2875 char *dir, *path;
2876 l_int32 ret = 0;
2877 size_t pathlen;
2878
2879 PROCNAME("makeTempDirname");
2880
2881 if (!result)
2882 return ERROR_INT("result not defined", procName, 1);
2883 if (subdir && ((subdir[0] == '.') || (subdir[0] == '/')))
2884 return ERROR_INT("subdir not an actual subdirectory", procName, 1);
2885
2886 memset(result, 0, nbytes);
2887 dir = pathJoin("/tmp", subdir);
2888 #ifndef _WIN32
2889 path = stringNew(dir);
2890 #else
2891 path = genPathname(dir, NULL);
2892 #endif /* ~ _WIN32 */
2893 pathlen = strlen(path);
2894 if (pathlen < nbytes - 1) {
2895 strncpy(result, path, pathlen);
2896 } else {
2897 L_ERROR("result array too small for path\n", procName);
2898 ret = 1;
2899 }
2900
2901 LEPT_FREE(dir);
2902 LEPT_FREE(path);
2903 return ret;
2904 }
2905
2906
2907 /*!
2908 * \brief modifyTrailingSlash()
2909 *
2910 * \param[in] path preallocated on stack or heap and passed in
2911 * \param[in] nbytes size of %path array, in bytes
2912 * \param[in] flag L_ADD_TRAIL_SLASH or L_REMOVE_TRAIL_SLASH
2913 * \return 0 if OK, 1 on error
2914 *
2915 * <pre>
2916 * Notes:
2917 * (1) This carries out the requested action if necessary.
2918 * </pre>
2919 */
2920 l_int32
modifyTrailingSlash(char * path,size_t nbytes,l_int32 flag)2921 modifyTrailingSlash(char *path,
2922 size_t nbytes,
2923 l_int32 flag)
2924 {
2925 char lastchar;
2926 size_t len;
2927
2928 PROCNAME("modifyTrailingSlash");
2929
2930 if (!path)
2931 return ERROR_INT("path not defined", procName, 1);
2932 if (flag != L_ADD_TRAIL_SLASH && flag != L_REMOVE_TRAIL_SLASH)
2933 return ERROR_INT("invalid flag", procName, 1);
2934
2935 len = strlen(path);
2936 lastchar = path[len - 1];
2937 if (flag == L_ADD_TRAIL_SLASH && lastchar != '/' && len < nbytes - 2) {
2938 path[len] = '/';
2939 path[len + 1] = '\0';
2940 } else if (flag == L_REMOVE_TRAIL_SLASH && lastchar == '/') {
2941 path[len - 1] = '\0';
2942 }
2943 return 0;
2944 }
2945
2946
2947 /*!
2948 * \brief l_makeTempFilename()
2949 *
2950 * \return fname : heap allocated filename; returns NULL on failure.
2951 *
2952 * <pre>
2953 * Notes:
2954 * (1) On unix, this makes a filename of the form
2955 * "/tmp/lept.XXXXXX",
2956 * where each X is a random character.
2957 * (2) On windows, this makes a filename of the form
2958 * "/<Temp>/lp.XXXXXX".
2959 * (3) On all systems, this fails if the file is not writable.
2960 * (4) Safest usage is to write to a subdirectory in debug code.
2961 * (5) The returned filename must be freed by the caller, using lept_free.
2962 * (6) The tail of the filename has a '.', so that cygwin interprets
2963 * the file as having an extension. Otherwise, cygwin assumes it
2964 * is an executable and appends ".exe" to the filename.
2965 * (7) On unix, whenever possible use tmpfile() instead. tmpfile()
2966 * hides the file name, returns a stream opened for write,
2967 * and deletes the temp file when the stream is closed.
2968 */
2969 char *
l_makeTempFilename()2970 l_makeTempFilename()
2971 {
2972 char dirname[240];
2973
2974 PROCNAME("l_makeTempFilename");
2975
2976 if (makeTempDirname(dirname, sizeof(dirname), NULL) == 1)
2977 return (char *)ERROR_PTR("failed to make dirname", procName, NULL);
2978
2979 #ifndef _WIN32
2980 {
2981 char *pattern;
2982 l_int32 fd;
2983 pattern = stringConcatNew(dirname, "/lept.XXXXXX", NULL);
2984 fd = mkstemp(pattern);
2985 if (fd == -1) {
2986 LEPT_FREE(pattern);
2987 return (char *)ERROR_PTR("mkstemp failed", procName, NULL);
2988 }
2989 close(fd);
2990 return pattern;
2991 }
2992 #else
2993 {
2994 char fname[MAX_PATH];
2995 FILE *fp;
2996 if (GetTempFileName(dirname, "lp.", 0, fname) == 0)
2997 return (char *)ERROR_PTR("GetTempFileName failed", procName, NULL);
2998 if ((fp = fopen(fname, "wb")) == NULL)
2999 return (char *)ERROR_PTR("file cannot be written to", procName, NULL);
3000 fclose(fp);
3001 return stringNew(fname);
3002 }
3003 #endif /* ~ _WIN32 */
3004 }
3005
3006
3007 /*!
3008 * \brief extractNumberFromFilename()
3009 *
3010 * \param[in] fname
3011 * \param[in] numpre number of characters before the digits to be found
3012 * \param[in] numpost number of characters after the digits to be found
3013 * \return num number embedded in the filename; -1 on error or if
3014 * not found
3015 *
3016 * <pre>
3017 * Notes:
3018 * (1) The number is to be found in the basename, which is the
3019 * filename without either the directory or the last extension.
3020 * (2) When a number is found, it is non-negative. If no number
3021 * is found, this returns -1, without an error message. The
3022 * caller needs to check.
3023 * </pre>
3024 */
3025 l_int32
extractNumberFromFilename(const char * fname,l_int32 numpre,l_int32 numpost)3026 extractNumberFromFilename(const char *fname,
3027 l_int32 numpre,
3028 l_int32 numpost)
3029 {
3030 char *tail, *basename;
3031 l_int32 len, nret, num;
3032
3033 PROCNAME("extractNumberFromFilename");
3034
3035 if (!fname)
3036 return ERROR_INT("fname not defined", procName, -1);
3037
3038 splitPathAtDirectory(fname, NULL, &tail);
3039 splitPathAtExtension(tail, &basename, NULL);
3040 LEPT_FREE(tail);
3041
3042 len = strlen(basename);
3043 if (numpre + numpost > len - 1) {
3044 LEPT_FREE(basename);
3045 return ERROR_INT("numpre + numpost too big", procName, -1);
3046 }
3047
3048 basename[len - numpost] = '\0';
3049 nret = sscanf(basename + numpre, "%d", &num);
3050 LEPT_FREE(basename);
3051
3052 if (nret == 1)
3053 return num;
3054 else
3055 return -1; /* not found */
3056 }
3057