1 /*++++
2 strfncs.c provides some string and array helpers
3 markus@mhoenicka.de 2-8-00
4 ++++++*/
5
6 /*
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
21 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
22
23
24 #include <stdio.h>
25 #include <string.h>
26 #include <stdlib.h>
27 #include <ctype.h>
28 #include <limits.h>
29 #include <unistd.h>
30 #include <netdb.h>
31 #include <netinet/in.h>
32 #include <arpa/inet.h>
33 #include <sys/socket.h>
34 #include <errno.h>
35
36 #include "strfncs.h"
37
38 /* Max integer that can be stored in a size_t variable */
39 #ifndef SIZE_T_MAX
40 #define SIZE_T_MAX UINT_MAX
41 #endif /* !SIZE_T_MAX */
42
43 /* forward declarations of local functions */
44 static int is_ip(char *address);
45
46
47 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
48 is_ip(): checks whether string contains a valid IP address
49 This function does not verify that the given IP address
50 exists or is accessible, but rather checks whether the
51 string is sane.
52
53 static int is_ip returns 0 if error, 1 if ok
54
55 char *address ptr to the string to check
56
57 returns 0 if invalid, 1 if valid
58
59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* is_ip(): returns 1 if address is a well-formed dotted quad, 0 otherwise.
   Exactly four decimal fields of one to three digits, each in the range
   0..255 and separated by single dots, are required. One or more trailing
   CR/LF characters are tolerated. The input string is not modified and no
   name lookup is performed. */
static int is_ip(char *address) {
  const char *cur;
  int field;

  /* the longest dotted quad ("255.255.255.255") has 15 characters */
  if (address == NULL || strlen(address) > 15) {
    return 0;
  }

  cur = address;

  for (field = 0; field < 4; field++) {
    int value = 0;
    int digits = 0;

    while (isdigit((unsigned char)*cur)) {
      if (++digits > 3) {
        return 0;
      }
      value = value * 10 + (*cur - '0');
      cur++;
    }

    /* an empty or non-numeric field must not be mistaken for 0 */
    if (digits == 0 || value > 255) {
      return 0;
    }

    /* the first three fields must be followed by exactly one dot */
    if (field < 3) {
      if (*cur != '.') {
        return 0;
      }
      cur++;
    }
  }

  /* only line terminators may follow the fourth field */
  while (*cur == '\r' || *cur == '\n') {
    cur++;
  }

  return (*cur == '\0') ? 1 : 0;
}
104
105 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
106 check_ip(): validate an IP address/hostname
107 The fn first tests whether the input string looks like
108 a dotted quad. If not, the string is assumed to be a
109 hostname. It tries to resolve this host. If it has one
110 IP address, the input string will be replaced with a
111 string with a dotted quad representation of the IP
112 address. If the host has more than one IP address, no
113 substitution is performed.
114
115 int check_ip returns 0 if the string looks like a dotted quad or
116 if the string can be resolved as a hostname for exactly
117 one IP address. Returns -1 if the string is 'localhost'
118 which has a special meaning for some database servers.
119 Returns > 0 if there's a problem:
120 1: cannot be resolved as a hostname
121 2: does not appear to be an IP host
122 3: does not appear to have a valid IP address
123 4: has more than one network interface
124
125 char* server_ip ptr to a string with the IP-address or the putative
126 hostname. The size of the buffer must be at least 16
127 chars regardless of the length of the input string as
128 it will receive a dotted quad representation
129 under certain circumstances.
130
131 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* check_ip(): accepts a dotted quad or a hostname in server_ip.
   Returns 0 for a dotted quad, or for a hostname that resolves to exactly
   one IPv4 address (server_ip is then overwritten with that address, so
   the buffer must hold at least 16 chars). Returns -1 for "localhost".
   Returns 1 if server_ip is NULL or cannot be resolved, 2 if the host is
   not an AF_INET host, 3 if it has no address, 4 if it has several. */
int check_ip (char* server_ip) {
  struct hostent *resolved;
  char dotted_quad[16];
  char **addr;
  int found;

  if (server_ip == NULL) {
    return 1;
  }

  /* already a dotted quad: nothing to resolve */
  if (is_ip(server_ip)) {
    return 0;
  }

  /* special hostname string for some database servers */
  if (strcmp(server_ip, "localhost") == 0) {
    return -1;
  }

  /* treat the string as a hostname */
  resolved = gethostbyname(server_ip);
  if (resolved == NULL) {
    return 1;
  }

  if (resolved->h_addrtype != AF_INET) {
    return 2;
  }

  /* count the addresses (looking at no more than three) and remember the
     text form of the last one seen */
  found = 0;
  for (addr = resolved->h_addr_list; *addr && found < 3; addr++) {
    sprintf(dotted_quad, "%s", inet_ntoa(*(struct in_addr*)*addr));
    found++;
  }

  switch (found) {
  case 0:
    return 3;
  case 1:
    strcpy(server_ip, dotted_quad);
    return 0;
  default:
    return 4;
  }
}
179
180 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
181 is_port(): checks whether string contains a valid port
This fn simply ensures that the port is outside the
range that the system uses, i.e. >= 1024
184
185 int is_port
186
char *port ptr to the string to check
188
189 returns 0 if invalid, 1 if valid
190
191 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* is_port(): returns 1 if port is the decimal representation of a port
   number in the range 1024..65535, 0 otherwise (including a NULL pointer).
   Leading whitespace (as skipped by strtol) and trailing whitespace are
   tolerated; any other trailing characters make the string invalid. */
int is_port(char *port) {
  char *end;
  long value;

  if (port == NULL) {
    return 0;
  }

  errno = 0;
  value = strtol(port, &end, 10);

  /* no digits at all, or a value that does not even fit into a long */
  if (end == port || errno == ERANGE) {
    return 0;
  }

  /* allow trailing whitespace (e.g. a newline), but nothing else */
  while (isspace((unsigned char)*end)) {
    end++;
  }
  if (*end != '\0') {
    return 0;
  }

  /* below 1024 is reserved for the system, above 65535 is no port at all */
  return (value >= 1024 && value <= 65535) ? 1 : 0;
}
204
205
206 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
207 is_number(): tests whether a string is a number
208
209 int is_number returns 1 if the string is a representation of a positive
210 integer; returns 0 if the string does not exist or is
211 empty or contains other characters than digits
212
213 char* string address of string to test
214
215 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* is_number(): returns 1 if string consists of one or more decimal digits
   and nothing else; returns 0 for a NULL pointer, an empty string, or a
   string containing any other character. */
int is_number(char *string) {
  const char *curr;

  if (!string || !*string) {
    return 0;
  }

  for (curr = string; *curr; curr++) {
    /* the ctype functions require a value representable as unsigned char;
       a plain char may be negative for bytes above 127 */
    if (!isdigit((unsigned char)*curr)) {
      return 0;
    }
  }
  return 1;
}
233
234 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
235 is_real_number(): tests whether a string is a number including
236 decimal fractions
237
238 int is_real_number returns 1 if the string is a representation of a positive
239 number; returns 0 if the string does not exist or is
240 empty or contains other characters than digits or a decimal
241 point or if there is more than one decimal point
242
243 char* string address of string to test
244
245 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* is_real_number(): returns 1 if string consists of decimal digits with at
   most one decimal point and contains at least one digit; returns 0 for a
   NULL pointer, an empty string, a string with any other character, more
   than one decimal point, or no digit at all (e.g. "."). */
int is_real_number(const char *string) {
  const char *curr;
  int decpoint = 0;
  int seen_digit = 0;

  if (!string || !*string) {
    return 0;
  }

  for (curr = string; *curr; curr++) {
    if (*curr == '.') {
      if (++decpoint > 1) {
        return 0;
      }
    }
    /* cast: ctype functions are undefined for negative char values */
    else if (isdigit((unsigned char)*curr)) {
      seen_digit = 1;
    }
    else {
      return 0;
    }
  }

  /* a lone decimal point is not a number */
  return seen_digit;
}
268
269 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
270 stripwhite(): strips whitespace from the start and end of STRING
271
272 char* stripwhite
273
274 char* string address of string to convert
275
276 int mode 0 = strips start and end, 1 = start only, 2 = end only
277
278 int type 0 = only space and tab 1 = space, tab, cr, lf
279
280 Returns ptr to the modified string
281
282 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* stripwhite(): strips whitespace from the start and/or the end of string.
   mode: 0 = strip both ends, 1 = start only, 2 = end only.
   type: 0 = test characters with whitespace(), anything else = test them
   with full_whitespace().
   Leading whitespace is skipped by advancing the returned pointer, trailing
   whitespace is removed by writing a terminating '\0' into the buffer.
   Returns NULL if string is NULL, otherwise a pointer into string. */
char *stripwhite (char *string, int mode, int type) {
  char *start;
  char *end;

  if (string == NULL) {
    return NULL;
  }

  start = string;

  if (mode != 2) {
    /* skip leading whitespace */
    while (type ? full_whitespace (start[0]) : whitespace (start[0])) {
      start++;
    }

    if (*start == '\0') {
      return start;
    }
  }

  if (mode != 1) {
    /* end points one past the last character that is kept. Walking back
       from the terminator (instead of from the last character) handles
       empty and all-whitespace strings without stepping before start. */
    end = start + strlen (start);
    while (end > start
           && (type ? full_whitespace (end[-1]) : whitespace (end[-1]))) {
      end--;
    }
    *end = '\0';
  }

  return start;
}
328
329 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
330 strup(): converts a string in place to uppercase
331
332 char* strup() returns a pointer to the modified string
333
334 char* string pointer to the string to be converted
335
336 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* strup(): converts string in place to uppercase using toupper().
   Returns string, or NULL if string is NULL. */
char* strup(char* string) {
  char* chr;

  if (string == NULL) {
    return NULL;
  }

  /* walk a copy of the pointer, string itself is the return value */
  for (chr = string; *chr; chr++) {
    /* cast: toupper() is undefined for negative char values */
    *chr = (char)toupper((unsigned char)*chr);
  }
  return string;
}
353
354 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
355 strdn(): converts a string in place to lowercase
356
char* strdn() returns a pointer to the modified string
358
359 char* string pointer to the string to be converted
360
361 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* strdn(): converts string in place to lowercase using tolower().
   Returns string, or NULL if string is NULL. */
char* strdn(char* string) {
  char* chr;

  if (string == NULL) {
    return NULL;
  }

  /* walk a copy of the pointer, string itself is the return value */
  for (chr = string; *chr; chr++) {
    /* cast: tolower() is undefined for negative char values */
    *chr = (char)tolower((unsigned char)*chr);
  }
  return string;
}
378
379 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
380 stricap(): converts a string in place to lowercase, but capitalize
381 the first letter of each word
382
383 char* stricap() returns a pointer to the modified string
384
385 char* string pointer to the string to be converted
386
387 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* stricap(): converts string in place to lowercase and capitalizes the
   first letter of each word. A word starts at the beginning of the string
   and after every punctuation or whitespace character.
   Returns string, or NULL if string is NULL. An empty string is returned
   unchanged. */
char* stricap(char* string) {
  char *cur;
  int word_start = 1; /* non-zero if the current character starts a word */

  if (string == NULL) {
    return NULL;
  }

  /* ToDo: handle the "the", "a" etc. cases */
  for (cur = string; *cur != '\0'; cur++) {
    /* work on an unsigned char, ctype functions are undefined otherwise */
    unsigned char c = (unsigned char)tolower((unsigned char)*cur);

    if (word_start && islower(c)) {
      c = (unsigned char)toupper(c);
    }
    *cur = (char)c;

    /* the character following a separator starts the next word */
    word_start = ispunct(c) || isspace(c);
  }

  return string;
}
411
412 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
413 compare_ptr(): compares two pointers to strings
414
415 int compare_ptr returns -1 if the first argument is smaller than the
416 second; 0 if both are equal; 1 if the first is larger
417
418 void* ptr_one the first char* to compare
419
420 void* ptr_two the second char* to compare
421
422 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* compare_ptr(): compares the addresses stored in two char* variables.
   ptr_one and ptr_two each point to a char*. The result is -1 if the first
   address is lower, 0 if both are the same, 1 if the first is higher.
   A NULL address sorts after every non-NULL address. */
int compare_ptr(const void* ptr_one, const void* ptr_two) {
  const char *first = *(char* const*)ptr_one;
  const char *second = *(char* const*)ptr_two;

  /* identical addresses, which includes the case of two NULL ptrs */
  if (first == second) {
    return 0;
  }

  /* exactly one of the ptrs may still be NULL here */
  if (first == NULL) {
    return 1;
  }
  if (second == NULL) {
    return -1;
  }

  /* two distinct non-NULL addresses */
  return (first < second) ? -1 : 1;
}
454
455 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
456 mstrcat(): a modified version of strcat which operates on destination
457 strings obtained with malloc(); the length of the buffer
458 is dynamically increased if necessary.
459
460 char* mstrcat returns a pointer to the destination string or NULL
461 if the realloc failed. As the buffer holding the destination
462 string may have been reallocated, it is mandatory to use
463 *ONLY* this returned pointer after the function call and
464 *NEVER* the old pointer to destination
465
466 char* destination the buffer obtained with malloc() to which the
467 source string will be appended. The calling function is
468 responsible to free this buffer after use
469
470 char* source the string that will be appended to destination
471
472 size_t* ptr_dest_len points to a variable that contains the current size
473 of the buffer that destination points to. Will be modified if
474 a realloc() is necessary to increase the buffer size
475
476 size_t offset a positive value n will concatenate the string at the
477 position omega-n of the destination string. Set offset to zero
478 to get the standard strcat behaviour.
479
480 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
mstrcat(char * destination,char * source,size_t * ptr_dest_len,size_t offset)481 char* mstrcat(char* destination, char* source, size_t* ptr_dest_len, size_t offset) {
482 char *new_dest;
483 size_t len;
484 size_t source_len;
485 size_t destination_len;
486
487 /* take the short way out if destination doesn't exist */
488 if (destination == NULL || source == NULL) {
489 return NULL;
490 }
491
492 source_len = strlen(source);
493 destination_len = strlen(destination);
494
495 /* don't allow offsets longer than the destination length */
496 offset = (offset > destination_len) ? destination_len : offset;
497
498 /* we need the sum of both string lengths plus one for the \0 minus
499 the offset if there is one */
500 len = destination_len + source_len + 1 - offset;
501
502 /* reallocate buffer if it is too small */
503 if (len > *ptr_dest_len) {
504 len = (len - *ptr_dest_len < realloc_chunk) ? *ptr_dest_len + realloc_chunk : len;
505 if ((new_dest = (char*)realloc(destination, len)) == NULL) {
506 return NULL;
507 }
508 else {
509 destination = new_dest;
510 }
511 /* adjust the length variable */
512 *ptr_dest_len = len;
513 }
514
515 if (!offset) { /* this is the standard strcat behaviour */
516 strcat(destination, source);
517 }
518 else { /* this will append the string at position omega minus offset */
519 strcpy(&destination[strlen(destination)-offset], source);
520 }
521
522 /* return the new pointer to the buffer */
523 return destination;
524 }
525
526 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
527 mstrcpy(): a modified version of strcpy which operates on destination
528 strings obtained with malloc(); the length of the buffer
529 is dynamically increased if necessary.
530
531 char* mstrcpy returns a pointer to the destination string or NULL
532 if the realloc failed. As the buffer holding the destination
533 string may have been reallocated, it is mandatory to use
534 *ONLY* this returned pointer after the function call and
535 *NEVER* the old pointer to destination
536
537 char* destination the buffer obtained with malloc() to which the
538 source string will be copied
539
540 char* source the string that will be copied to destination
541
542 size_t* ptr_dest_len points to a variable that contains the current size
543 of the buffer that destination points to. Will be modified if
544 a realloc() is necessary to increase the buffer size
545
546 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
mstrcpy(char * destination,char * source,size_t * ptr_dest_len)547 char* mstrcpy(char* destination, char* source, size_t* ptr_dest_len) {
548 char *new_dest;
549 size_t len;
550
551 /* take the short way out if destination doesn't exist */
552 if (destination == NULL) {
553 return NULL;
554 }
555
556 /* we need the length of the string plus one for the \0 */
557 len = strlen(source)+1;
558
559 /* reallocate the buffer if it is too small */
560 if (*ptr_dest_len < len) {
561 len = (len - *ptr_dest_len < realloc_chunk) ? *ptr_dest_len + realloc_chunk : len;
562 if ((new_dest = (char*)realloc(destination, len)) == NULL) {
563 return NULL;
564 }
565 else {
566 destination = new_dest;
567 }
568 /* adjust the length variable */
569 *ptr_dest_len = len;
570 }
571
572 /* now copy the string*/
573 strcpy(destination, source);
574
575 /* return the new pointer to the buffer */
576 return destination;
577 }
578
579 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
580 mstrncpy(): a modified version of strncpy which operates on destination
581 strings obtained with malloc(); the length of the buffer
582 is dynamically increased if necessary.
583
584 char* mstrncpy returns a pointer to the destination string or NULL
585 if the realloc failed. As the buffer holding the destination
586 string may have been reallocated, it is mandatory to use
587 *ONLY* this returned pointer after the function call and
588 *NEVER* the old pointer to destination
589
590 char* destination the buffer obtained with malloc() to which the
591 source string will be copied
592
593 char* source the string that will be copied to destination
594
595 size_t n the number of characters to copy
596
597 size_t* ptr_dest_len points to a variable that contains the current size
598 of the buffer that destination points to. Will be modified if
599 a realloc() is necessary to increase the buffer size
600
601 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
mstrncpy(char * destination,char * source,size_t n,size_t * ptr_dest_len)602 char* mstrncpy(char* destination, char* source, size_t n, size_t* ptr_dest_len) {
603 char *new_dest;
604 size_t len;
605
606 /* take the short way out if destination doesn't exist */
607 if (destination == NULL || source == NULL) {
608 return NULL;
609 }
610
611 /* we need the length of the string plus one for the \0 */
612 len = n+1;
613
614 /* reallocate the buffer if it is too small */
615 if (*ptr_dest_len < len) {
616 len = (len - *ptr_dest_len < realloc_chunk) ? *ptr_dest_len + realloc_chunk : len;
617 if ((new_dest = (char*)realloc(destination, len)) == NULL) {
618 return NULL;
619 }
620 else {
621 destination = new_dest;
622 }
623 /* adjust the length variable */
624 *ptr_dest_len = len;
625 }
626
627 /* now copy the string*/
628 strncpy(destination, source, n);
629
630 /* return the new pointer to the buffer */
631 return destination;
632 }
633
634 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
635 mstrdup() creates a copy of a string in a malloc()'ed buffer
636
637 char* mstrdup returns a pointer to the copy of the string. Returns
638 NULL in the case of a failure (out of memory)
639
640 char* buffer pointer to the string to be copied
641
642 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* mstrdup(): returns a copy of the string buffer in a malloc()'ed buffer
   which the caller has to free(). Returns NULL if buffer is NULL or if the
   allocation fails. */
char* mstrdup(char *buffer) {
  char *new_buffer;
  size_t size;

  if (buffer == NULL) {
    return NULL;
  }

  /* string length plus the terminating \0 */
  size = strlen(buffer)+1;

  if ((new_buffer = malloc(size)) == NULL) {
    return NULL;
  }

  /* the size is known, so copy the terminator along with the string */
  memcpy(new_buffer, buffer, size);
  return new_buffer;
}
654
655 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
free_all() frees memory, taking the addresses out of a
NULL-terminated array
658
659 void free_all
660
661 void*** ptr_mem pointer to an array of pointers which point to
662 pointer variables which hold valid addresses of memory
663 allocated with malloc/calloc/realloc or NULL. This
664 somewhat complicated arrangement allows to reallocate
665 the memory without changing the values in the array.
666 If memory is freed before free_all() is called, the
667 value of the pointer to the freed memory must be set
668 to NULL, which will not harm free().
669
670 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* free_all(): walks the NULL-terminated array ptr_mem. Each entry points to
   a pointer variable; the memory that variable points to is freed and the
   variable is then set to NULL, so neither a later free() nor a second
   call of free_all() on the same array can free the memory twice.
   Does nothing if ptr_mem is NULL. */
void free_all(void*** ptr_mem) {
  size_t i;

  if (ptr_mem == NULL) {
    return;
  }

  for (i = 0; ptr_mem[i] != NULL; i++) {
    /* free(NULL) is harmless, so variables that hold NULL need no test */
    free(*(ptr_mem[i]));
    *(ptr_mem[i]) = NULL;
  }
}
679
680 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
681 count_the_flowers() counts the number of occurrences of letter in
682 buffer. letter may be a multibyte character
683
684 static size_t count_the_flowers returns the number of occurrences of letter
685 in buffer. It will return zero if letter is not
686 in buffer.
687
688 const char* buffer pointer to the string to scan
689
690 const char* letter ptr to the letter to locate
691
692 size_t len the number of bytes that represent letter
693
694 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* count_the_flowers(): counts how often the len bytes at letter occur in
   the string buffer. Every start position is tested, so overlapping
   occurrences are counted individually ("aa" occurs twice in "aaa").
   Returns 0 if buffer or letter is NULL, if len is 0, or if buffer is
   shorter than len. */
size_t count_the_flowers(const char *buffer, const char *letter, size_t len) {
  size_t numletters = 0;
  size_t buflen;
  size_t last_start;
  size_t i;

  /* a zero-length letter would match at every position */
  if (buffer == NULL || letter == NULL || len == 0) {
    return 0;
  }

  buflen = strlen(buffer);

  if (buflen < len) {
    /* string too short to hold the flower */
    return 0;
  }

  /* last position at which a complete letter still fits */
  last_start = buflen - len;

  for (i = 0; i <= last_start; i++) {
    if (memcmp(buffer+i, letter, len) == 0) {
      numletters++;
    }
  }
  return numletters;
}
717
718 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
719 replace_char_string() replaces the letter that buffer points to
720 with an insert of arbitrary size. The calling function
721 is responsible to allocate enough space for buffer
722 so the insertion can be safely executed. The letter may
723 be a multibyte character.
724
725 void replace_char_string has no return value
726
727 char* buffer pointer to the null-terminated string that will be extended
728
729 char* insert pointer to the string that will be inserted
730
731 size_t len length of the letter to replace in bytes
732
733 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* replace_char_string(): replaces the first len bytes at buffer with the
   string insert. The remainder of the string, including its terminating
   \0, is moved to make room (or to close the gap). The caller must make
   sure that the buffer is large enough to hold the result. */
void replace_char_string(char *buffer, char *insert, size_t len) {
  const size_t insert_len = strlen(insert);
  char *tail = buffer + len; /* first byte after the letter to replace */

  /* source and target may overlap, hence memmove() */
  memmove(buffer + insert_len, tail, strlen(tail) + 1);

  /* drop the insert into the gap, without its terminating \0 */
  memcpy(buffer, insert, insert_len);
}
742
743
744 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
745 remove_substring() removes the substring that buffer points to from
746 a string. If the requested substring length is longer
747 than the remainder of the string, the string will
748 be terminated where buffer points to
749
750 void remove_substring has no return value
751
752 char* buffer pointer to the substring to be removed
753
754 size_t sublen number of characters to remove
755
756 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* remove_substring(): removes sublen characters starting at buffer by
   moving the remainder of the string forward. If fewer than sublen
   characters are left, the string is terminated at buffer instead. */
void remove_substring(char *buffer, size_t sublen) {
  char *tail;

  if (strlen(buffer) < sublen) {
    /* nothing would be left behind the substring */
    *buffer = '\0';
    return;
  }

  tail = buffer + sublen;

  /* the extra byte takes the trailing \0 along */
  memmove(buffer, tail, strlen(tail) + 1);
}
767
768 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
769 sgml_entitize() replaces special characters with entities. The
770 replacement is done according to an array of
771 char/string pairs that has to be provided by the
772 calling function.
773 One source for entities and their Unicode equivalents:
774 http://www.w3.org/TR/2000/WD-MathML2-20000211/bycodes.html
775
776 char* sgml_entitize returns a pointer to the converted string. This
777 may be different from the original value of the
778 passed pointer, so after calling this function only
779 the returned value should be used. The function also
780 modifies the value of the passed pointer to buffer.
781 If the function is successful, the return value and
782 the current value of the passed pointer will be
783 identical. The return value is NULL if an error
784 occurred. In this case, the current value of the
785 passed pointer can still be used to access the
786 converted string as it was before the error.
787 The buffer will be reallocated if needed, and the
788 allocated length equals the length of the resulting
789 string.
790
791 char** buffer pointer to a pointer to the string that will be
792 entitized
793
794 struct charent* ptr_myents pointer to an array of character/string pairs
795 which will be used for the replacements. The last
796 element of the array must have a '0' (zero) as the
797 character value as this is taken as a termination
798 signal. You can also pass NULL here, then this fn
799 will use a SGML default table.
800
801 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
sgml_entitize(char ** buffer,struct charent * ptr_myents)802 char* sgml_entitize(char** buffer, struct charent* ptr_myents) {
803 int numents = 0;
804 int i = 0;
805 char *new_buffer;
806 char *token, *the_end;
807
808 struct charent* the_ents;
809 struct charent default_ents[7] = { /* default lookup table for entity replacements */
810 {"&", 1, "&"},
811 {"<", 1, "<"},
812 {">", 1, ">"},
813 {{226, 128, 148, 0}, 3, "—"}, /* 0x2014 */
814 {{226, 128, 152, 0}, 3, "‘"}, /* 0x2018 */
815 {{226, 128, 153, 0}, 3, "’"}, /* 0x2019 */
816 {"", 0, ""}
817 };
818
819 if (!ptr_myents) {
820 the_ents = default_ents;
821 }
822 else {
823 the_ents = ptr_myents;
824 }
825
826 while (the_ents[i].len != 0) {
827
828 numents = count_the_flowers(*buffer, the_ents[i].letter, the_ents[i].len);
829 /* printf("numents for %s went to %d\n", the_ents[i].letter, numents); */
830
831 if (numents > 0) {
832 if ((new_buffer = realloc(*buffer, strlen(*buffer) + ((strlen(the_ents[i].entity))*(numents+1)))) == NULL) {
833 return NULL;
834 }
835 else {
836 *buffer = new_buffer;
837 }
838
839 token = strstr(*buffer, the_ents[i].letter);
840 the_end = &((*buffer)[strlen(*buffer)-1]);
841
842 while (token != NULL) {
843 char* next_amp;
844 char* next_sc;
845 char* next_sp;
846
847 /* printf("token went to:%d<<letter went to %s\n", (int)*token, the_ents[i].letter); */
848 /* replace ampersand only if it does not start an entity */
849 /* get pointers to the next ampersand and semicolon, if any,
850 and see which one is closer */
851 next_amp = strchr(token+1, (int)'&');
852 next_sc = strchr(token+1, (int)';');
853 next_sp = strchr(token+1, (int)' ');
854
855 if (*(the_ents[i].letter) != '&' || compare_ptr(&next_sc, &next_amp) != -1 || compare_ptr(&next_sp, &next_sc) != 1) {
856 replace_char_string(token, the_ents[i].entity, the_ents[i].len);
857 /* adjust the end using the number of chars added */
858 the_end += strlen(the_ents[i].entity)-the_ents[i].len;
859 }
860 token = (token + the_ents[i].len > the_end) ? NULL:strstr(token+the_ents[i].len, the_ents[i].letter);
861 }
862 }
863 i++;
864 }
865 return *buffer;
866 }
867
868 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
869 canonicalize_path(): converts a path to a canonical absolute path,
870 resolving all symbolic links and eliminating
871 path components like "/./" or "/../" or "~"
872
873 char* canonicalize_path returns the canonical path in a malloc()'ed
874 buffer. The calling function is responsible for
875 freeing this buffer after use. Returns NULL if
876 an error occurs.
877
878 char* the_path ptr to a string with the path to convert
879
880 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
canonicalize_path(char * the_path)881 char *canonicalize_path(char *the_path) {
882 /* this fn was stolen and modified from cpbk */
883 char *temp;
884 char *myhome;
885 char *homepath;
886 char *resolved_path;
887 int numtilde;
888
889 if ((temp = malloc(PATH_MAX)) == NULL) {
890 return NULL;
891 }
892
893 if ((homepath = malloc(PATH_MAX)) == NULL) {
894 free(temp);
895 return NULL;
896 }
897
898 /* refuse if path is too long */
899 if (strlen(the_path) > PATH_MAX) {
900 free(temp);
901 free(homepath);
902 return NULL;
903 }
904
905 /* check whether the string abbreviates the home directory. If so,
906 expand with the value of HOME. This way we can call this function
907 with paths that did not go through shell expansion */
908 if ((numtilde = count_the_flowers(the_path, "~", 1)) > 1) {
909 free(homepath);
910 free(temp);
911 return NULL;
912 }
913 else if (numtilde) {
914 myhome = getenv("HOME");
915 strcpy(homepath, the_path);
916 replace_char_string(strchr(homepath, (int)'~'), myhome, 1);
917 }
918 else {
919 strcpy(homepath, the_path);
920 }
921
922 resolved_path = realpath(homepath, temp);
923
924 if (resolved_path == NULL) { /* some or all parts of the path are invalid */
925 if (errno == ENOENT) {
926 /* file does not exist yet which may be ok if a file is to be created */
927 strcpy(temp, homepath);
928 }
929 else {
930 free(homepath);
931 free(temp);
932 return NULL;
933 }
934 }
935
936 free(homepath);
937
938 return temp;
939 }
940
941 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
942 strip_quote(): strips leading and trailing quotes
943
944 char* strip_quote returns a ptr to the string without quotes or NULL
945 if the_string is NULL
946
947 char* the_string ptr to a string to strip
948
949 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* returns non-zero if c is a single quote, a double quote, or a backtick */
static int is_quote_char(char c) {
  return c == '\'' || c == '\"' || c == '`';
}

/* strip_quote(): strips leading and trailing quote characters (', ", `).
   Leading quotes are skipped by advancing the returned pointer, trailing
   quotes are overwritten with '\0' in place.
   Returns a pointer into the_string, or NULL if the_string is NULL. */
char *strip_quote(char *the_string) {
  char* stripped_string;
  size_t len;

  if (!the_string) {
    return NULL;
  }

  /* the terminating \0 is not a quote, so this stops at the end */
  stripped_string = the_string;
  while (is_quote_char(*stripped_string)) {
    stripped_string++;
  }

  /* work with the remaining length instead of a pointer to the last
     character, as there is no last character in an empty string */
  len = strlen(stripped_string);
  while (len > 0 && is_quote_char(stripped_string[len-1])) {
    stripped_string[--len] = '\0';
  }

  return stripped_string;
}
980
981 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
982 truncate_quoted_string(): truncate a quoted string to a defined length
983 the input string must be quoted with either
984 single or double quotes, with no leading or
985 trailing whitespace..
986
char* truncate_quoted_string returns the_string, truncated in place if
necessary, or NULL if the string does not start with a quote

char* the_string ptr to the quoted string to truncate
991
992 size_t len the maximum length of the string, sans the trailing '\0'
993
994 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* truncate_quoted_string(): truncates a quoted string in place to len
   characters. The character at len-1 becomes the closing quote, which is
   the same character as the opening quote.
   Returns the_string unchanged if it is NULL, shorter than len, or if len
   is less than 3. Returns NULL if the string does not start with a single
   or double quote. Returns the_string after truncation otherwise. */
char *truncate_quoted_string(char *the_string, size_t len) {
  char quote_char;

  if (!the_string
      || strlen(the_string) < len
      || len < 3) {
    /* nothing to do */
    return the_string;
  }

  quote_char = *the_string;

  if (quote_char != '\'' && quote_char != '\"') {
    /* string does not appear to be quoted properly */
    return NULL;
  }

  /* make sure we don't accidentally escape the trailing quote: a backslash
     or a quote right in front of it is replaced with a space. len is at
     least 3, so this never touches the opening quote. */
  if (the_string[len-2] == '\\'
      || the_string[len-2] == quote_char) {
    the_string[len-2] = ' ';
  }

  the_string[len-1] = quote_char;
  the_string[len] = '\0';

  return the_string;
}
1023
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  escape_chars(): copies a buffer and puts a backslash in front of
                  every byte that is listed in toescape

  size_t escape_chars returns the number of bytes written to dest, not
                  counting the terminating '\0'

  char *dest pointer to a buffer that will receive the escaped string.
                  Up to two bytes are written per input byte, plus
                  one terminating '\0', i.e. 2*orig_size+1 bytes in
                  the worst case

  const char *orig pointer to the buffer with the string to be
                  escaped. If NULL, only the terminator is written

  size_t orig_size number of bytes of orig to process

  const char *toescape zero-terminated list of characters that must be
                  escaped. If NULL, nothing is escaped

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
size_t escape_chars(char *dest, const char *orig, size_t orig_size, const char *toescape) {
  char *out = dest;
  size_t idx;

  if (orig) {
    for (idx = 0; idx < orig_size; idx++) {
      const char ch = orig[idx];
      const char *candidate;

      /* the scan stops at the list terminator, so a '\0' byte in
         orig is never escaped */
      for (candidate = toescape; candidate && *candidate; candidate++) {
        if (*candidate == ch) {
          *out++ = '\\';
          break;
        }
      }

      *out++ = ch;
    }
  }

  /* always terminate the output. Callers that process binary data
     rely on the returned length instead */
  *out = '\0';

  return (size_t)(out - dest);
}
1071
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  unescape_chars(): unescapes a backslash-escaped string. Each
                  backslash is dropped and the byte that follows it is
                  copied verbatim. A backslash that is the very last
                  byte of the input has nothing to escape and is
                  dropped

  size_t unescape_chars returns the number of bytes written to dest,
                  not counting the terminating '\0'

  char *dest pointer to a buffer that will receive the unescaped
                  string. Must hold at least orig_size+1 bytes

  const char *orig pointer to the buffer with the string to be
                  unescaped. If NULL, only the terminator is written

  size_t orig_size number of bytes of orig to process

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
size_t unescape_chars(char *dest, const char *orig, size_t orig_size) {
  char *curdest = dest;
  const char *curorig = orig;
  const char *end;

  if (orig) {
    end = orig + orig_size;

    while (curorig < end) {
      if (*curorig == '\\') {
        curorig++;
        if (curorig == end) {
          /* dangling backslash at the end of the input: do not read
             past orig_size bytes */
          break;
        }
      }
      /* Copy char to destination */
      *curdest = *curorig;

      curorig++;
      curdest++;
    }
  }

  /* append a NULL byte. This is required if orig was a
     zero-terminated string. It does not hurt if orig was a
     binary string as the calling function is not supposed to
     read past the returned number of bytes */
  *curdest = '\0';

  return (size_t)(curdest - dest);
}
1109
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  escape_latex_chars(): backslash-escapes the LaTeX command characters
                    in a string. This is a convenience wrapper that
                    calls escape_chars() with a fixed character list

  size_t escape_latex_chars returns the value returned by
                    escape_chars(), i.e. the length of the escaped
                    string

  char *dest pointer to a buffer that will receive the escaped string.
                    It is passed on to escape_chars() unchanged, so
                    the size requirements of that function apply

  const char *orig pointer to the buffer with the string to be escaped

  size_t orig_size length of original string

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
size_t escape_latex_chars(char *dest, const char *orig, size_t orig_size) {
  /* the characters that get a leading backslash */
  static const char latex_specials[] = "#$%&~_^\\{}";

  return escape_chars(dest, orig, orig_size, latex_specials);
}
1128
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  escape_latex_chars_copy(): backslash-escapes characters in a string
                    this function is a wrapper for escape_latex_chars()
                    it returns an allocated escaped string

  char* escape_latex_chars_copy returns the escaped string in a buffer
                    obtained from malloc(), or NULL if the buffer
                    could not be allocated. The calling function is
                    responsible for freeing the buffer

  const char *orig pointer to the buffer with the string to be escaped

  size_t orig_size length of original string

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
char* escape_latex_chars_copy(const char *orig, size_t orig_size) {
  char* dest;

  /* refuse sizes for which 2*orig_size+1 would wrap around */
  if (orig_size > ((size_t)-1 - 1) / 2) {
    return NULL;
  }

  /* worst case: every byte of orig gets a backslash, plus the
     terminating '\0' that is always appended to the result */
  if ((dest = malloc(2*orig_size + 1)) == NULL) {
    return NULL;
  }

  escape_latex_chars(dest, orig, orig_size);
  return dest;
}
1150
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  increment_suffix(): calling this function repeatedly generates a
                      sequence of suffixes. The sequence
                      starts with a,b...z, then aa,ba..za...zz until
                      the length of the string would exceed max_depth,
                      where the fn will return an error.
                      The leftmost character is incremented first; a
                      character that runs past the end of the alphabet
                      is reset and the carry moves one position to
                      the right

  int increment_suffix returns 0 if ok or 1 if max_depth is
                      exceeded. In the latter case the contents of
                      suffix are no longer a valid suffix

  char* suffix ptr to buffer to receive suffix. Must hold at least
               max_depth characters plus a terminating \0. In most
               cases you want the buffer to be an empty string when
               you first call this function to start with "a" or "A"

  int max_depth determines the maximum length of the resulting suffix

  int upper if non-zero, the suffix will use uppercase characters,
            if 0 it'll be lowercase

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int increment_suffix(char* suffix, int max_depth, int upper) {
  const char first = upper ? 'A' : 'a';
  const char last = upper ? 'Z' : 'z';
  char *pos = suffix;
  int depth_left = max_depth;

  for (;;) {
    if (*pos == '\0') {
      /* reached the end of the suffix: append a fresh character */
      pos[0] = first;
      pos[1] = '\0';
      return 0;
    }

    ++*pos;

    if (*pos <= last) {
      /* no carry, we're done */
      return 0;
    }

    if (depth_left <= 1) {
      /* a carry is needed but there is no position left for it */
      return 1;
    }

    /* wrap around and carry into the next character to the right */
    *pos = first;
    pos++;
    depth_left--;
  }
}
1203
1204 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1205 parse_versioninfo(): parses a version info string (e.g. 3.23.11) and
1206 extracts the major, minor, and minuscule numbers
1207 the results are filled into a structure as numbers
1208
1209 int parse_versioninfo returns 0 if ok or 1 if error
1210
1211 const char* version ptr to buffer containing the version info string.
1212 the string may have up to two periods to separate
1213 the major, minor, and minuscule numbers.
1214
1215 struct VERSIONINFO* ver ptr to structure that receives the numeric
1216 version info
1217
1218 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
parse_versioninfo(const char * version,struct VERSIONINFO * ver)1219 int parse_versioninfo(const char* version, struct VERSIONINFO* ver) {
1220 char* start;
1221 char* dot;
1222 char* my_version;
1223
1224 /* initialize structure */
1225 ver->major = 0;
1226 ver->minor = 0;
1227 ver->minuscule = 0;
1228
1229 if (!version || !*version) {
1230 /* don't treat this as error. The version structure will contain
1231 all zeros */
1232 return 0;
1233 }
1234
1235 /* get a copy of version to mangle */
1236 if ((my_version = strdup(version)) == NULL) {
1237 return 1;
1238 }
1239
1240 start = my_version;
1241 if ((dot = strchr(start, (int)'.')) == NULL) {
1242 ver->major = atoi(start);
1243 free(my_version);
1244 return 0;
1245 }
1246
1247 *dot = '\0';
1248 ver->major = atoi(start);
1249
1250 if (!(dot+1)) {
1251 free(my_version);
1252 return 0;
1253 }
1254
1255 start = dot+1;
1256
1257 if ((dot = strchr(start, (int)'.')) == NULL) {
1258 ver->minor = atoi(start);
1259 free(my_version);
1260 return 0;
1261 }
1262
1263 *dot = '\0';
1264 ver->minor = atoi(start);
1265
1266 if (!(dot+1)) {
1267 free(my_version);
1268 return 0;
1269 }
1270
1271 start = dot+1;
1272
1273 ver->minuscule = atoi(start);
1274
1275 free(my_version);
1276 return 0;
1277 }
1278
1279
1280 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1281 min_token_length(): calculates the size of the smallest token in the
1282 string. The string is tokenized using strtok()
1283 according to the passed separators
1284
1285 size_t min_token_length returns the size of the smallest token, or 0 if
1286 an error occurred
1287
1288 const char* string ptr to the buffer to check
1289
1290 const char* sep ptr to the string containing the separator characters
1291
1292 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
min_token_length(const char * string,const char * sep)1293 size_t min_token_length(const char* string, const char* sep) {
1294 char* my_string;
1295 char* start;
1296 size_t min_length = SIZE_T_MAX;
1297 size_t curr_length;
1298
1299 if (!string
1300 || !sep
1301 || (my_string = strdup(string)) == NULL) {
1302 return 0;
1303 }
1304
1305 start = my_string;
1306
1307 for (start = strtok(my_string, sep); start; start = strtok(NULL, sep)) {
1308 if ((curr_length = strlen(start)) < min_length) {
1309 min_length = curr_length;
1310 }
1311 }
1312
1313 free(my_string);
1314
1315 return min_length;
1316 }
1317
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  replace_regexp_chars(): replaces every character of a string that is
                      contained in a list of characters with a
                      replacement character. The string is modified
                      in place

  int replace_regexp_chars returns the number of characters that were
                      replaced, or 0 if string or regexp_chars is NULL

  char* string ptr to the zero-terminated string to modify

  char* regexp_chars ptr to a zero-terminated string containing the
                      characters that should be replaced

  char replacement the character that is written in place of each
                      matching character

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int replace_regexp_chars(char* string, char* regexp_chars, char replacement) {
  char* curr;
  int num_replaced = 0;

  if (!string || !regexp_chars) {
    return 0;
  }

  for (curr = string; *curr; curr++) {
    /* *curr is never '\0' here, so strchr() cannot match the
       terminator of regexp_chars. Each position is tested once, so a
       character is counted once even if the replacement itself is
       listed in regexp_chars */
    if (strchr(regexp_chars, *curr) != NULL) {
      *curr = replacement;
      num_replaced++;
    }
  }

  return num_replaced;
}
1353