1 /*
2  * Copyright (c) 2013 Genome Research Ltd.
3  * Author(s): James Bonfield
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *    1. Redistributions of source code must retain the above copyright notice,
9  *       this list of conditions and the following disclaimer.
10  *
11  *    2. Redistributions in binary form must reproduce the above
12  *       copyright notice, this list of conditions and the following
13  *       disclaimer in the documentation and/or other materials provided
14  *       with the distribution.
15  *
16  *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17  *    Institute nor the names of its contributors may be used to endorse
18  *    or promote products derived from this software without specific
19  *    prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25  * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Author(s): James Bonfield
36  *
37  * Copyright (c) 2003 MEDICAL RESEARCH COUNCIL
38  * All rights reserved
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions are met:
42  *
43  *    1 Redistributions of source code must retain the above copyright notice,
44  *      this list of conditions and the following disclaimer.
45  *
46  *    2 Redistributions in binary form must reproduce the above copyright
47  *      notice, this list of conditions and the following disclaimer in
48  *      the documentation and/or other materials provided with the
49  *      distribution.
50  *
51  *    3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF
52  *      MOLECULAR BIOLOGY nor the names of its contributors may be used
53  *      to endorse or promote products derived from this software without
54  *      specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
57  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
60  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  */
68 
69 #ifdef HAVE_CONFIG_H
70 #include "io_lib_config.h"
71 #endif
72 
73 #include <stdlib.h>
74 #include <string.h>
75 #include <io_lib/dstring.h>
76 #include <math.h>
77 #include <stdarg.h>
78 #include <stdio.h>
79 #include <ctype.h>
80 #include <errno.h>
81 
82 #include "io_lib/dstring.h"
83 #include "io_lib/vlen.h"
84 
85 /*
86  * Allocates a new dstring, initialising it to a default str (or NULL).
87  *
88  * Returns dstring_t pointer on success.
89  *         NULL on failure.
90  */
dstring_create(const char * str)91 dstring_t *dstring_create(const char *str) {
92     dstring_t *ds = (dstring_t *)malloc(sizeof(*ds));
93     if (!ds)
94 	return NULL;
95 
96     ds->str = NULL;
97     ds->allocated = 0;
98     ds->length = 0;
99 
100     if (str) {
101 	if (dstring_insert(ds, 0, str) == -1) {
102 	    dstring_destroy(ds);
103 	    return NULL;
104 	}
105     }
106 
107     return ds;
108 }
109 
110 /*
111  * As per dstring_create(), but using str,len as the internal data.
112  * Ie the caller is giving this data to the dstring object. str should
113  * be a malloced pointer.
114  *
115  * Returns dstring_t pointer on success.
116  *         NULL on failure.
117  */
dstring_create_with(char * str,size_t len)118 dstring_t *dstring_create_with(char *str, size_t len) {
119     dstring_t *ds = (dstring_t *)malloc(sizeof(*ds));
120     if (!ds)
121 	return NULL;
122 
123     ds->str = str;
124     ds->allocated = ds->length = len;
125 
126     return ds;
127 }
128 
129 /* Deallocates a dstring */
dstring_destroy(dstring_t * ds)130 void dstring_destroy(dstring_t *ds) {
131     if (ds) {
132 	if (ds->str)
133 	    free(ds->str);
134 	free(ds);
135     }
136 }
137 
138 /*
139  * Returns a C string from a dstring. If the dstring is empty this may be
140  * NULL.
141  */
dstring_str(const dstring_t * ds)142 char *dstring_str(const dstring_t *ds) {
143     return ds->str;
144 }
145 
146 /*
147  * Empties a dstring without freeing the contents. Ie sets it to contain
148  * a blank string.
149  */
dstring_empty(dstring_t * ds)150 void dstring_empty(dstring_t *ds) {
151     ds->length = 0;
152     if (ds->str)
153 	*ds->str = 0;
154 }
155 
156 /*
157  * Force the memory allocated for a dstring to be at least length characters
158  * long. (The allocated length will include 1 more to allow for the nul
159  * termination.)
160  * It's possible to shrink a string too, although shrinking a string will not
161  * guarantee if remains nul terminated.
162  *
163  * Returns 0 for success
164  *        -1 for failure
165  */
dstring_resize(dstring_t * ds,size_t length)166 int dstring_resize(dstring_t *ds, size_t length) {
167     char *str;
168     size_t length2;
169 
170     if (length+1 <= ds->allocated)
171 	return 0;
172 
173     /*
174      * Allocate with additional overhead so as to reduce calling this
175      * to often. Increase to next power of 2 minus a little bit to
176      * allow for malloc overheads.
177      *
178      * If we are doing a very significant jump here, don't resize
179      * much as it's likely we already know the size rather than growing
180      * it line by line.
181      */
182     if (length > 4*ds->length && length > 4096) {
183 	length2 = length+1023;
184     } else {
185 	length2 = pow(2, ceil(log(length+1)/log(2)));
186 	if (length2 > 4096 && length2-32 > length) length2 -= 32;
187     }
188     /* length++;*/
189     str = realloc(ds->str, length2);
190     if (!str)
191 	return -1;
192     else {
193 	ds->allocated = length2;
194 	/* If this is first alloc, make sure we null terminate */
195 	if (!ds->str) {
196 	    str[0] = 0;
197 	}
198 	ds->str = str;
199     }
200 
201     return 0;
202 }
203 
204 /*
205  * Uses vsprintf style formatting to produce a string which is then inserted
206  * at a specific offset.
207  *
208  * Returns 0 for success
209  *        -1 for failure
210  */
dstring_vinsertf(dstring_t * ds,size_t offset,const char * fmt,va_list args)211 int dstring_vinsertf(dstring_t *ds,
212 		     size_t offset,
213 		     const char *fmt,
214 		     va_list args) {
215     size_t est_length;
216     char buf[8192], *bufp = buf;
217 
218     /*
219      * Work out the expanded length, if it's small enough use a temporary
220      * buffer to store the sprintf output.
221      */
222     est_length = vflen((char *)fmt, args);
223 
224     if (est_length+1 > 8192) {
225 	if (NULL == (bufp = (char *)malloc(est_length+1))) {
226 	    goto error;
227 	}
228     }
229 
230     /* Produce the C string, and add it to the dstring */
231     vsprintf(bufp, fmt, args);
232 
233     if (-1 == dstring_insert(ds, offset, bufp))
234 	goto error;
235 
236     if (bufp != buf)
237 	free(bufp);
238 
239     va_end(args);
240 
241     return 0;
242 
243  error:
244     if (bufp && bufp != buf)
245 	free(bufp);
246 
247     va_end(args);
248 
249     return -1;
250 }
251 
252 /*
253  * Uses sprintf style formatting to produce a string which is then inserted
254  * at a specific offset.
255  *
256  * Returns 0 for success
257  *        -1 for failure
258  */
259 __PRINTF_FORMAT__(3,4)
dstring_insertf(dstring_t * ds,size_t offset,const char * fmt,...)260 int dstring_insertf(dstring_t *ds,
261 		    size_t offset,
262 		    const char *fmt,
263 		    ...) {
264     va_list args;
265     va_start(args, fmt);
266 
267     return dstring_vinsertf(ds, offset, fmt, args);
268 }
269 
270 /*
271  * Inserts a string of a specified length to a dstring at a given offset.
272  *
273  * Returns 0 for success
274  *        -1 for failure
275  */
dstring_ninsert(dstring_t * ds,size_t offset,const char * str,size_t len)276 int dstring_ninsert(dstring_t *ds,
277 		    size_t offset,
278 		    const char *str,
279 		    size_t len) {
280     if (0 != DSTRING_RESIZE(ds, ds->length + len))
281 	return -1;
282 
283     memmove(&ds->str[offset+len], &ds->str[offset], ds->length + 1 - offset);
284     memmove(&ds->str[offset], str, len);
285 
286     ds->length += len;
287 
288     return 0;
289 }
290 
291 /*
292  * Inserts a C string into a dstring at a given offset.
293  *
294  * Returns 0 for success
295  *        -1 for failure
296  */
dstring_insert(dstring_t * ds,size_t offset,const char * str)297 int dstring_insert(dstring_t *ds, size_t offset, const char *str) {
298     return dstring_ninsert(ds, offset, str, strlen(str));
299 }
300 
301 /*
302  * Adds C string to the start.
303  * Equivalent a dstring_insert at position zero.
304  *
305  * Returns 0 for success
306  *        -1 for failure
307  */
dstring_prepend(dstring_t * ds,const char * str)308 int dstring_prepend(dstring_t *ds, const char *str) {
309     return dstring_insert(ds, 0, str);
310 }
311 
312 /*
313  * Adds string of a specified length to the start.
314  *
315  * Returns 0 for success
316  *        -1 for failure
317  */
dstring_nprepend(dstring_t * ds,const char * str,size_t len)318 int dstring_nprepend(dstring_t *ds, const char *str, size_t len) {
319     return dstring_ninsert(ds, 0, str, len);
320 }
321 
322 /*
323  * Uses sprintf style formatting to produce a string which is then inserted
324  * to the start of our dstring.
325  *
326  * Returns 0 for success
327  *        -1 for failure
328  */
329 __PRINTF_FORMAT__(2,3)
dstring_prependf(dstring_t * ds,const char * fmt,...)330 int dstring_prependf(dstring_t *ds, const char *fmt, ...) {
331 
332     va_list args;
333     va_start(args, fmt);
334 
335     return dstring_vinsertf(ds, 0, fmt, args);
336 }
337 
338 /*
339  * Adds C string to the end.
340  * Equivalent a dstring_insert at position <dstring length>.
341  *
342  * Returns 0 for success
343  *        -1 for failure
344  */
dstring_append(dstring_t * ds,const char * str)345 int dstring_append(dstring_t *ds, const char *str) {
346     return dstring_insert(ds, ds->length, str);
347 }
348 
349 /*
350  * Adds a C string to the end, but URL encoding it with percent rules
351  * If specified 'meta' is a list of meta-characters need escaping, otherwise
352  * we use standard html ones ("<>&");
353  */
dstring_append_hex_encoded(dstring_t * ds,const char * str,const char * meta)354 int dstring_append_hex_encoded(dstring_t *ds, const char *str,
355 			       const char *meta) {
356     unsigned char escape[256];
357     const unsigned char *ustr = (const unsigned char *)str;
358     int i, j;
359     char hex[3];
360 
361     for (i = 0; i < 256; i++) {
362 	if (isprint(i))
363 	    escape[i] = 0;
364 	else
365 	    escape[i] = 1;
366     }
367     escape['%'] = 1;
368     if (meta) {
369 	for (i = 0; meta[i]; i++)
370 	    escape[(uc)meta[i]] = 1;
371     } else {
372 	for (i = 0; "<>&"[i]; i++)
373 	    escape[(uc)"<>&"[i]] = 1;
374     }
375 
376     j = 0;
377     hex[0] = '%';
378     do {
379 	i = j;
380 	while (ustr[i] && !escape[ustr[i]])
381 	    i++;
382 
383 	if (i-j) {
384 	    if (0 != dstring_ninsert(ds, ds->length, &str[j], i-j))
385 		return -1;
386 	}
387 
388 	while (ustr[i] && escape[ustr[i]]) {
389 	    hex[1] = "0123456789ABCDEF"[ustr[i] >>  4];
390 	    hex[2] = "0123456789ABCDEF"[ustr[i] & 0xf];
391 	    if (0 != dstring_ninsert(ds, ds->length, hex, 3))
392 		return -1;
393 	    i++;
394 	}
395 	j = i;
396     } while (ustr[i]);
397 
398     return 0;
399 }
400 
401 /*
402  * Adds a single character to the end of the string.
403  *
404  * Returns 0 for success
405  *        -1 for failure
406  */
dstring_append_char(dstring_t * ds,char c)407 int dstring_append_char(dstring_t *ds, char c) {
408     return dstring_ninsert(ds, ds->length, &c, 1);
409 }
410 
411 /*
412  * Adds an integer to the end of the string, turning it to printable ascii.
413  *
414  * Returns 0 for success
415  *        -1 for failure
416  */
dstring_append_int(dstring_t * ds,int i)417 int dstring_append_int(dstring_t *ds, int i) {
418     char buf[50], *cp = buf;
419     int j, k = 0;
420 
421     if (i == 0) {
422 	*cp++ = '0';
423     } else {
424 	if (i < 0) {
425 	    *cp++ = '-';
426 	    i = -i;
427 	}
428 
429 	if (i < 1000)
430 	    goto b1;
431 	if (i < 100000)
432 	    goto b2;
433 	if (i < 100000000)
434 	    goto b3;
435 
436 	j = i / 1000000000;
437 	if (j || k) *cp++ = j + '0', k=1, i %= 1000000000;
438 
439 	j = i / 100000000;
440 	if (j || k) *cp++ = j + '0', k=1, i %= 100000000;
441 
442     b3:
443 	j = i / 10000000;
444 	if (j || k) *cp++ = j + '0', k=1, i %= 10000000;
445 
446 	j = i / 1000000;
447 	if (j || k) *cp++ = j + '0', k=1, i %= 1000000;
448 
449 	j = i / 100000;
450 	if (j || k) *cp++ = j + '0', k=1, i %= 100000;
451 
452     b2:
453 	j = i / 10000;
454 	if (j || k) *cp++ = j + '0', k=1, i %= 10000;
455 
456 	j = i / 1000;
457 	if (j || k) *cp++ = j + '0', k=1, i %= 1000;
458 
459     b1:
460 	j = i / 100;
461 	if (j || k) *cp++ = j + '0', k=1, i %= 100;
462 
463 	j = i / 10;
464 	if (j || k) *cp++ = j + '0', k=1, i %= 10;
465 
466 	if (i || k) *cp++ = i + '0';
467     }
468 
469     return dstring_ninsert(ds, ds->length, buf, cp-buf);
470 }
471 
472 /*
473  * Adds string of a specified length to the end.
474  *
475  * Returns 0 for success
476  *        -1 for failure
477  */
dstring_nappend(dstring_t * ds,const char * str,size_t len)478 int dstring_nappend(dstring_t *ds, const char *str, size_t len) {
479     if (0 != DSTRING_RESIZE(ds, ds->length + len))
480 	return -1;
481 
482     memcpy(&ds->str[ds->length], str, len);
483     ds->length += len;
484 
485     return 0;
486 }
487 
488 /*
489  * Uses sprintf style formatting to produce a string which is then appended
490  * to our dstring.
491  *
492  * Returns 0 for success
493  *        -1 for failure
494  */
495 __PRINTF_FORMAT__(2,3)
dstring_appendf(dstring_t * ds,const char * fmt,...)496 int dstring_appendf(dstring_t *ds, const char *fmt, ...) {
497 
498     va_list args;
499     va_start(args, fmt);
500 
501     return dstring_vinsertf(ds, ds->length, fmt, args);
502 }
503 
504 /*
505  * Refreshes the cached dstring length.
506  * Use this if you obtain a copy of the internal C string and manipulate it
507  * in some way.
508  */
dstring_refresh_length(dstring_t * ds)509 void dstring_refresh_length(dstring_t *ds) {
510     ds->length = strlen(ds->str);
511 }
512 
513 /*
514  * Returns the length of the dstring (excluding nul; like strlen).
515  */
dstring_length(dstring_t * ds)516 size_t dstring_length(dstring_t *ds) {
517     return ds->length;
518 }
519 
520 /*
521  * Inserts a dstring into a dstring
522  *
523  * Returns 0 for success
524  *        -1 for failure
525  */
dstring_dinsert(dstring_t * ds_to,size_t offset,const dstring_t * ds_from)526 int dstring_dinsert(dstring_t *ds_to,
527 		    size_t offset,
528 		    const dstring_t *ds_from) {
529     if (!ds_from || !ds_to)
530 	return -1;
531 
532     return dstring_insert(ds_to, offset, ds_from->str);
533 }
534 
535 /*
536  * Deletes a section from a dstring, starting at 'offset' and extending
537  * for 'length' characters.
538  */
dstring_delete(dstring_t * ds,size_t offset,size_t length)539 void dstring_delete(dstring_t *ds, size_t offset, size_t length) {
540     memmove(&ds->str[offset], &ds->str[offset+length],
541 	    ds->length + 1 - (offset + length));
542     ds->length -= length;
543 }
544 
545 /*
546  * Replaces a section from a dstring (at offset for length bytes) with a
547  * new (C) string.
548  *
549  * Returns 0 for success
550  *        -1 for failure
551  */
dstring_replace(dstring_t * ds,size_t offset,size_t length,const char * rep_str)552 int dstring_replace(dstring_t *ds,
553 		    size_t offset,
554 		    size_t length,
555 		    const char *rep_str) {
556     size_t rep_len = strlen(rep_str);
557 
558     /* Ensure our string is large enough */
559     if (rep_len > length) {
560 	if (0 != DSTRING_RESIZE(ds, ds->length + rep_len - length))
561 	    return -1;
562     }
563 
564     /* Do the replace */
565     if (rep_len != length) {
566 	memmove(&ds->str[offset+rep_len], &ds->str[offset+length],
567 		ds->length + 1 - (offset + length));
568     }
569     memmove(&ds->str[offset], rep_str, rep_len);
570     ds->length += rep_len - length;
571 
572     return 0;
573 }
574 
575 /*
576  * Replaces a section from a dstring (at offset for length bytes) with a
577  * new dstring.
578  *
579  * Returns 0 for success
580  *        -1 for failure
581  */
dstring_dreplace(dstring_t * ds,size_t offset,size_t length,const dstring_t * rep_with)582 int dstring_dreplace(dstring_t *ds,
583 		     size_t offset,
584 		     size_t length,
585 		     const dstring_t *rep_with) {
586     return dstring_replace(ds, offset, length, rep_with->str);
587 }
588 
589 
590 /*
591  * Searches for the first occurance of 'search' in a dstring starting
592  * at position offset (including looking at that position).
593  *
594  * Returns the new offset if found
595  *        -1 if not.
596  */
dstring_find(dstring_t * ds,size_t offset,const char * search)597 int dstring_find(dstring_t *ds,
598 		 size_t offset,
599 		 const char *search) {
600     size_t i;
601     size_t search_len = strlen(search);
602 
603     /* Noddy algorithm to start with; use Boyer-Moore or something if needed */
604     for (i = offset; i <= ds->length; i++) {
605 	if (strncmp(&ds->str[i], search, search_len) == 0)
606 	    return i;
607     }
608 
609     return -1;
610 }
611 
612 /*
613  * A combination of dstring_find and dstring_replace.
614  * Look for 'search' starting at a specific offset. If found replace it with
615  * replace.
616  *
617  * Returns position of replaced string if found
618  *        -1 if not found or on error.
619  */
dstring_find_replace(dstring_t * ds,size_t offset,const char * search,const char * rep_with)620 int dstring_find_replace(dstring_t *ds,
621 			 size_t offset,
622 			 const char *search,
623 			 const char *rep_with) {
624     int pos;
625     size_t search_len = strlen(search);
626 
627     /* Find */
628     if (-1 == (pos = dstring_find(ds, offset, search)))
629 	return -1;
630 
631     /* And replace */
632     if (0 != dstring_replace(ds, pos, search_len, rep_with))
633 	return -1;
634 
635     return pos;
636 }
637 
638 /*
639  * Look for 'search' starting at a specific offset. If found replace it with
640  * replace. Repeat until all occurances have been replaced.
641  *
642  * Returns 0 for success
643  *        -1 on error
644  */
dstring_find_replace_all(dstring_t * ds,const char * search,const char * rep_with)645 int dstring_find_replace_all(dstring_t *ds,
646 			     const char *search,
647 			     const char *rep_with) {
648     /*
649      * Most efficient mechanism is to search and replace to a separate
650      * buffer so that we don't have problems with search being contained
651      * within rep_with and do not have efficiency problems due to
652      * excessive use of string shifting.
653      */
654     dstring_t *new_ds = dstring_create(NULL);
655     int found_pos, current_pos = 0;
656     size_t search_len = strlen(search);
657     dstring_t tmp;
658 
659     if (!new_ds)
660 	goto error;
661 
662     while (-1 != (found_pos = dstring_find(ds, current_pos, search))) {
663 	if (-1 == dstring_nappend(new_ds, &ds->str[current_pos],
664 				  found_pos - current_pos))
665 	    goto error;
666 	if (-1 == dstring_append(new_ds, rep_with))
667 	    goto error;
668 	current_pos = found_pos + search_len;
669     }
670     if (-1 == dstring_append(new_ds, &ds->str[current_pos]))
671 	goto error;
672 
673     /*
674      * Swap data structures over and free the original dstring. We can't
675      * swap pointers as the calling function still needs 'ds' to be the
676      * same address.
677      */
678     tmp = *ds;
679     *ds = *new_ds;
680     *new_ds = tmp;
681     dstring_destroy(new_ds);
682 
683     return 0;
684 
685  error:
686     if (new_ds)
687 	dstring_destroy(new_ds);
688     return -1;
689 }
690 
691 /*
692  * Escapes HTML meta characters by replacing them with appropriate HTML
693  * codes.
694  * We deal with the following:
695  *
696  * &	&amp;
697  * <	&lt;
698  * >	&gt;
699  * "	&quot;
700  *
701  * Returns 0 for success
702  *        -1 on error
703  */
dstring_escape_html(dstring_t * ds)704 int dstring_escape_html(dstring_t *ds) {
705     if (-1 == dstring_find_replace_all(ds, "&", "&amp;"))
706 	return -1;
707     if (-1 == dstring_find_replace_all(ds, "<", "&lt;"))
708 	return -1;
709     if (-1 == dstring_find_replace_all(ds, ">", "&gt;"))
710 	return -1;
711     if (-1 == dstring_find_replace_all(ds, "\"", "&quot;"))
712 	return -1;
713 
714     return 0;
715 }
716 
717 /*
718  * Searches for URLs in text strings and converts then to html href links.
719  * At present we just look for http://, https://, ftp://, file:// and
720  * mailto://
721  *
722  * Returns 0 for success
723  *        -1 on error
724  */
dstring_htmlise_links(dstring_t * ds)725 int dstring_htmlise_links(dstring_t *ds) {
726     char *links[] = {"http://", "https://", "ftp://", "file://", "mailto://"};
727     size_t nlinks = sizeof(links)/sizeof(*links);
728     size_t i;
729 
730     for (i = 0; i < nlinks; i++) {
731 	int pos = 0;
732 	while (-1 != (pos = dstring_find(ds, pos, links[i]))) {
733 	    size_t end, url_len;
734 	    char *str = dstring_str(ds);
735 	    dstring_t *url;
736 
737 	    /* Got the left end; extend to find right end */
738 	    for (end = pos+1; str[end] && !isspace(str[end]); end++)
739 		;
740 
741 	    /* Create a new href string */
742 	    if (NULL == (url = dstring_create(NULL)))
743 		return -1;
744 
745 	    if (-1 == dstring_insertf(url, 0, "<a href=\"%.*s\">%.*s</a>",
746 				      (int)(end-pos), &str[pos],
747 				      (int)(end-pos), &str[pos])) {
748 		dstring_destroy(url);
749 		return -1;
750 	    }
751 
752 	    url_len = dstring_length(url);
753 	    if (-1 == dstring_dreplace(ds, pos, end-pos, url)) {
754 		dstring_destroy(url);
755 		return -1;
756 	    }
757 	    dstring_destroy(url);
758 	    pos += url_len;
759 	}
760     }
761 
762     return 0;
763 }
764 
765 /*
766  * Converts a text string into a HTML version representing the same string.
767  * This includes escaping any HTML meta characters and searching for URLs
768  * within the string and replacing it with an HTML link (keeping the link as
769  * the anchor name).
770  * This is simply a wrapper joining dstring_escape_html and
771  * dstring_htmlise_links.
772  *
773  * Returns 0 for success
774  *        -1 on error
775  */
dstring_to_html(dstring_t * ds)776 int dstring_to_html(dstring_t *ds) {
777     if (-1 == dstring_escape_html(ds))
778 	return -1;
779 
780     return dstring_htmlise_links(ds);
781 }
782 
783 /*
784  * Appends an system error much like perror(), using errno.
785  * 'str' is added in the form "str: error_message".
786  *
787  * All Return 0 for success
788  *           -1 for failure
789  */
dstring_perror(dstring_t * ds,const char * str)790 int dstring_perror(dstring_t *ds, const char *str) {
791     return dstring_appendf(ds, "%s: %s\n", str, strerror(errno));
792 }
793 
794 #ifdef TEST
795 
main(void)796 int main(void) {
797     dstring_t *ds1 = dstring_create("foo");
798     dstring_t *ds2 = dstring_create(NULL);
799     dstring_t *ds3 = dstring_create("blah");
800 
801     printf("ds1='%s'\n", dstring_str(ds1));
802     printf("ds3='%s'\n", dstring_str(ds3));
803 
804     dstring_resize(ds2, 10);
805 
806     printf("ret=%d, ", dstring_insert(ds1, 3, "abc"));
807     printf("ds1='%s'\n", dstring_str(ds1));
808 
809     printf("ret=%d, ", dstring_insert(ds1, 0, "xyz"));
810     printf("ds1='%s'\n", dstring_str(ds1));
811 
812     printf("ret=%d, ", dstring_dinsert(ds1, 3, ds3));
813     printf("ds1='%s'\n", dstring_str(ds1));
814 
815     printf("ret=%d, ", dstring_replace(ds1, 3, 4, "fish"));
816     printf("ds1='%s'\n", dstring_str(ds1));
817 
818     printf("ret=%d, ", dstring_replace(ds1, 3, 4, "X"));
819     printf("ds1='%s'\n", dstring_str(ds1));
820 
821     printf("ret=%d, ", dstring_replace(ds1, 3, 1, "YZ"));
822     printf("ds1='%s'\n", dstring_str(ds1));
823 
824     printf("ret=%d, ", dstring_replace(ds1, 3, 2, ""));
825     printf("ds1='%s'\n", dstring_str(ds1));
826 
827     printf("ret=%d, ", dstring_replace(ds1, 3, 0, "blah"));
828     printf("ds1='%s'\n", dstring_str(ds1));
829 
830     printf("ret=%d\n", dstring_find(ds1, 0, "ab"));
831     printf("ret=%d\n", dstring_find(ds1, 0, "a"));
832     printf("ret=%d\n", dstring_find(ds1, 6, "a"));
833     printf("ret=%d\n", dstring_find(ds1, 11, "a"));
834 
835     printf("ret=%d, ", dstring_replace(ds1, 3, 0, "blah"));
836     printf("ds1='%s'\n", dstring_str(ds1));
837     printf("len=%ld, strlen=%ld\n",
838 	   (long)dstring_length(ds1),
839 	   (long)strlen(dstring_str(ds1)));
840 
841     printf("ret=%d, ", dstring_dreplace(ds1, 0, 10, ds3));
842     printf("ds1='%s'\n", dstring_str(ds1));
843 
844     printf("ret=%d, ", dstring_find_replace(ds1, 0, "h", "abc"));
845     printf("ds1='%s'\n", dstring_str(ds1));
846 
847     printf("ret=%d, ", dstring_find_replace_all(ds1, "ab", "X"));
848     printf("ds1='%s'\n", dstring_str(ds1));
849     printf("ret=%d, ", dstring_find_replace_all(ds1, "b", "X"));
850     printf("ret=%d, ", dstring_find_replace_all(ds1, "l", "XX"));
851     printf("ret=%d, ", dstring_find_replace_all(ds1, "oo", "X"));
852     printf("ds1='%s'\n", dstring_str(ds1));
853 
854     printf("ret=%d, ", dstring_find_replace_all(ds1, "XX", "XXX"));
855     printf("ds1='%s'\n", dstring_str(ds1));
856 
857     printf("ret=%d, ", dstring_find_replace_all(ds1, "X", ""));
858     printf("ds1='%s'\n", dstring_str(ds1));
859 
860     {
861 	int i;
862 	for (i = 0; i < 8; i++) {
863 	    dstring_find_replace_all(ds1, "h", "hh");
864 	    dstring_find_replace_all(ds1, "hh", "xhhhx");
865 	    printf("i=%d, len=%ld\n", i, (long)strlen(dstring_str(ds1)));
866 	}
867     }
868 
869     dstring_destroy(ds1);
870     dstring_destroy(ds2);
871     dstring_destroy(ds3);
872 
873     ds1 = dstring_create("xyz<http://www.mrc-lmb.cam.ac.uk/ foo&bar>=\"fish\"");
874     printf("ds1='%s'\n", dstring_str(ds1));
875     dstring_to_html(ds1);
876     printf("ds1='%s'\n", dstring_str(ds1));
877 
878     return 0;
879 }
880 
881 #endif
882