1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * rasqal_regex.c - Rasqal regex support
4  *
5  * Copyright (C) 2011, David Beckett http://www.dajobe.org/
6  *
7  * This package is Free Software and part of Redland http://librdf.org/
8  *
9  * It is licensed under the following three licenses as alternatives:
10  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
11  *   2. GNU General Public License (GPL) V2 or any newer version
12  *   3. Apache License, V2.0 or any newer version
13  *
14  * You may not use this file except in compliance with at least one of
15  * the above three licenses.
16  *
17  * See LICENSE.html or LICENSE.txt at the top of this package for the
18  * complete terms and further detail along with the license texts for
19  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
20  *
21  *
22  */
23 
24 #ifdef HAVE_CONFIG_H
25 #include <rasqal_config.h>
26 #endif
27 
28 #ifdef WIN32
29 #include <win32_rasqal_config.h>
30 #endif
31 
32 #include <stdio.h>
33 #include <string.h>
34 #include <ctype.h>
35 #ifdef HAVE_STDLIB_H
36 #include <stdlib.h>
37 #endif
38 #include <stdarg.h>
39 
40 #ifdef RASQAL_REGEX_PCRE
41 #include <pcre.h>
42 #endif
43 
44 #ifdef RASQAL_REGEX_POSIX
45 #include <sys/types.h>
46 #include <regex.h>
47 #endif
48 
49 #include "rasqal.h"
50 #include "rasqal_internal.h"
51 
52 
53 #define DEBUG_FH stderr
54 
55 
56 #ifndef STANDALONE
57 
58 
59 /*
60  * rasqal_regex_match:
61  * @world: world
62  * @locator: locator
63  * @pattern: regex pattern
64  * @regex_flags: regex flags string
65  * @subject: input string
66  * @subject_len: input string length
67  *
68  * INTERNAL - Test if a string matches a regex pattern.
69  *
70  * Intended to be used for executing #RASQAL_EXPR_STR_MATCH and
71  * #RASQAL_EXPR_STR_NMATCH operations (unused: formerly RDQL)
72  *
73  * Return value: <0 on error, 0 for no match, >0 for match
74  *
75  */
76 int
rasqal_regex_match(rasqal_world * world,raptor_locator * locator,const char * pattern,const char * regex_flags,const char * subject,size_t subject_len)77 rasqal_regex_match(rasqal_world* world, raptor_locator* locator,
78                    const char* pattern,
79                    const char* regex_flags,
80                    const char* subject, size_t subject_len)
81 {
82   int flag_i = 0; /* regex_flags contains i */
83   const char *p;
84 #ifdef RASQAL_REGEX_PCRE
85   pcre* re;
86   int compile_options = PCRE_UTF8;
87   int exec_options = 0;
88   const char *re_error = NULL;
89   int erroffset = 0;
90 #endif
91 #ifdef RASQAL_REGEX_POSIX
92   regex_t reg;
93   int compile_options = REG_EXTENDED;
94   int exec_options = 0;
95 #endif
96   int rc = 0;
97 
98   for(p = regex_flags; p && *p; p++)
99     if(*p == 'i')
100       flag_i++;
101 
102 #ifdef RASQAL_REGEX_PCRE
103   if(flag_i)
104     compile_options |= PCRE_CASELESS;
105 
106   re = pcre_compile(RASQAL_GOOD_CAST(const char*, pattern), compile_options,
107                     &re_error, &erroffset, NULL);
108   if(!re) {
109     rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR, locator,
110                             "Regex compile of '%s' failed - %s", pattern, re_error);
111     rc = -1;
112   } else {
113     rc = pcre_exec(re,
114                    NULL, /* no study */
115                    subject,
116                    RASQAL_BAD_CAST(int, subject_len), /* PCRE API is an int */
117                    0 /* startoffset */,
118                    exec_options /* options */,
119                    NULL, 0 /* ovector, ovecsize - no matches wanted */
120                    );
121     if(rc >= 0)
122       rc = 1;
123     else if(rc != PCRE_ERROR_NOMATCH) {
124       rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR, locator,
125                               "Regex match failed - returned code %d", rc);
126       rc= -1;
127     } else
128       rc = 0;
129   }
130   pcre_free(re);
131 
132 #endif
133 
134 #ifdef RASQAL_REGEX_POSIX
135   if(flag_i)
136     compile_options |= REG_ICASE;
137 
138   rc = regcomp(&reg, RASQAL_GOOD_CAST(const char*, pattern), compile_options);
139   if(rc) {
140     rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR,
141                             locator,
142                             "Regex compile of '%s' failed", pattern);
143     rc = -1;
144   } else {
145     rc = regexec(&reg, RASQAL_GOOD_CAST(const char*, subject),
146                  0, NULL, /* nmatch, regmatch_t pmatch[] - no matches wanted */
147                  exec_options /* eflags */
148                  );
149     if(!rc)
150       rc = 1;
151     else if (rc != REG_NOMATCH) {
152       rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR, locator,
153                               "Regex match failed - returned code %d", rc);
154       rc = -1;
155     } else
156       rc = 0;
157   }
158   regfree(&reg);
159 #endif
160 
161 #ifdef RASQAL_REGEX_NONE
162   rasqal_log_warning_simple(world, RASQAL_WARNING_LEVEL_MISSING_SUPPORT, locator,
163                             "Regex support missing, cannot compare '%s' to '%s'",
164                             match_string, pattern);
165   rc = -1;
166 #endif
167 
168   return rc;
169 }
170 
171 
172 
/*
 * rasqal_regex_get_ref_number:
 * @str: pointer to pointer to buffer at the reference marker character
 *
 * INTERNAL - Decode the one or two decimal digits (N or NN) that
 * follow the marker character at *str.
 *
 * On success *str is moved past the marker and the digits consumed;
 * on failure *str is left unchanged.
 *
 * Return value: reference number or <0 if none found
 */
static int
rasqal_regex_get_ref_number(const char **str)
{
  const char *cursor = *str;
  int value;

  /* nothing at all after the marker */
  if(cursor[1] == '\0')
    return -1;

  /* step over the marker; the first digit is mandatory */
  cursor++;
  if(*cursor < '0' || *cursor > '9')
    return -1;
  value = *cursor++ - '0';

  /* an optional second digit extends the number */
  if(*cursor >= '0' && *cursor <= '9')
    value = (value * 10) + (*cursor++ - '0');

  *str = cursor;
  return value;
}
207 
208 
#ifdef RASQAL_REGEX_PCRE
/*
 * rasqal_regex_pcre_reserve:
 * @buffer_p: pointer to buffer pointer (replaced on success)
 * @buffer_size_p: pointer to buffer size excluding NUL (updated on success)
 * @used: number of bytes of the old buffer to preserve
 * @new_size: new buffer size (excluding NUL)
 *
 * INTERNAL - Move the result buffer into a larger allocation
 *
 * Return value: non-0 on allocation failure (buffer is left untouched)
 */
static int
rasqal_regex_pcre_reserve(char **buffer_p, size_t *buffer_size_p,
                          size_t used, size_t new_size)
{
  char *old_buffer = *buffer_p;
  char *new_buffer;

  new_buffer = RASQAL_MALLOC(char*, new_size + 1);
  if(!new_buffer)
    return 1;

  memcpy(new_buffer, old_buffer, used);
  RASQAL_FREE(char*, old_buffer);

  *buffer_p = new_buffer;
  *buffer_size_p = new_size;
  return 0;
}


/*
 * rasqal_regex_pcre_expand:
 * @replace: replacement string
 * @replace_end: pointer just past the end of the replacement string
 * @subject: full subject string that the @ovector offsets index into
 * @ovector: offset pairs filled in by pcre_exec()
 * @stringcount: number of pairs in @ovector that may be used
 * @out: output buffer or NULL to only measure
 *
 * INTERNAL - Expand references in a replacement string for one match
 *
 * A reference is one or two digits after '$' or '\'; "\\" and "\$"
 * produce a literal '\' and '$'.  References to groups >= @stringcount
 * or to groups that did not take part in the match expand to nothing.
 * No NUL is written.
 *
 * Return value: number of bytes the expansion occupies
 */
static size_t
rasqal_regex_pcre_expand(const char *replace, const char *replace_end,
                         const char *subject,
                         const int *ovector, int stringcount,
                         char *out)
{
  const char *replace_p = replace;
  char last_char = '\0';
  size_t out_len = 0;

  while(replace_p < replace_end) {
    if(*replace_p == '\\' || *replace_p == '$') {
      int ref_number;

      if(last_char == '\\') {
        /* Allow \\ and \$ : the escaped char overwrites the backslash
         * that was emitted on the previous iteration, so the length
         * does not change.
         */
        if(out)
          out[out_len - 1] = *replace_p;
        replace_p++;
        last_char = '\0';
        continue;
      }

      ref_number = rasqal_regex_get_ref_number(&replace_p);
      if(ref_number >= 0) {
        if(ref_number < stringcount) {
          int match_start = ovector[ref_number << 1];
          int match_end = ovector[(ref_number << 1) + 1];

          /* pcreapi: unset groups have both offsets set to -1 */
          if(match_start >= 0 && match_end > match_start) {
            size_t match_len = RASQAL_GOOD_CAST(size_t, match_end - match_start);

            if(out)
              memcpy(out + out_len, subject + match_start, match_len);
            out_len += match_len;
          }
        }
        continue;
      }
    }

    if(out)
      out[out_len] = *replace_p;
    out_len++;

    last_char = *replace_p;
    replace_p++;
  }

  return out_len;
}


/*
 * rasqal_regex_replace_pcre:
 * @world: world
 * @locator: locator
 * @re: compiled PCRE pattern
 * @options: options passed to every pcre_exec() call
 * @subject: input string
 * @subject_len: input string length
 * @replace: replacement string
 * @replace_len: length of replacement string
 * @result_len_p: pointer to store result length (output) or NULL
 *
 * INTERNAL - Replace every match of a compiled PCRE pattern in a string
 *
 * Return value: new NUL terminated string (caller frees with
 * RASQAL_FREE) or NULL on failure
 */
static char*
rasqal_regex_replace_pcre(rasqal_world* world, raptor_locator* locator,
                          pcre* re, int options,
                          const char *subject, size_t subject_len,
                          const char *replace, size_t replace_len,
                          size_t *result_len_p)
{
  int capture_count;
  unsigned long int re_options = 0;
  int utf8_mode;
  int *ovector = NULL;
  int ovecsize;
  size_t startoffset;
  int matched_empty_options;
  char *result = NULL;
  size_t result_size; /* allocated size of result (excluding NUL) */
  size_t result_len; /* used size of result */
  const char *replace_end = replace + replace_len;

  if(pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &capture_count) < 0)
    goto failed;

  /* Need to know if offsets must stay on UTF-8 character boundaries */
  if(pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &re_options) < 0)
    goto failed;
  utf8_mode = (re_options & PCRE_UTF8) ? 1 : 0;

  ovecsize = (capture_count + 1) * 3; /* +1 for whole pattern match pair */
  ovector = RASQAL_CALLOC(int *, RASQAL_GOOD_CAST(size_t, ovecsize), sizeof(int));
  if(!ovector)
    goto failed;

  result_size = subject_len << 1;
  result = RASQAL_MALLOC(char*, result_size + 1);
  if(!result)
    goto failed;
  result_len = 0;

  /* Match and replace loop; adjusting startoffset each time */
  startoffset = 0;
  matched_empty_options = 0;
  while(1) {
    int stringcount;
    const char *subject_piece = subject + startoffset;

    stringcount = pcre_exec(re,
                            NULL, /* no study */
                            subject,
                            RASQAL_BAD_CAST(int, subject_len), /* PCRE API is an int */
                            RASQAL_BAD_CAST(int, startoffset),
                            options | matched_empty_options,
                            ovector, ovecsize);

    /* "The value returned by pcre_exec() is one more than the
     * highest numbered pair that has been set. ...  If there are no
     * capturing subpatterns, the return value from a successful
     * match is 1, indicating that just the first pair of offsets has
     * been set." - pcreapi
     */

    if(!stringcount)
      /* ovector was too small - how can this happen?.  Use all
       * the variables available.  Should return an warning? FIXME
       */
      stringcount = ovecsize / 3;


    if(stringcount > 0) {
      /* matches have been found */
      const char *subject_match = subject + ovector[0];
      size_t piece_len; /* input between the last match and this one */
      size_t new_result_len;

      piece_len = RASQAL_GOOD_CAST(size_t, subject_match - subject_piece);

      /* measure first so the buffer can be grown before writing */
      new_result_len = result_len + piece_len +
                       rasqal_regex_pcre_expand(replace, replace_end,
                                                subject, ovector, stringcount,
                                                NULL);

      if(new_result_len > result_size) {
        if(rasqal_regex_pcre_reserve(&result, &result_size, result_len,
                                     result_size + (new_result_len << 1)))
          goto failed;
      }

      /* copy the piece of the input before the match */
      memcpy(&result[result_len], subject_piece, piece_len);
      result_len += piece_len;

      /* copy replacement into result inserting matched references */
      result_len += rasqal_regex_pcre_expand(replace, replace_end,
                                             subject, ovector, stringcount,
                                             result + result_len);
      result[result_len] = '\0';

      /* continue at offset after all matches */
      startoffset = RASQAL_BAD_CAST(size_t, ovector[1]);

      /*
       * "It is possible to emulate Perl's behaviour after matching a
       * null string by first trying the match again at the same
       * offset with PCRE_NOTEMPTY and PCRE_ANCHORED, and then if
       * that fails by advancing the starting offset ... and trying
       * an ordinary match again." - pcreapi
       *
       * The 'and then if' part is implemented by the if() inside
       * the if(stringcount == PCRE_ERROR_NOMATCH) below.
       *
       */
      matched_empty_options = (ovector[0] == ovector[1]) ?
                              (PCRE_NOTEMPTY | PCRE_ANCHORED) : 0;

    } else if(stringcount == PCRE_ERROR_NOMATCH) {
      /* No match */
      size_t piece_len;
      size_t new_result_len;

      if(matched_empty_options && startoffset < subject_len) {
        /* If the previous match was an empty string and there is
         * still some input to try, move on one character and continue
         * ordinary matches.
         *
         * In UTF-8 mode one character may be several bytes; stopping
         * inside it would make the next pcre_exec() fail with
         * PCRE_ERROR_BADUTF8_OFFSET, so skip the continuation bytes
         * (10xxxxxx) too.
         */
        size_t step = 1;

        if(utf8_mode) {
          while(startoffset + step < subject_len &&
                (((unsigned char)subject_piece[step]) & 0xC0) == 0x80)
            step++;
        }

        if(result_len + step > result_size) {
          if(rasqal_regex_pcre_reserve(&result, &result_size, result_len,
                                       (result_len + step) << 1))
            goto failed;
        }

        memcpy(&result[result_len], subject_piece, step);
        result_len += step;
        startoffset += step;
        matched_empty_options = 0;
        continue;
      }

      /* otherwise we are finished - copy the remaining input */
      piece_len = subject_len - startoffset;
      new_result_len = result_len + piece_len;

      if(new_result_len > result_size) {
        if(rasqal_regex_pcre_reserve(&result, &result_size, result_len,
                                     new_result_len))
          goto failed;
      }

      memcpy(&result[result_len], subject_piece, piece_len);
      result_len += piece_len;

      /* NUL terminate the result and end */
      result[result_len] = '\0';
      break;
    } else {
      /* stringcount < 0 : other failures */
      rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR, locator,
                              "Regex match failed - returned code %d",
                              stringcount);
      goto failed;
    }
  }

  RASQAL_FREE(int*, ovector);

  if(result_len_p)
    *result_len_p = result_len;

  return result;

  failed:
  if(result)
    RASQAL_FREE(char*, result);

  if(ovector)
    RASQAL_FREE(int*, ovector);

  return NULL;
}
#endif
453 
454 
#ifdef RASQAL_REGEX_POSIX
/*
 * rasqal_regex_posix_reserve:
 * @buffer_p: pointer to buffer pointer (replaced on success)
 * @buffer_size_p: pointer to buffer size excluding NUL (updated on success)
 * @used: number of bytes of the old buffer to preserve
 * @new_size: new buffer size (excluding NUL)
 *
 * INTERNAL - Move the result buffer into a larger allocation
 *
 * Return value: non-0 on allocation failure (buffer is left untouched)
 */
static int
rasqal_regex_posix_reserve(char **buffer_p, size_t *buffer_size_p,
                           size_t used, size_t new_size)
{
  char *old_buffer = *buffer_p;
  char *new_buffer;

  new_buffer = RASQAL_MALLOC(char*, new_size + 1);
  if(!new_buffer)
    return 1;

  memcpy(new_buffer, old_buffer, used);
  RASQAL_FREE(char*, old_buffer);

  *buffer_p = new_buffer;
  *buffer_size_p = new_size;
  return 0;
}


/*
 * rasqal_regex_posix_expand:
 * @replace: replacement string
 * @replace_end: pointer just past the end of the replacement string
 * @subject_piece: string that was given to regexec(); the @pmatch
 *   offsets are relative to it
 * @pmatch: match offsets filled in by regexec()
 * @pmatch_count: number of entries in @pmatch
 * @out: output buffer or NULL to only measure
 *
 * INTERNAL - Expand references in a replacement string for one match
 *
 * A reference is one or two digits after '$' or '\'; "\\" and "\$"
 * produce a literal '\' and '$'.
 *
 * pmatch[0] is the whole match.  rasqal_regex_replace() compiles the
 * pattern wrapped in one extra outer group which lands in pmatch[1],
 * so reference 0 uses pmatch[0] and reference N >= 1 uses
 * pmatch[N + 1].  References outside @pmatch or to groups that did
 * not take part in the match expand to nothing.  No NUL is written.
 *
 * Return value: number of bytes the expansion occupies
 */
static size_t
rasqal_regex_posix_expand(const char *replace, const char *replace_end,
                          const char *subject_piece,
                          const regmatch_t *pmatch, size_t pmatch_count,
                          char *out)
{
  const char *replace_p = replace;
  char last_char = '\0';
  size_t out_len = 0;

  while(replace_p < replace_end) {
    if(*replace_p == '\\' || *replace_p == '$') {
      int ref_number;

      if(last_char == '\\') {
        /* Allow \\ and \$ : the escaped char overwrites the backslash
         * that was emitted on the previous iteration, so the length
         * does not change.
         */
        if(out)
          out[out_len - 1] = *replace_p;
        replace_p++;
        last_char = '\0';
        continue;
      }

      ref_number = rasqal_regex_get_ref_number(&replace_p);
      if(ref_number >= 0) {
        size_t match_index = ref_number ? (size_t)ref_number + 1 : 0;

        /* bounds check BEFORE touching pmatch[] */
        if(match_index < pmatch_count) {
          regmatch_t rm = pmatch[match_index];

          /* rm_eo is the offset just after the match, so the length
           * is rm_eo - rm_so; unused groups have rm_so == -1
           */
          if(rm.rm_so >= 0 && rm.rm_eo > rm.rm_so) {
            size_t match_len = (size_t)(rm.rm_eo - rm.rm_so);

            if(out)
              memcpy(out + out_len, subject_piece + rm.rm_so, match_len);
            out_len += match_len;
          }
        }
        continue;
      }
    }

    if(out)
      out[out_len] = *replace_p;
    out_len++;

    last_char = *replace_p;
    replace_p++;
  }

  return out_len;
}


/*
 * rasqal_regex_replace_posix:
 * @world: world
 * @locator: locator
 * @reg: compiled POSIX regex
 * @options: eflags passed to every regexec() call
 * @subject: input string; must be NUL terminated since regexec()
 *   takes no length
 * @subject_len: input string length
 * @replace: replacement string
 * @replace_len: length of replacement string
 * @result_len_p: pointer to store result length (output) or NULL
 *
 * INTERNAL - Replace every match of a compiled POSIX regex in a string
 *
 * Return value: new NUL terminated string (caller frees with
 * RASQAL_FREE) or NULL on failure
 */
static char*
rasqal_regex_replace_posix(rasqal_world* world, raptor_locator* locator,
                           regex_t reg, int options,
                           const char *subject, size_t subject_len,
                           const char *replace, size_t replace_len,
                           size_t *result_len_p)
{
  size_t pmatch_count; /* whole match + every group */
  regmatch_t* pmatch;
  size_t startoffset;
  char *result = NULL;
  size_t result_size; /* allocated size of result (excluding NUL) */
  size_t result_len; /* used size of result */
  const char *replace_end = replace + replace_len;

  pmatch_count = reg.re_nsub + 1;

  pmatch = RASQAL_CALLOC(regmatch_t*, pmatch_count, sizeof(regmatch_t));
  if(!pmatch)
    return NULL;

  result_size = subject_len << 1;
  result = RASQAL_MALLOC(char*, result_size + 1);
  if(!result)
    goto failed;
  result_len = 0;

  /* Match and replace loop; adjusting startoffset each time */
  startoffset = 0;
  while(1) {
    int rc;
    int eflags;
    const char *subject_piece = subject + startoffset;

    /* After the first match the piece no longer starts at the
     * beginning of the input so '^' must not match there.
     */
    eflags = options | (startoffset ? REG_NOTBOL : 0);

    rc = regexec(&reg, RASQAL_GOOD_CAST(const char*, subject_piece),
                 pmatch_count, pmatch,
                 eflags
                 );

    if(!rc) {
      /* matches have been found */
      size_t piece_len; /* input between the last match and this one */
      size_t new_result_len;
      size_t needed_len;
      int matched_empty = (pmatch[0].rm_so == pmatch[0].rm_eo);

      piece_len = (size_t)pmatch[0].rm_so;

      /* measure first so the buffer can be grown before writing */
      new_result_len = result_len + piece_len +
                       rasqal_regex_posix_expand(replace, replace_end,
                                                 subject_piece,
                                                 pmatch, pmatch_count,
                                                 NULL);

      /* an empty match is followed by one literal input char (below) */
      needed_len = new_result_len + (matched_empty ? 1 : 0);

      if(needed_len > result_size) {
        if(rasqal_regex_posix_reserve(&result, &result_size, result_len,
                                      result_size + (needed_len << 1)))
          goto failed;
      }

      /* copy the piece of the input before the match */
      if(piece_len)
        memcpy(&result[result_len], subject_piece, piece_len);
      result_len += piece_len;

      /* copy replacement into result inserting matched references */
      result_len += rasqal_regex_posix_expand(replace, replace_end,
                                              subject_piece,
                                              pmatch, pmatch_count,
                                              result + result_len);

      /* continue at offset after all matches */
      startoffset += (size_t)pmatch[0].rm_eo;

      if(matched_empty) {
        /* regexec() cannot be told to skip empty matches, so it would
         * return this same match forever.  Step over one input char,
         * copying it through, or finish if the input is exhausted.
         */
        if(startoffset >= subject_len) {
          result[result_len] = '\0';
          break;
        }
        result[result_len++] = subject[startoffset++];
      }
      result[result_len] = '\0';
    } else if (rc == REG_NOMATCH) {
      /* No match: we are finished - copy the remaining input */
      size_t piece_len;
      size_t new_result_len;

      piece_len = (startoffset < subject_len) ? subject_len - startoffset : 0;
      new_result_len = result_len + piece_len;

      if(new_result_len > result_size) {
        if(rasqal_regex_posix_reserve(&result, &result_size, result_len,
                                      new_result_len))
          goto failed;
      }

      memcpy(&result[result_len], subject_piece, piece_len);
      result_len += piece_len;

      /* NUL terminate the result and end */
      result[result_len] = '\0';
      break;
    } else {
      rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR, locator,
                              "Regex match failed - returned code %d", rc);
      goto failed;
    }
  }

  RASQAL_FREE(regmatch_t*, pmatch);

  if(result_len_p)
    *result_len_p = result_len;

  return result;


  failed:
  if(result)
    RASQAL_FREE(char*, result);

  RASQAL_FREE(regmatch_t*, pmatch);

  return NULL;
}
#endif
670 
671 
672 
673 /**
674  * rasqal_regex_replace:
675  * @world: world
676  * @locator: locator
677  * @pattern: regex pattern
678  * @regex_flags: regex flags string
679  * @subject: input string
680  * @subject_len: input string length
681  * @replace: replacement string
682  * @replace_len: Length of replacement string
683  * @result_len_p: pointer to store result length (output)
684  *
685  * Replace all copies of matches to a pattern with a replacement with subsitution
686  *
687  * Intended to be used for SPARQL 1.1 REPLACE() implementation.
688  *
689  * Return value: result string or NULL on failure
690  *
691  */
692 char*
rasqal_regex_replace(rasqal_world * world,raptor_locator * locator,const char * pattern,const char * regex_flags,const char * subject,size_t subject_len,const char * replace,size_t replace_len,size_t * result_len_p)693 rasqal_regex_replace(rasqal_world* world, raptor_locator* locator,
694                      const char* pattern,
695                      const char* regex_flags,
696                      const char* subject, size_t subject_len,
697                      const char* replace, size_t replace_len,
698                      size_t* result_len_p)
699 {
700   const char *p;
701 #ifdef RASQAL_REGEX_PCRE
702   pcre* re;
703   int compile_options = PCRE_UTF8;
704   int exec_options = 0;
705   const char *re_error = NULL;
706   int erroffset = 0;
707 #endif
708 #ifdef RASQAL_REGEX_POSIX
709   regex_t reg;
710   int compile_options = REG_EXTENDED;
711   int exec_options = 0;
712   int rc = 0;
713   size_t pattern_len;
714   char* pattern2;
715 #endif
716   char *result_s = NULL;
717 
718 #ifdef RASQAL_REGEX_PCRE
719   for(p = regex_flags; p && *p; p++) {
720     if(*p == 'i')
721       exec_options |= PCRE_CASELESS;
722   }
723 
724   re = pcre_compile(pattern, compile_options,
725                     &re_error, &erroffset, NULL);
726   if(!re) {
727     rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR, locator,
728                             "Regex compile of '%s' failed - %s", pattern, re_error);
729   } else
730     result_s = rasqal_regex_replace_pcre(world, locator,
731                                          re, exec_options,
732                                          subject, subject_len,
733                                          replace, replace_len,
734                                          result_len_p);
735   pcre_free(re);
736 #endif
737 
738 #ifdef RASQAL_REGEX_POSIX
739   /* Add an outer capture so we can always find what was matched */
740   pattern_len = strlen(pattern);
741   pattern2 = RASQAL_MALLOC(char*, pattern_len + 3);
742   if(!pattern2)
743     return NULL;
744 
745   pattern2[0] = '(';
746   memcpy(pattern2 + 1, pattern, pattern_len);
747   pattern2[pattern_len + 1]=')';
748   pattern2[pattern_len + 2]='\0';
749 
750   for(p = regex_flags; p && *p; p++) {
751     if(*p == 'i')
752       compile_options |= REG_ICASE;
753   }
754 
755   rc = regcomp(&reg, pattern2, compile_options);
756   if(rc) {
757     RASQAL_FREE(char*, pattern2);
758     rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR, locator,
759                             "Regex compile of '%s' failed - %d", pattern, rc);
760   } else {
761     RASQAL_FREE(char*, pattern2);
762     result_s = rasqal_regex_replace_posix(world, locator,
763                                           reg, exec_options,
764                                           subject, subject_len,
765                                           replace, replace_len,
766                                           result_len_p);
767   }
768 
769   regfree(&reg);
770 #endif
771 
772 #ifdef RASQAL_REGEX_NONE
773   rasqal_log_warning_simple(world, RASQAL_WARNING_LEVEL_MISSING_SUPPORT,
774                             locator,
775                             "Regex support missing, cannot replace '%s' from '%s' to '%s'", subject, pattern, replace);
776 #endif
777 
778   return result_s;
779 }
780 
781 #endif /* not STANDALONE */
782 
783 
784 #ifdef STANDALONE
785 #include <stdio.h>
786 
787 int main(int argc, char *argv[]);
788 
789 
790 #define NTESTS 1
791 
792 int
main(int argc,char * argv[])793 main(int argc, char *argv[])
794 {
795   rasqal_world* world;
796   const char *program = rasqal_basename(argv[0]);
797 #ifdef RASQAL_REGEX_PCRE
798   raptor_locator* locator = NULL;
799   int test = 0;
800 #endif
801   int failures = 0;
802 
803   world = rasqal_new_world();
804   if(!world || rasqal_world_open(world)) {
805     fprintf(stderr, "%s: rasqal_world init failed\n", program);
806     failures++;
807     goto tidy;
808   }
809 
810 #if defined(RASQAL_REGEX_POSIX) || defined(RASQAL_REGEX_NONE)
811     fprintf(stderr,
812             "%s: WARNING: Cannot only run regex tests with PCRE regexes\n",
813             program);
814 #endif
815 
816 #ifdef RASQAL_REGEX_PCRE
817   for(test = 0; test < NTESTS; test++) {
818     const char* regex_flags = "";
819     const char* subject = "abcd1234-^";
820     const char* pattern = "[^a-z0-9]";
821     const char* replace = "-";
822     const char* expected_result = "abcd1234--";
823     size_t subject_len = strlen(RASQAL_GOOD_CAST(const char*, subject));
824     size_t replace_len = strlen(RASQAL_GOOD_CAST(const char*, replace));
825     char* result;
826     size_t result_len = 0;
827 
828     fprintf(stderr, "%s: Test %d pattern: '%s' subject '%s'\n",
829             program, test, pattern, subject);
830 
831     result = rasqal_regex_replace(world, locator,
832                                   pattern, regex_flags,
833                                   subject, subject_len,
834                                   replace, replace_len,
835                                   &result_len);
836 
837     if(result) {
838       if(strcmp(result, expected_result)) {
839         fprintf(stderr, "%s: Test %d failed - expected '%s' but got '%s'\n",
840                 program, test, expected_result, result);
841         failures++;
842       }
843       RASQAL_FREE(char*, result);
844     } else {
845       fprintf(stderr, "%s: Test %d failed - result was NULL\n", program, test);
846       failures++;
847     }
848   }
849 #endif
850 
851   tidy:
852   rasqal_free_world(world);
853 
854   return failures;
855 }
856 #endif /* STANDALONE */
857