1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * rasqal_expr_strings.c - Rasqal string expression functions
4  *
5  * Copyright (C) 2010, David Beckett http://www.dajobe.org/
6  *
7  * This package is Free Software and part of Redland http://librdf.org/
8  *
9  * It is licensed under the following three licenses as alternatives:
10  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
11  *   2. GNU General Public License (GPL) V2 or any newer version
12  *   3. Apache License, V2.0 or any newer version
13  *
14  * You may not use this file except in compliance with at least one of
15  * the above three licenses.
16  *
17  * See LICENSE.html or LICENSE.txt at the top of this package for the
18  * complete terms and further detail along with the license texts for
19  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
20  *
21  *
22  */
23 
24 #ifdef HAVE_CONFIG_H
25 #include <rasqal_config.h>
26 #endif
27 
28 #ifdef WIN32
29 #include <win32_rasqal_config.h>
30 #endif
31 
32 #include <stdio.h>
33 #include <string.h>
34 #include <ctype.h>
35 #ifdef HAVE_STDLIB_H
36 #include <stdlib.h>
37 #endif
38 #include <stdarg.h>
39 
40 #include "rasqal.h"
41 #include "rasqal_internal.h"
42 
43 
/* Stream name for debug output.
 * NOTE(review): the debug blocks visible in this part of the file write
 * to stderr directly rather than through DEBUG_FH - confirm whether this
 * macro is still needed.
 */
44 #define DEBUG_FH stderr
45 
46 
47 /*
48  * rasqal_expression_evaluate_strlen:
49  * @e: The expression to evaluate.
50  * @eval_context: Evaluation context
51  *
52  * INTERNAL - Evaluate RASQAL_EXPR_STRLEN(expr) expression.
53  *
54  * Return value: A #rasqal_literal integer value or NULL on failure.
55  */
56 rasqal_literal*
rasqal_expression_evaluate_strlen(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)57 rasqal_expression_evaluate_strlen(rasqal_expression *e,
58                                   rasqal_evaluation_context *eval_context,
59                                   int *error_p)
60 {
61   rasqal_world* world = eval_context->world;
62   rasqal_literal* l1;
63   rasqal_literal* result = NULL;
64   const unsigned char *s;
65   int len = 0;
66 
67   l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
68   if((error_p && *error_p) || !l1)
69     goto failed;
70 
71   s = rasqal_literal_as_string_flags(l1, eval_context->flags, error_p);
72   if(error_p && *error_p)
73     goto failed;
74 
75   if(!s)
76     len = 0;
77   else
78     len = raptor_unicode_utf8_strlen(s, strlen(RASQAL_GOOD_CAST(const char*, s)));
79 
80 
81   result = rasqal_new_numeric_literal_from_long(world, RASQAL_LITERAL_INTEGER,
82                                                 len);
83   rasqal_free_literal(l1);
84   return result;
85 
86   failed:
87   if(error_p)
88     *error_p = 1;
89 
90   if(l1)
91     rasqal_free_literal(l1);
92 
93   return NULL;
94 }
95 
96 
97 /*
98  * rasqal_expression_evaluate_substr:
99  * @e: The expression to evaluate.
100  * @eval_context: Evaluation context
101  *
102  * INTERNAL - Evaluate RASQAL_EXPR_SUBSTR(expr) expression.
103  *
104  * Return value: A #rasqal_literal integer value or NULL on failure.
105  */
106 rasqal_literal*
rasqal_expression_evaluate_substr(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)107 rasqal_expression_evaluate_substr(rasqal_expression *e,
108                                   rasqal_evaluation_context *eval_context,
109                                   int *error_p)
110 {
111   rasqal_world* world = eval_context->world;
112   rasqal_literal* l1 = NULL;
113   rasqal_literal* l2 = NULL;
114   rasqal_literal* l3 = NULL;
115   const unsigned char *s;
116   unsigned char* new_s = NULL;
117   char* new_lang = NULL;
118   raptor_uri* dt_uri = NULL;
119   size_t len = 0;
120   int startingLoc = 0;
121   int length = -1;
122 
123   /* haystack string */
124   l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
125   if((error_p && *error_p) || !l1)
126     goto failed;
127 
128   s = rasqal_literal_as_counted_string(l1, &len, eval_context->flags, error_p);
129   if(error_p && *error_p)
130     goto failed;
131 
132   /* integer startingLoc */
133   l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
134   if((error_p && *error_p) || !l2)
135     goto failed;
136 
137   startingLoc = rasqal_literal_as_integer(l2, error_p);
138   if(error_p && *error_p)
139     goto failed;
140 
141   /* optional integer length */
142   if(e->arg3) {
143     l3 = rasqal_expression_evaluate2(e->arg3, eval_context, error_p);
144     if(!l3)
145       goto failed;
146 
147     length = rasqal_literal_as_integer(l3, error_p);
148     if(error_p && *error_p)
149       goto failed;
150 
151   }
152 
153   new_s = RASQAL_MALLOC(unsigned char*, len + 1);
154   if(!new_s)
155     goto failed;
156 
157   /* adjust starting index to xsd fn:substring initial offset 1 */
158   if(!raptor_unicode_utf8_substr(new_s, /* dest_length_p */ NULL,
159                                  s, len, startingLoc - 1, length))
160     goto failed;
161 
162   if(l1->language) {
163     len = strlen(RASQAL_GOOD_CAST(const char*, l1->language));
164     new_lang = RASQAL_MALLOC(char*, len + 1);
165     if(!new_lang)
166       goto failed;
167 
168     memcpy(new_lang, l1->language, len + 1);
169   }
170 
171   dt_uri = l1->datatype;
172   if(dt_uri)
173     dt_uri = raptor_uri_copy(dt_uri);
174 
175   rasqal_free_literal(l1);
176   rasqal_free_literal(l2);
177   if(l3)
178     rasqal_free_literal(l3);
179 
180   /* after this new_s, new_lang and dt_uri become owned by result */
181   return rasqal_new_string_literal(world, new_s, new_lang, dt_uri,
182                                    /* qname */ NULL);
183 
184 
185 
186   failed:
187   if(error_p)
188     *error_p = 1;
189 
190   if(l1)
191     rasqal_free_literal(l1);
192   if(l2)
193     rasqal_free_literal(l2);
194   if(l3)
195     rasqal_free_literal(l3);
196 
197   return NULL;
198 }
199 
200 
201 /*
202  * rasqal_expression_evaluate_set_case:
203  * @e: The expression to evaluate.
204  * @eval_context: Evaluation context
205  *
206  * INTERNAL - Evaluate RASQAL_EXPR_UCASE(expr) or
207  * RASQAL_EXPR_LCASE(expr) expressions.
208  *
209  * Return value: A #rasqal_literal string value or NULL on failure.
210  */
211 rasqal_literal*
rasqal_expression_evaluate_set_case(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)212 rasqal_expression_evaluate_set_case(rasqal_expression *e,
213                                     rasqal_evaluation_context *eval_context,
214                                     int *error_p)
215 {
216   rasqal_world* world = eval_context->world;
217   rasqal_literal* l1;
218   const unsigned char *s;
219   unsigned char* new_s = NULL;
220   char* new_lang = NULL;
221   raptor_uri* dt_uri = NULL;
222   size_t len = 0;
223 
224   l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
225   if((error_p && *error_p) || !l1)
226     goto failed;
227 
228   s = rasqal_literal_as_counted_string(l1, &len, eval_context->flags, error_p);
229   if(error_p && *error_p)
230     goto failed;
231 
232   new_s =RASQAL_MALLOC(unsigned char*, len + 1);
233   if(!new_s)
234     goto failed;
235 
236   if(e->op == RASQAL_EXPR_UCASE) {
237     unsigned int i;
238 
239     for(i = 0; i < len; i++) {
240       unsigned char c = s[i];
241       if(islower(RASQAL_GOOD_CAST(int, c)))
242         c = RASQAL_GOOD_CAST(unsigned char, toupper(RASQAL_GOOD_CAST(int, c)));
243       new_s[i] = c;
244     }
245   } else { /* RASQAL_EXPR_LCASE */
246     unsigned int i;
247 
248     for(i = 0; i < len; i++) {
249       unsigned char c = s[i];
250       if(isupper(RASQAL_GOOD_CAST(int, c)))
251         c = RASQAL_GOOD_CAST(unsigned char, tolower(RASQAL_GOOD_CAST(int, c)));
252       new_s[i] = c;
253     }
254   }
255   new_s[len] = '\0';
256 
257   if(l1->language) {
258     len = strlen(RASQAL_GOOD_CAST(const char*, l1->language));
259     new_lang = RASQAL_MALLOC(char*, len + 1);
260     if(!new_lang)
261       goto failed;
262 
263     memcpy(new_lang, l1->language, len + 1);
264   }
265 
266   dt_uri = l1->datatype;
267   if(dt_uri)
268     dt_uri = raptor_uri_copy(dt_uri);
269 
270   rasqal_free_literal(l1);
271 
272   /* after this new_s, new_lang and dt_uri become owned by result */
273   return rasqal_new_string_literal(world, new_s, new_lang, dt_uri,
274                                    /* qname */ NULL);
275 
276 
277   failed:
278   if(error_p)
279     *error_p = 1;
280 
281   if(new_s)
282     RASQAL_FREE(char*, new_s);
283   if(new_lang)
284     RASQAL_FREE(char*, new_lang);
285   if(l1)
286     rasqal_free_literal(l1);
287 
288   return NULL;
289 }
290 
291 
292 /*
293  * rasqal_literals_sparql11_compatible:
294  * @l1: first literal
295  * @l2: second  literal
296  *
297  * INTERNAL - Check if two literals are SPARQL 1.1 compatible such as usable for STRSTARTS()
298  *
299  * From STRSTARTS(), STRENDS() and CONTAINS() draft definition:
300  * 1. pairs of simple literals,
301  * 2. pairs of xsd:string typed literals
302  * 3. pairs of plain literals with identical language tags
303  * 4. pairs of an xsd:string typed literal (arg1 or arg2) and a simple literal (arg2 or arg1)
304  * 5. pairs of a plain literal with language tag (arg1) and a simple literal (arg2)
305  * 6. pairs of a plain literal with language tag (arg1) and an xsd:string typed literal (arg2)
306  *
307  * Return value: non-0 if literals are compatible
308  */
309 static int
rasqal_literals_sparql11_compatible(rasqal_literal * l1,rasqal_literal * l2)310 rasqal_literals_sparql11_compatible(rasqal_literal *l1, rasqal_literal *l2)
311 {
312   raptor_uri* dt1;
313   raptor_uri* dt2;
314   const char *lang1;
315   const char *lang2;
316   raptor_uri* xsd_string_uri;
317 
318   xsd_string_uri = rasqal_xsd_datatype_type_to_uri(l1->world,
319                                                    RASQAL_LITERAL_XSD_STRING);
320 
321   /* Languages */
322   lang1 = l1->language;
323   lang2 = l2->language;
324 
325   /* Turn xsd:string datatypes into plain literals for compatibility
326    * purposes
327    */
328   dt1 = l1->datatype;
329   if(dt1 && raptor_uri_equals(dt1, xsd_string_uri))
330     dt1 = NULL;
331 
332   dt2 = l2->datatype;
333   if(dt2 && raptor_uri_equals(dt2, xsd_string_uri))
334     dt2 = NULL;
335 
336   /* If there any datatypes left, the literals are not compatible */
337   if(dt1 || dt2)
338     return 0;
339 
340   /* pairs of simple literals (or pairs of xsd:string or mixtures): #1, #2, #4 */
341   if(!lang1 && !lang2)
342     return 1;
343 
344   /* pairs of plain literals with identical language tags #3 */
345   if(lang1 && lang2)
346     return !strcmp(lang1, lang2);
347 
348   /* pairs of a plain literal with language tag (arg1) and a simple
349    * literal or xsd:string typed literal [with no language tag] (arg2) #5, #6
350    */
351   return (lang1 && !lang2);
352 }
353 
354 
355 /*
356  * rasqal_expression_evaluate_str_prefix_suffix:
357  * @e: The expression to evaluate.
358  * @eval_context: Evaluation context
359  *
360  * INTERNAL - Evaluate RASQAL_EXPR_STRSTARTS(lit, lit) and
361  * RASQAL_EXPR_STRENDS(lit, lit) expressions.
362  *
363  * Return value: A #rasqal_literal integer value or NULL on failure.
364  */
365 rasqal_literal*
rasqal_expression_evaluate_str_prefix_suffix(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)366 rasqal_expression_evaluate_str_prefix_suffix(rasqal_expression *e,
367                                              rasqal_evaluation_context *eval_context,
368                                              int *error_p)
369 {
370   rasqal_world* world = eval_context->world;
371   rasqal_literal *l1 = NULL;
372   rasqal_literal *l2 = NULL;
373   int b;
374   const unsigned char *s1;
375   const unsigned char *s2;
376   size_t len1 = 0;
377   size_t len2 = 0;
378 
379   l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
380   if((error_p && *error_p) || !l1)
381     goto failed;
382 
383   l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
384   if((error_p && *error_p) || !l2)
385     goto failed;
386 
387   if(!rasqal_literals_sparql11_compatible(l1, l2))
388     goto failed;
389 
390   s1 = rasqal_literal_as_counted_string(l1, &len1, eval_context->flags, error_p);
391   if(error_p && *error_p)
392     goto failed;
393 
394   s2 = rasqal_literal_as_counted_string(l2, &len2, eval_context->flags, error_p);
395   if(error_p && *error_p)
396     goto failed;
397 
398   if(len1 < len2) {
399     /* s1 is shorter than s2 so s2 can never be a prefix, suffix or
400      * contain s1 */
401     b = 0;
402   } else {
403     if(e->op == RASQAL_EXPR_STRSTARTS) {
404       b = !memcmp(s1, s2, len2);
405     } else if(e->op == RASQAL_EXPR_STRENDS) {
406       b = !memcmp(s1 + len1 - len2, s2, len2);
407     } else { /* RASQAL_EXPR_CONTAINS */
408       /* b = (strnstr(RASQAL_GOOD_CAST(const char*, s1), RASQAL_GOOD_CAST(const char*, s2), len2) != NULL); */
409       b = (strstr(RASQAL_GOOD_CAST(const char*, s1),
410                   RASQAL_GOOD_CAST(const char*, s2)) != NULL);
411     }
412   }
413 
414 
415 
416   rasqal_free_literal(l1);
417   rasqal_free_literal(l2);
418 
419   return rasqal_new_boolean_literal(world, b);
420 
421   failed:
422   if(error_p)
423     *error_p = 1;
424 
425   if(l1)
426     rasqal_free_literal(l1);
427   if(l2)
428     rasqal_free_literal(l2);
429 
430   return NULL;
431 }
432 
433 
434 /*
435  * rasqal_expression_evaluate_encode_for_uri:
436  * @e: The expression to evaluate.
437  * @eval_context: Evaluation context
438  *
439  * INTERNAL - Evaluate RASQAL_EXPR_ENCODE_FOR_URI(string) expression.
440  *
441  * Return value: A #rasqal_literal string value or NULL on failure.
442  */
443 rasqal_literal*
rasqal_expression_evaluate_encode_for_uri(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)444 rasqal_expression_evaluate_encode_for_uri(rasqal_expression *e,
445                                           rasqal_evaluation_context *eval_context,
446                                           int *error_p)
447 {
448   rasqal_world* world = eval_context->world;
449   rasqal_literal* l1;
450   raptor_uri* xsd_string_uri;
451   const unsigned char *s;
452   unsigned char* new_s = NULL;
453   raptor_uri* dt_uri = NULL;
454   size_t len = 0;
455   unsigned int i;
456   unsigned char* p;
457 
458   l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
459   if((error_p && *error_p) || !l1)
460     goto failed;
461 
462   xsd_string_uri = rasqal_xsd_datatype_type_to_uri(l1->world,
463                                                    RASQAL_LITERAL_XSD_STRING);
464 
465   dt_uri = l1->datatype;
466   if(dt_uri && !raptor_uri_equals(dt_uri, xsd_string_uri))
467     /* datatype and not xsd:string */
468     goto failed;
469 
470   s = rasqal_literal_as_counted_string(l1, &len, eval_context->flags, error_p);
471   if(error_p && *error_p)
472     goto failed;
473 
474   /* pessimistically assume every UTF-8 byte is %XX 3 x len */
475   new_s = RASQAL_MALLOC(unsigned char*, (3 * len) + 1);
476   if(!new_s)
477     goto failed;
478 
479   p = new_s;
480   for(i = 0; i < len; i++) {
481     unsigned char c = s[i];
482 
483     /* All characters are escaped except those identified as
484      * "unreserved" by [RFC 3986], that is the upper- and lower-case
485      * letters A-Z, the digits 0-9, HYPHEN-MINUS ("-"), LOW LINE
486      * ("_"), FULL STOP ".", and TILDE "~".
487      */
488     if((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
489        (c >= '0' && c <= '9') ||
490        c == '-' || c == '_' || c == '.' || c == '~') {
491       *p++ = c;
492     } else {
493       unsigned short hex;
494 
495       *p++ = '%';
496       hex = (c & 0xf0) >> 4;
497       *p++ = RASQAL_GOOD_CAST(unsigned char, (hex < 10) ? ('0' + hex) : ('A' + hex - 10));
498       hex = (c & 0x0f);
499       *p++ = RASQAL_GOOD_CAST(unsigned char, (hex < 10) ? ('0' + hex) : ('A' + hex - 10));
500     }
501   }
502 
503   *p = '\0';
504 
505   rasqal_free_literal(l1);
506 
507   /* after this new_s, new_lang and dt_uri become owned by result */
508   return rasqal_new_string_literal(world, new_s, NULL, NULL,
509                                    /* qname */ NULL);
510 
511 
512   failed:
513   if(error_p)
514     *error_p = 1;
515 
516   if(new_s)
517     RASQAL_FREE(char*, new_s);
518   if(l1)
519     rasqal_free_literal(l1);
520 
521   return NULL;
522 
523 }
524 
525 
526 /*
527  * rasqal_expression_evaluate_concat:
528  * @e: The expression to evaluate.
529  * @eval_context: Evaluation context
530  *
531  * INTERNAL - Evaluate RASQAL_EXPR_CONCAT(expr list) expression.
532  *
533  * "If all input literals are typed literals of type xsd:string,
534  * then the returned literal is also of type xsd:string, if all input
535  * literals are plain literals with identical language tag, then the
536  * returned literal is a plain literal with the same language tag, in
537  * all other cases, the returned literal is a simple literal."
538  *
539  * Return value: A #rasqal_literal string value or NULL on failure.
540  */
541 rasqal_literal*
rasqal_expression_evaluate_concat(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)542 rasqal_expression_evaluate_concat(rasqal_expression *e,
543                                   rasqal_evaluation_context *eval_context,
544                                   int *error_p)
545 {
546   rasqal_world* world = eval_context->world;
547   raptor_stringbuffer* sb = NULL;
548   int i;
549   size_t len;
550   unsigned char* result_str = NULL;
551   char* lang_tag = NULL;
552   int mode = -1; /* -1: undecided  0: xsd:string  1: simple+lang  2: simple */
553   raptor_uri* dt = NULL;
554   raptor_uri* xsd_string_uri;
555   rasqal_literal *result_l;
556 
557   xsd_string_uri = rasqal_xsd_datatype_type_to_uri(world,
558                                                    RASQAL_LITERAL_XSD_STRING);
559 
560   sb = raptor_new_stringbuffer();
561   if(!sb)
562     goto failed;
563 
564   for(i = 0; i < raptor_sequence_size(e->args); i++) {
565     rasqal_expression *arg_expr;
566     rasqal_literal* arg_literal;
567     const unsigned char* s = NULL;
568 
569     arg_expr = (rasqal_expression*)raptor_sequence_get_at(e->args, i);
570     if(!arg_expr)
571       break;
572 
573     arg_literal = rasqal_expression_evaluate2(arg_expr, eval_context, error_p);
574     if(!arg_literal) {
575       /* FIXME - check what to do with a NULL literal */
576 #if 0
577       if(error_p)
578         *error_p = 1;
579       goto failed;
580 #endif
581       continue;
582     }
583 
584     if(arg_literal->type != RASQAL_LITERAL_STRING &&
585        arg_literal->type != RASQAL_LITERAL_XSD_STRING) {
586       /* result is NULL literal; no error */
587       goto null_literal;
588     }
589 
590 
591 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
592     RASQAL_DEBUG1("Concating literal ");
593     rasqal_literal_print(arg_literal, stderr);
594     fprintf(stderr, " with existing mode %d  lang=%s\n", mode, lang_tag);
595 #endif
596 
597     if(arg_literal->datatype) {
598       /* Datatype */
599       if(raptor_uri_equals(arg_literal->datatype, xsd_string_uri)) {
600         if(mode < 0)
601           /* mode -1: expect all xsd:string */
602           mode = 0;
603         else if(mode != 0) {
604           /* mode 1, 2: different datatypes, so result is simple literal */
605           if(lang_tag) {
606             RASQAL_FREE(char*, lang_tag); lang_tag = NULL;
607           }
608           mode = 2;
609         } else {
610           /* mode 0: not xsd:string so result is simple literal */
611           mode = 2;
612         }
613       }
614     } else {
615       /* No datatype; check language */
616       if(arg_literal->language) {
617         if(mode < 0) {
618           /* mode -1: First literal with language: save it and use it */
619           size_t lang_len = strlen(arg_literal->language);
620 
621           lang_tag = RASQAL_MALLOC(char*, lang_len + 1);
622           if(!lang_tag)
623             goto failed;
624           memcpy(lang_tag, arg_literal->language, lang_len + 1);
625           mode = 1;
626         } else if (mode == 1) {
627           /* mode 1: Already got a lang tag so check it */
628 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
629           RASQAL_DEBUG3("concat compare lang %s vs %s\n",
630                         arg_literal->language, lang_tag);
631 #endif
632           if(strcmp(arg_literal->language, lang_tag)) {
633             /* different languages, so result is simple literal */
634             RASQAL_FREE(char*, lang_tag); lang_tag = NULL;
635             mode = 2;
636           }
637         } else if (mode == 0) {
638           /* mode 0: mixture of xsd:string and language literals,
639            * so result is simple literal
640            */
641           mode = 2;
642         } /* otherwise mode 2: No change */
643       } else {
644         if(lang_tag) {
645           /* mode 1: language but this literal has none, so result is
646            * simple literal */
647           RASQAL_FREE(char*, lang_tag); lang_tag = NULL;
648         }
649         mode = 2;
650       }
651     }
652 
653     /* FIXME - check that altering the flags this way to allow
654      * concat of URIs is OK
655      */
656     s = rasqal_literal_as_string_flags(arg_literal,
657                                          (eval_context->flags & ~RASQAL_COMPARE_XQUERY),
658                                          error_p);
659     rasqal_free_literal(arg_literal);
660 
661 
662     if((error_p && *error_p) || !s)
663       goto failed;
664 
665     raptor_stringbuffer_append_string(sb, s, 1);
666   }
667 
668 
669   len = raptor_stringbuffer_length(sb);
670   result_str = RASQAL_MALLOC(unsigned char*, len + 1);
671   if(!result_str)
672     goto failed;
673 
674   if(raptor_stringbuffer_copy_to_string(sb, result_str, len))
675     goto failed;
676 
677   raptor_free_stringbuffer(sb);
678 
679   if(mode == 0)
680     dt = raptor_uri_copy(xsd_string_uri);
681 
682   /* result_str and lang and dt (if set) becomes owned by result */
683   result_l = rasqal_new_string_literal(world, result_str, lang_tag, dt, NULL);
684 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
685   RASQAL_DEBUG1("Concat result literal: ");
686   rasqal_literal_print(result_l, stderr);
687   fprintf(stderr, " with mode %d\n", mode);
688 #endif
689 
690   return result_l;
691 
692   failed:
693   if(error_p)
694     *error_p = 1;
695 
696   null_literal:
697   if(dt)
698     raptor_free_uri(dt);
699   if(lang_tag)
700     RASQAL_FREE(char*, lang_tag);
701   if(result_str)
702     RASQAL_FREE(char*, result_str);
703   if(sb)
704     raptor_free_stringbuffer(sb);
705 
706   return NULL;
707 }
708 
709 
710 /*
711  * rasqal_expression_evaluate_langmatches:
712  * @e: The expression to evaluate.
713  * @eval_context: Evaluation context
714  *
715  * INTERNAL - Evaluate RASQAL_EXPR_LANGMATCHES(lang tag, lang tag range) expression.
716  *
717  * Return value: A #rasqal_literal boolean value or NULL on failure.
718  */
719 rasqal_literal*
rasqal_expression_evaluate_langmatches(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)720 rasqal_expression_evaluate_langmatches(rasqal_expression *e,
721                                        rasqal_evaluation_context *eval_context,
722                                        int *error_p)
723 {
724   rasqal_world* world = eval_context->world;
725   rasqal_literal *l1 = NULL;
726   rasqal_literal *l2 = NULL;
727   const unsigned char *tag;
728   const unsigned char *range;
729   int b;
730 
731   l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
732   if((error_p && *error_p) || !l1)
733     goto failed;
734 
735   l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
736   if((error_p && *error_p) || !l2)
737     goto failed;
738 
739   tag = rasqal_literal_as_string_flags(l1, eval_context->flags, error_p);
740   if(error_p && *error_p)
741     goto failed;
742 
743   range = rasqal_literal_as_string_flags(l2, eval_context->flags, error_p);
744   if(error_p && *error_p)
745     goto failed;
746 
747 
748   b = rasqal_language_matches(tag, range);
749 
750   rasqal_free_literal(l1);
751   rasqal_free_literal(l2);
752 
753   return rasqal_new_boolean_literal(world, b);
754 
755   failed:
756   if(error_p)
757     *error_p = 1;
758 
759   if(l1)
760     rasqal_free_literal(l1);
761   if(l2)
762     rasqal_free_literal(l2);
763 
764   return NULL;
765 }
766 
767 
768 /*
769  * rasqal_expression_evaluate_strmatch:
770  * @e: The expression to evaluate.
771  * @eval_context: Evaluation context
772  *
773  * INTERNAL - Evaluate RASQAL_EXPR_STR_MATCH, RASQAL_EXPR_STR_NMATCH and
774  * RASQAL_EXPR_REGEX expressions.
775  *
776  * Return value: A #rasqal_literal value or NULL on failure.
777  */
778 rasqal_literal*
rasqal_expression_evaluate_strmatch(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)779 rasqal_expression_evaluate_strmatch(rasqal_expression *e,
780                                     rasqal_evaluation_context *eval_context,
781                                     int *error_p)
782 {
783   rasqal_world* world = eval_context->world;
784   int b = 0;
785   const unsigned char *l1_str;
786   const char *match_string;
787   const char *pattern;
788   const char *regex_flags;
789   rasqal_literal *l1, *l2, *l3;
790   int rc = 0;
791   size_t match_len;
792 
793   l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
794   if((error_p && *error_p) || !l1)
795     goto failed;
796 
797   l1_str = rasqal_literal_as_counted_string(l1, &match_len,
798                                             eval_context->flags, error_p);
799   match_string = RASQAL_GOOD_CAST(const char*, l1_str);
800   if((error_p && *error_p) || !match_string) {
801     rasqal_free_literal(l1);
802     goto failed;
803   }
804 
805   l3 = NULL;
806   regex_flags = NULL;
807   if(e->op == RASQAL_EXPR_REGEX) {
808     l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
809     if((error_p && *error_p) || !l2) {
810       rasqal_free_literal(l1);
811       goto failed;
812     }
813 
814     if(e->arg3) {
815       l3 = rasqal_expression_evaluate2(e->arg3, eval_context, error_p);
816       if((error_p && *error_p) || !l3) {
817         rasqal_free_literal(l1);
818         rasqal_free_literal(l2);
819         goto failed;
820       }
821       regex_flags = RASQAL_GOOD_CAST(const char*, l3->string);
822     }
823 
824   } else {
825     l2 = e->literal;
826     regex_flags = RASQAL_GOOD_CAST(const char*, l2->flags);
827   }
828   pattern = RASQAL_GOOD_CAST(const char*, l2->string);
829 
830   rc = rasqal_regex_match(world, eval_context->locator,
831                           pattern, regex_flags,
832                           match_string, match_len);
833 
834 #ifdef RASQAL_DEBUG
835   if(rc >= 0)
836     RASQAL_DEBUG5("regex match returned %s for '%s' against '%s' (flags=%s)\n", rc ? "true" : "false", match_string, pattern, l2->flags ? RASQAL_GOOD_CAST(char*, l2->flags) : "");
837   else
838     RASQAL_DEBUG4("regex match returned failed for '%s' against '%s' (flags=%s)\n", match_string, pattern, l2->flags ? RASQAL_GOOD_CAST(char*, l2->flags) : "");
839 #endif
840 
841   rasqal_free_literal(l1);
842   if(e->op == RASQAL_EXPR_REGEX) {
843     rasqal_free_literal(l2);
844     if(l3)
845       rasqal_free_literal(l3);
846   }
847 
848   if(rc < 0)
849     goto failed;
850 
851   b = rc;
852   if(e->op == RASQAL_EXPR_STR_NMATCH)
853     b = 1 - b;
854 
855   return rasqal_new_boolean_literal(world, b);
856 
857   failed:
858   if(error_p)
859     *error_p = 1;
860 
861   return NULL;
862 }
863 
864 
865 /*
866  * rasqal_expression_evaluate_strbefore:
867  * @e: The expression to evaluate.
868  * @eval_context: Evaluation context
869  *
870  * INTERNAL - Evaluate RASQAL_EXPR_STRBEFORE(string, needle) expression.
871  *
872  * Return value: A #rasqal_literal string value or NULL on failure.
873  */
874 rasqal_literal*
rasqal_expression_evaluate_strbefore(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)875 rasqal_expression_evaluate_strbefore(rasqal_expression *e,
876                                      rasqal_evaluation_context *eval_context,
877                                      int *error_p)
878 {
879   rasqal_world* world = eval_context->world;
880   rasqal_literal* l1 = NULL;
881   rasqal_literal* l2 = NULL;
882   const unsigned char *haystack;
883   const unsigned char *needle;
884   size_t haystack_len;
885   size_t needle_len;
886   const char *ptr;
887   unsigned char* result;
888   size_t result_len;
889   char* new_lang = NULL;
890 
891   /* haystack string */
892   l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
893   if((error_p && *error_p) || !l1)
894     goto failed;
895 
896   /* needle string */
897   l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
898   if((error_p && *error_p) || !l2)
899     goto failed;
900 
901   if(!rasqal_literal_is_string(l1) || !rasqal_literal_is_string(l2)) {
902     /* not strings */
903 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
904     RASQAL_DEBUG1("Cannot strbefore haystack ");
905     rasqal_literal_print(l1, stderr);
906     fputs( " to needle ", stderr);
907     rasqal_literal_print(l2, stderr);
908     fputs(" - both not string", stderr);
909 #endif
910     goto failed;
911   }
912 
913   if(l2->language && rasqal_literal_string_languages_compare(l1, l2)) {
914 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
915     RASQAL_DEBUG1("Cannot strbefore haystack ");
916     rasqal_literal_print(l1, stderr);
917     fputs( " to language needle ", stderr);
918     rasqal_literal_print(l2, stderr);
919     fputs(" - languages mismatch", stderr);
920 #endif
921     goto failed;
922   }
923 
924   haystack = rasqal_literal_as_counted_string(l1, &haystack_len,
925                                               eval_context->flags, error_p);
926   if((error_p && *error_p) || !haystack)
927     goto failed;
928 
929   needle = rasqal_literal_as_counted_string(l2, &needle_len,
930                                             eval_context->flags, error_p);
931   if((error_p && *error_p) || !needle)
932     goto failed;
933 
934   ptr = strstr(RASQAL_GOOD_CAST(const char*, haystack),
935                RASQAL_GOOD_CAST(const char*, needle));
936   if(ptr) {
937     result_len = RASQAL_GOOD_CAST(size_t, ptr - RASQAL_GOOD_CAST(const char*, haystack));
938 
939     if(l1->language) {
940       size_t len = strlen(RASQAL_GOOD_CAST(const char*, l1->language));
941       new_lang = RASQAL_MALLOC(char*, len + 1);
942       if(!new_lang)
943         goto failed;
944 
945       memcpy(new_lang, l1->language, len + 1);
946     }
947   } else {
948     result_len = 0;
949     haystack = RASQAL_GOOD_CAST(const unsigned char *, "");
950   }
951 
952   rasqal_free_literal(l1); l1 = NULL;
953   rasqal_free_literal(l2); l2 = NULL;
954 
955   result = RASQAL_MALLOC(unsigned char*, result_len + 1);
956   if(!result)
957     goto failed;
958 
959   if(result_len)
960     memcpy(result, haystack, result_len);
961   result[result_len] = '\0';
962 
963   return rasqal_new_string_literal(world, result,
964                                    new_lang,
965                                    /* datatype */ NULL,
966                                    /* qname */ NULL);
967 
968   failed:
969   if(l1)
970     rasqal_free_literal(l1);
971 
972   if(l2)
973     rasqal_free_literal(l2);
974 
975   if(error_p)
976     *error_p = 1;
977 
978   return NULL;
979 }
980 
981 
982 /*
983  * rasqal_expression_evaluate_strafter:
984  * @e: The expression to evaluate.
985  * @eval_context: Evaluation context
986  *
987  * INTERNAL - Evaluate RASQAL_EXPR_STRAFTER(string, needle) expression.
988  *
989  * Return value: A #rasqal_literal string value or NULL on failure.
990  */
991 rasqal_literal*
rasqal_expression_evaluate_strafter(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)992 rasqal_expression_evaluate_strafter(rasqal_expression *e,
993                                     rasqal_evaluation_context *eval_context,
994                                     int *error_p)
995 {
996   rasqal_world* world = eval_context->world;
997   rasqal_literal* l1 = NULL;
998   rasqal_literal* l2 = NULL;
999   const unsigned char *haystack;
1000   const unsigned char *needle;
1001   size_t haystack_len;
1002   size_t needle_len;
1003   const char *ptr;
1004   unsigned char* result;
1005   size_t result_len;
1006   char* new_lang = NULL;
1007 
1008   /* haystack string */
1009   l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
1010   if((error_p && *error_p) || !l1)
1011     goto failed;
1012 
1013   /* needle string */
1014   l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
1015   if((error_p && *error_p) || !l2)
1016     goto failed;
1017 
1018   if(!rasqal_literal_is_string(l1) || !rasqal_literal_is_string(l2)) {
1019     /* not strings */
1020 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
1021     RASQAL_DEBUG1("Cannot strafter haystack ");
1022     rasqal_literal_print(l1, stderr);
1023     fputs( " to needle ", stderr);
1024     rasqal_literal_print(l2, stderr);
1025     fputs(" - both not string", stderr);
1026 #endif
1027     goto failed;
1028   }
1029 
1030   if(l2->language && rasqal_literal_string_languages_compare(l1, l2)) {
1031 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
1032     RASQAL_DEBUG1("Cannot strafter haystack ");
1033     rasqal_literal_print(l1, stderr);
1034     fputs( " to language needle ", stderr);
1035     rasqal_literal_print(l2, stderr);
1036     fputs(" - languages mismatch", stderr);
1037 #endif
1038     goto failed;
1039   }
1040 
1041 
1042   haystack = rasqal_literal_as_counted_string(l1, &haystack_len,
1043                                               eval_context->flags, error_p);
1044   if((error_p && *error_p) || !haystack)
1045     goto failed;
1046 
1047   needle = rasqal_literal_as_counted_string(l2, &needle_len,
1048                                             eval_context->flags, error_p);
1049   if((error_p && *error_p) || !needle)
1050     goto failed;
1051 
1052   ptr = strstr(RASQAL_GOOD_CAST(const char*, haystack),
1053                RASQAL_GOOD_CAST(const char*, needle));
1054   if(ptr) {
1055     ptr += needle_len;
1056     result_len = haystack_len - RASQAL_GOOD_CAST(size_t, (ptr - RASQAL_GOOD_CAST(const char*, haystack)));
1057 
1058     if(l1->language) {
1059       size_t len = strlen(RASQAL_GOOD_CAST(const char*, l1->language));
1060       new_lang = RASQAL_MALLOC(char*, len + 1);
1061       if(!new_lang)
1062         goto failed;
1063 
1064       memcpy(new_lang, l1->language, len + 1);
1065     }
1066   } else {
1067     ptr = (const char *)"";
1068     result_len = 0;
1069   }
1070 
1071   rasqal_free_literal(l1); l1 = NULL;
1072   rasqal_free_literal(l2); l2 = NULL;
1073 
1074   result = RASQAL_MALLOC(unsigned char*, result_len + 1);
1075   if(!result)
1076     goto failed;
1077 
1078   if(result_len)
1079     memcpy(result, ptr, result_len);
1080   result[result_len] = '\0';
1081 
1082   return rasqal_new_string_literal(world, result,
1083                                    new_lang,
1084                                    /* datatype */ NULL,
1085                                    /* qname */ NULL);
1086 
1087   failed:
1088   if(l1)
1089     rasqal_free_literal(l1);
1090 
1091   if(l2)
1092     rasqal_free_literal(l2);
1093 
1094   if(error_p)
1095     *error_p = 1;
1096 
1097   return NULL;
1098 }
1099 
1100 
1101 /*
1102  * rasqal_expression_evaluate_replace:
1103  * @e: The expression to evaluate.
1104  * @eval_context: Evaluation context
1105  *
1106  * INTERNAL - Evaluate RASQAL_EXPR_REPLACE(input, pattern, replacement[, flags]) expression.
1107  *
1108  * Return value: A #rasqal_literal string value or NULL on failure.
1109  */
1110 rasqal_literal*
rasqal_expression_evaluate_replace(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)1111 rasqal_expression_evaluate_replace(rasqal_expression *e,
1112                                    rasqal_evaluation_context *eval_context,
1113                                    int *error_p)
1114 {
1115   rasqal_world* world = eval_context->world;
1116   const unsigned char *tmp_str;
1117   const char *match;
1118   const char *pattern;
1119   const char *replace;
1120   const char *regex_flags = NULL;
1121   size_t match_len;
1122   size_t replace_len;
1123   rasqal_literal* l1 = NULL;
1124   rasqal_literal* l2 = NULL;
1125   rasqal_literal* l3 = NULL;
1126   rasqal_literal* l4 = NULL;
1127   char* result_s = NULL;
1128   size_t result_len = 0;
1129   rasqal_literal* result = NULL;
1130 
1131   l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
1132   if((error_p && *error_p) || !l1)
1133     goto failed;
1134   tmp_str = rasqal_literal_as_counted_string(l1, &match_len,
1135                                              eval_context->flags,
1136                                              error_p);
1137   match = RASQAL_GOOD_CAST(const char*, tmp_str);
1138   if((error_p && *error_p) || !match)
1139     goto failed;
1140 
1141   l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
1142   if((error_p && *error_p) || !l2)
1143     goto failed;
1144   pattern = RASQAL_GOOD_CAST(const char*, l2->string);
1145 
1146   l3 = rasqal_expression_evaluate2(e->arg3, eval_context, error_p);
1147   if((error_p && *error_p) || !l3)
1148     goto failed;
1149 
1150   if(l1->type != RASQAL_LITERAL_STRING && l1->type != RASQAL_LITERAL_XSD_STRING)
1151     /* Not a string so cannot do string operations */
1152     goto failed;
1153 
1154   tmp_str = rasqal_literal_as_counted_string(l3, &replace_len,
1155                                              eval_context->flags,
1156                                              error_p);
1157   replace = RASQAL_GOOD_CAST(const char*, tmp_str);
1158   if((error_p && *error_p) || !replace)
1159     goto failed;
1160 
1161   if(e->arg4) {
1162     l4 = rasqal_expression_evaluate2(e->arg4, eval_context, error_p);
1163     if((error_p && *error_p) || !l4)
1164       goto failed;
1165 
1166     regex_flags = RASQAL_GOOD_CAST(const char*, l4->string);
1167   }
1168 
1169   result_s = rasqal_regex_replace(world, eval_context->locator,
1170                                   pattern,
1171                                   regex_flags,
1172                                   match, match_len,
1173                                   replace, replace_len,
1174                                   &result_len);
1175 
1176   RASQAL_DEBUG6("regex replace returned %s for '%s' from '%s' to '%s' (flags=%s)\n", result_s ? result_s : "NULL", match, pattern, replace, regex_flags ? RASQAL_GOOD_CAST(char*, regex_flags) : "");
1177 
1178   if(!result_s)
1179     goto failed;
1180 
1181   result = rasqal_new_string_literal(world,
1182                                      RASQAL_GOOD_CAST(const unsigned char*, result_s),
1183                                      l1->language, l1->datatype, NULL);
1184   l1->language = NULL;
1185   l1->datatype = NULL;
1186 
1187   rasqal_free_literal(l1);
1188   rasqal_free_literal(l2);
1189   rasqal_free_literal(l3);
1190   if(l4)
1191     rasqal_free_literal(l4);
1192 
1193   return result;
1194 
1195 
1196   failed:
1197   if(l1)
1198     rasqal_free_literal(l1);
1199 
1200   if(l2)
1201     rasqal_free_literal(l2);
1202 
1203   if(l3)
1204     rasqal_free_literal(l3);
1205 
1206   if(l4)
1207     rasqal_free_literal(l4);
1208 
1209   if(error_p)
1210     *error_p = 1;
1211 
1212   return NULL;
1213 }
1214