1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * rasqal_expr_strings.c - Rasqal string expression functions
4 *
5 * Copyright (C) 2010, David Beckett http://www.dajobe.org/
6 *
7 * This package is Free Software and part of Redland http://librdf.org/
8 *
9 * It is licensed under the following three licenses as alternatives:
10 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
11 * 2. GNU General Public License (GPL) V2 or any newer version
12 * 3. Apache License, V2.0 or any newer version
13 *
14 * You may not use this file except in compliance with at least one of
15 * the above three licenses.
16 *
17 * See LICENSE.html or LICENSE.txt at the top of this package for the
18 * complete terms and further detail along with the license texts for
19 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
20 *
21 *
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include <rasqal_config.h>
26 #endif
27
28 #ifdef WIN32
29 #include <win32_rasqal_config.h>
30 #endif
31
32 #include <stdio.h>
33 #include <string.h>
34 #include <ctype.h>
35 #ifdef HAVE_STDLIB_H
36 #include <stdlib.h>
37 #endif
38 #include <stdarg.h>
39
40 #include "rasqal.h"
41 #include "rasqal_internal.h"
42
43
44 #define DEBUG_FH stderr
45
46
47 /*
48 * rasqal_expression_evaluate_strlen:
49 * @e: The expression to evaluate.
50 * @eval_context: Evaluation context
51 *
52 * INTERNAL - Evaluate RASQAL_EXPR_STRLEN(expr) expression.
53 *
54 * Return value: A #rasqal_literal integer value or NULL on failure.
55 */
56 rasqal_literal*
rasqal_expression_evaluate_strlen(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)57 rasqal_expression_evaluate_strlen(rasqal_expression *e,
58 rasqal_evaluation_context *eval_context,
59 int *error_p)
60 {
61 rasqal_world* world = eval_context->world;
62 rasqal_literal* l1;
63 rasqal_literal* result = NULL;
64 const unsigned char *s;
65 int len = 0;
66
67 l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
68 if((error_p && *error_p) || !l1)
69 goto failed;
70
71 s = rasqal_literal_as_string_flags(l1, eval_context->flags, error_p);
72 if(error_p && *error_p)
73 goto failed;
74
75 if(!s)
76 len = 0;
77 else
78 len = raptor_unicode_utf8_strlen(s, strlen(RASQAL_GOOD_CAST(const char*, s)));
79
80
81 result = rasqal_new_numeric_literal_from_long(world, RASQAL_LITERAL_INTEGER,
82 len);
83 rasqal_free_literal(l1);
84 return result;
85
86 failed:
87 if(error_p)
88 *error_p = 1;
89
90 if(l1)
91 rasqal_free_literal(l1);
92
93 return NULL;
94 }
95
96
97 /*
98 * rasqal_expression_evaluate_substr:
99 * @e: The expression to evaluate.
100 * @eval_context: Evaluation context
101 *
102 * INTERNAL - Evaluate RASQAL_EXPR_SUBSTR(expr) expression.
103 *
104 * Return value: A #rasqal_literal integer value or NULL on failure.
105 */
106 rasqal_literal*
rasqal_expression_evaluate_substr(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)107 rasqal_expression_evaluate_substr(rasqal_expression *e,
108 rasqal_evaluation_context *eval_context,
109 int *error_p)
110 {
111 rasqal_world* world = eval_context->world;
112 rasqal_literal* l1 = NULL;
113 rasqal_literal* l2 = NULL;
114 rasqal_literal* l3 = NULL;
115 const unsigned char *s;
116 unsigned char* new_s = NULL;
117 char* new_lang = NULL;
118 raptor_uri* dt_uri = NULL;
119 size_t len = 0;
120 int startingLoc = 0;
121 int length = -1;
122
123 /* haystack string */
124 l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
125 if((error_p && *error_p) || !l1)
126 goto failed;
127
128 s = rasqal_literal_as_counted_string(l1, &len, eval_context->flags, error_p);
129 if(error_p && *error_p)
130 goto failed;
131
132 /* integer startingLoc */
133 l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
134 if((error_p && *error_p) || !l2)
135 goto failed;
136
137 startingLoc = rasqal_literal_as_integer(l2, error_p);
138 if(error_p && *error_p)
139 goto failed;
140
141 /* optional integer length */
142 if(e->arg3) {
143 l3 = rasqal_expression_evaluate2(e->arg3, eval_context, error_p);
144 if(!l3)
145 goto failed;
146
147 length = rasqal_literal_as_integer(l3, error_p);
148 if(error_p && *error_p)
149 goto failed;
150
151 }
152
153 new_s = RASQAL_MALLOC(unsigned char*, len + 1);
154 if(!new_s)
155 goto failed;
156
157 /* adjust starting index to xsd fn:substring initial offset 1 */
158 if(!raptor_unicode_utf8_substr(new_s, /* dest_length_p */ NULL,
159 s, len, startingLoc - 1, length))
160 goto failed;
161
162 if(l1->language) {
163 len = strlen(RASQAL_GOOD_CAST(const char*, l1->language));
164 new_lang = RASQAL_MALLOC(char*, len + 1);
165 if(!new_lang)
166 goto failed;
167
168 memcpy(new_lang, l1->language, len + 1);
169 }
170
171 dt_uri = l1->datatype;
172 if(dt_uri)
173 dt_uri = raptor_uri_copy(dt_uri);
174
175 rasqal_free_literal(l1);
176 rasqal_free_literal(l2);
177 if(l3)
178 rasqal_free_literal(l3);
179
180 /* after this new_s, new_lang and dt_uri become owned by result */
181 return rasqal_new_string_literal(world, new_s, new_lang, dt_uri,
182 /* qname */ NULL);
183
184
185
186 failed:
187 if(error_p)
188 *error_p = 1;
189
190 if(l1)
191 rasqal_free_literal(l1);
192 if(l2)
193 rasqal_free_literal(l2);
194 if(l3)
195 rasqal_free_literal(l3);
196
197 return NULL;
198 }
199
200
201 /*
202 * rasqal_expression_evaluate_set_case:
203 * @e: The expression to evaluate.
204 * @eval_context: Evaluation context
205 *
206 * INTERNAL - Evaluate RASQAL_EXPR_UCASE(expr) or
207 * RASQAL_EXPR_LCASE(expr) expressions.
208 *
209 * Return value: A #rasqal_literal string value or NULL on failure.
210 */
211 rasqal_literal*
rasqal_expression_evaluate_set_case(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)212 rasqal_expression_evaluate_set_case(rasqal_expression *e,
213 rasqal_evaluation_context *eval_context,
214 int *error_p)
215 {
216 rasqal_world* world = eval_context->world;
217 rasqal_literal* l1;
218 const unsigned char *s;
219 unsigned char* new_s = NULL;
220 char* new_lang = NULL;
221 raptor_uri* dt_uri = NULL;
222 size_t len = 0;
223
224 l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
225 if((error_p && *error_p) || !l1)
226 goto failed;
227
228 s = rasqal_literal_as_counted_string(l1, &len, eval_context->flags, error_p);
229 if(error_p && *error_p)
230 goto failed;
231
232 new_s =RASQAL_MALLOC(unsigned char*, len + 1);
233 if(!new_s)
234 goto failed;
235
236 if(e->op == RASQAL_EXPR_UCASE) {
237 unsigned int i;
238
239 for(i = 0; i < len; i++) {
240 unsigned char c = s[i];
241 if(islower(RASQAL_GOOD_CAST(int, c)))
242 c = RASQAL_GOOD_CAST(unsigned char, toupper(RASQAL_GOOD_CAST(int, c)));
243 new_s[i] = c;
244 }
245 } else { /* RASQAL_EXPR_LCASE */
246 unsigned int i;
247
248 for(i = 0; i < len; i++) {
249 unsigned char c = s[i];
250 if(isupper(RASQAL_GOOD_CAST(int, c)))
251 c = RASQAL_GOOD_CAST(unsigned char, tolower(RASQAL_GOOD_CAST(int, c)));
252 new_s[i] = c;
253 }
254 }
255 new_s[len] = '\0';
256
257 if(l1->language) {
258 len = strlen(RASQAL_GOOD_CAST(const char*, l1->language));
259 new_lang = RASQAL_MALLOC(char*, len + 1);
260 if(!new_lang)
261 goto failed;
262
263 memcpy(new_lang, l1->language, len + 1);
264 }
265
266 dt_uri = l1->datatype;
267 if(dt_uri)
268 dt_uri = raptor_uri_copy(dt_uri);
269
270 rasqal_free_literal(l1);
271
272 /* after this new_s, new_lang and dt_uri become owned by result */
273 return rasqal_new_string_literal(world, new_s, new_lang, dt_uri,
274 /* qname */ NULL);
275
276
277 failed:
278 if(error_p)
279 *error_p = 1;
280
281 if(new_s)
282 RASQAL_FREE(char*, new_s);
283 if(new_lang)
284 RASQAL_FREE(char*, new_lang);
285 if(l1)
286 rasqal_free_literal(l1);
287
288 return NULL;
289 }
290
291
292 /*
293 * rasqal_literals_sparql11_compatible:
294 * @l1: first literal
295 * @l2: second literal
296 *
297 * INTERNAL - Check if two literals are SPARQL 1.1 compatible such as usable for STRSTARTS()
298 *
299 * From STRSTARTS(), STRENDS() and CONTAINS() draft definition:
300 * 1. pairs of simple literals,
301 * 2. pairs of xsd:string typed literals
302 * 3. pairs of plain literals with identical language tags
303 * 4. pairs of an xsd:string typed literal (arg1 or arg2) and a simple literal (arg2 or arg1)
304 * 5. pairs of a plain literal with language tag (arg1) and a simple literal (arg2)
305 * 6. pairs of a plain literal with language tag (arg1) and an xsd:string typed literal (arg2)
306 *
307 * Return value: non-0 if literals are compatible
308 */
309 static int
rasqal_literals_sparql11_compatible(rasqal_literal * l1,rasqal_literal * l2)310 rasqal_literals_sparql11_compatible(rasqal_literal *l1, rasqal_literal *l2)
311 {
312 raptor_uri* dt1;
313 raptor_uri* dt2;
314 const char *lang1;
315 const char *lang2;
316 raptor_uri* xsd_string_uri;
317
318 xsd_string_uri = rasqal_xsd_datatype_type_to_uri(l1->world,
319 RASQAL_LITERAL_XSD_STRING);
320
321 /* Languages */
322 lang1 = l1->language;
323 lang2 = l2->language;
324
325 /* Turn xsd:string datatypes into plain literals for compatibility
326 * purposes
327 */
328 dt1 = l1->datatype;
329 if(dt1 && raptor_uri_equals(dt1, xsd_string_uri))
330 dt1 = NULL;
331
332 dt2 = l2->datatype;
333 if(dt2 && raptor_uri_equals(dt2, xsd_string_uri))
334 dt2 = NULL;
335
336 /* If there any datatypes left, the literals are not compatible */
337 if(dt1 || dt2)
338 return 0;
339
340 /* pairs of simple literals (or pairs of xsd:string or mixtures): #1, #2, #4 */
341 if(!lang1 && !lang2)
342 return 1;
343
344 /* pairs of plain literals with identical language tags #3 */
345 if(lang1 && lang2)
346 return !strcmp(lang1, lang2);
347
348 /* pairs of a plain literal with language tag (arg1) and a simple
349 * literal or xsd:string typed literal [with no language tag] (arg2) #5, #6
350 */
351 return (lang1 && !lang2);
352 }
353
354
355 /*
356 * rasqal_expression_evaluate_str_prefix_suffix:
357 * @e: The expression to evaluate.
358 * @eval_context: Evaluation context
359 *
360 * INTERNAL - Evaluate RASQAL_EXPR_STRSTARTS(lit, lit) and
361 * RASQAL_EXPR_STRENDS(lit, lit) expressions.
362 *
363 * Return value: A #rasqal_literal integer value or NULL on failure.
364 */
365 rasqal_literal*
rasqal_expression_evaluate_str_prefix_suffix(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)366 rasqal_expression_evaluate_str_prefix_suffix(rasqal_expression *e,
367 rasqal_evaluation_context *eval_context,
368 int *error_p)
369 {
370 rasqal_world* world = eval_context->world;
371 rasqal_literal *l1 = NULL;
372 rasqal_literal *l2 = NULL;
373 int b;
374 const unsigned char *s1;
375 const unsigned char *s2;
376 size_t len1 = 0;
377 size_t len2 = 0;
378
379 l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
380 if((error_p && *error_p) || !l1)
381 goto failed;
382
383 l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
384 if((error_p && *error_p) || !l2)
385 goto failed;
386
387 if(!rasqal_literals_sparql11_compatible(l1, l2))
388 goto failed;
389
390 s1 = rasqal_literal_as_counted_string(l1, &len1, eval_context->flags, error_p);
391 if(error_p && *error_p)
392 goto failed;
393
394 s2 = rasqal_literal_as_counted_string(l2, &len2, eval_context->flags, error_p);
395 if(error_p && *error_p)
396 goto failed;
397
398 if(len1 < len2) {
399 /* s1 is shorter than s2 so s2 can never be a prefix, suffix or
400 * contain s1 */
401 b = 0;
402 } else {
403 if(e->op == RASQAL_EXPR_STRSTARTS) {
404 b = !memcmp(s1, s2, len2);
405 } else if(e->op == RASQAL_EXPR_STRENDS) {
406 b = !memcmp(s1 + len1 - len2, s2, len2);
407 } else { /* RASQAL_EXPR_CONTAINS */
408 /* b = (strnstr(RASQAL_GOOD_CAST(const char*, s1), RASQAL_GOOD_CAST(const char*, s2), len2) != NULL); */
409 b = (strstr(RASQAL_GOOD_CAST(const char*, s1),
410 RASQAL_GOOD_CAST(const char*, s2)) != NULL);
411 }
412 }
413
414
415
416 rasqal_free_literal(l1);
417 rasqal_free_literal(l2);
418
419 return rasqal_new_boolean_literal(world, b);
420
421 failed:
422 if(error_p)
423 *error_p = 1;
424
425 if(l1)
426 rasqal_free_literal(l1);
427 if(l2)
428 rasqal_free_literal(l2);
429
430 return NULL;
431 }
432
433
434 /*
435 * rasqal_expression_evaluate_encode_for_uri:
436 * @e: The expression to evaluate.
437 * @eval_context: Evaluation context
438 *
439 * INTERNAL - Evaluate RASQAL_EXPR_ENCODE_FOR_URI(string) expression.
440 *
441 * Return value: A #rasqal_literal string value or NULL on failure.
442 */
443 rasqal_literal*
rasqal_expression_evaluate_encode_for_uri(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)444 rasqal_expression_evaluate_encode_for_uri(rasqal_expression *e,
445 rasqal_evaluation_context *eval_context,
446 int *error_p)
447 {
448 rasqal_world* world = eval_context->world;
449 rasqal_literal* l1;
450 raptor_uri* xsd_string_uri;
451 const unsigned char *s;
452 unsigned char* new_s = NULL;
453 raptor_uri* dt_uri = NULL;
454 size_t len = 0;
455 unsigned int i;
456 unsigned char* p;
457
458 l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
459 if((error_p && *error_p) || !l1)
460 goto failed;
461
462 xsd_string_uri = rasqal_xsd_datatype_type_to_uri(l1->world,
463 RASQAL_LITERAL_XSD_STRING);
464
465 dt_uri = l1->datatype;
466 if(dt_uri && !raptor_uri_equals(dt_uri, xsd_string_uri))
467 /* datatype and not xsd:string */
468 goto failed;
469
470 s = rasqal_literal_as_counted_string(l1, &len, eval_context->flags, error_p);
471 if(error_p && *error_p)
472 goto failed;
473
474 /* pessimistically assume every UTF-8 byte is %XX 3 x len */
475 new_s = RASQAL_MALLOC(unsigned char*, (3 * len) + 1);
476 if(!new_s)
477 goto failed;
478
479 p = new_s;
480 for(i = 0; i < len; i++) {
481 unsigned char c = s[i];
482
483 /* All characters are escaped except those identified as
484 * "unreserved" by [RFC 3986], that is the upper- and lower-case
485 * letters A-Z, the digits 0-9, HYPHEN-MINUS ("-"), LOW LINE
486 * ("_"), FULL STOP ".", and TILDE "~".
487 */
488 if((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
489 (c >= '0' && c <= '9') ||
490 c == '-' || c == '_' || c == '.' || c == '~') {
491 *p++ = c;
492 } else {
493 unsigned short hex;
494
495 *p++ = '%';
496 hex = (c & 0xf0) >> 4;
497 *p++ = RASQAL_GOOD_CAST(unsigned char, (hex < 10) ? ('0' + hex) : ('A' + hex - 10));
498 hex = (c & 0x0f);
499 *p++ = RASQAL_GOOD_CAST(unsigned char, (hex < 10) ? ('0' + hex) : ('A' + hex - 10));
500 }
501 }
502
503 *p = '\0';
504
505 rasqal_free_literal(l1);
506
507 /* after this new_s, new_lang and dt_uri become owned by result */
508 return rasqal_new_string_literal(world, new_s, NULL, NULL,
509 /* qname */ NULL);
510
511
512 failed:
513 if(error_p)
514 *error_p = 1;
515
516 if(new_s)
517 RASQAL_FREE(char*, new_s);
518 if(l1)
519 rasqal_free_literal(l1);
520
521 return NULL;
522
523 }
524
525
526 /*
527 * rasqal_expression_evaluate_concat:
528 * @e: The expression to evaluate.
529 * @eval_context: Evaluation context
530 *
531 * INTERNAL - Evaluate RASQAL_EXPR_CONCAT(expr list) expression.
532 *
533 * "If all input literals are typed literals of type xsd:string,
534 * then the returned literal is also of type xsd:string, if all input
535 * literals are plain literals with identical language tag, then the
536 * returned literal is a plain literal with the same language tag, in
537 * all other cases, the returned literal is a simple literal."
538 *
539 * Return value: A #rasqal_literal string value or NULL on failure.
540 */
541 rasqal_literal*
rasqal_expression_evaluate_concat(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)542 rasqal_expression_evaluate_concat(rasqal_expression *e,
543 rasqal_evaluation_context *eval_context,
544 int *error_p)
545 {
546 rasqal_world* world = eval_context->world;
547 raptor_stringbuffer* sb = NULL;
548 int i;
549 size_t len;
550 unsigned char* result_str = NULL;
551 char* lang_tag = NULL;
552 int mode = -1; /* -1: undecided 0: xsd:string 1: simple+lang 2: simple */
553 raptor_uri* dt = NULL;
554 raptor_uri* xsd_string_uri;
555 rasqal_literal *result_l;
556
557 xsd_string_uri = rasqal_xsd_datatype_type_to_uri(world,
558 RASQAL_LITERAL_XSD_STRING);
559
560 sb = raptor_new_stringbuffer();
561 if(!sb)
562 goto failed;
563
564 for(i = 0; i < raptor_sequence_size(e->args); i++) {
565 rasqal_expression *arg_expr;
566 rasqal_literal* arg_literal;
567 const unsigned char* s = NULL;
568
569 arg_expr = (rasqal_expression*)raptor_sequence_get_at(e->args, i);
570 if(!arg_expr)
571 break;
572
573 arg_literal = rasqal_expression_evaluate2(arg_expr, eval_context, error_p);
574 if(!arg_literal) {
575 /* FIXME - check what to do with a NULL literal */
576 #if 0
577 if(error_p)
578 *error_p = 1;
579 goto failed;
580 #endif
581 continue;
582 }
583
584 if(arg_literal->type != RASQAL_LITERAL_STRING &&
585 arg_literal->type != RASQAL_LITERAL_XSD_STRING) {
586 /* result is NULL literal; no error */
587 goto null_literal;
588 }
589
590
591 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
592 RASQAL_DEBUG1("Concating literal ");
593 rasqal_literal_print(arg_literal, stderr);
594 fprintf(stderr, " with existing mode %d lang=%s\n", mode, lang_tag);
595 #endif
596
597 if(arg_literal->datatype) {
598 /* Datatype */
599 if(raptor_uri_equals(arg_literal->datatype, xsd_string_uri)) {
600 if(mode < 0)
601 /* mode -1: expect all xsd:string */
602 mode = 0;
603 else if(mode != 0) {
604 /* mode 1, 2: different datatypes, so result is simple literal */
605 if(lang_tag) {
606 RASQAL_FREE(char*, lang_tag); lang_tag = NULL;
607 }
608 mode = 2;
609 } else {
610 /* mode 0: not xsd:string so result is simple literal */
611 mode = 2;
612 }
613 }
614 } else {
615 /* No datatype; check language */
616 if(arg_literal->language) {
617 if(mode < 0) {
618 /* mode -1: First literal with language: save it and use it */
619 size_t lang_len = strlen(arg_literal->language);
620
621 lang_tag = RASQAL_MALLOC(char*, lang_len + 1);
622 if(!lang_tag)
623 goto failed;
624 memcpy(lang_tag, arg_literal->language, lang_len + 1);
625 mode = 1;
626 } else if (mode == 1) {
627 /* mode 1: Already got a lang tag so check it */
628 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
629 RASQAL_DEBUG3("concat compare lang %s vs %s\n",
630 arg_literal->language, lang_tag);
631 #endif
632 if(strcmp(arg_literal->language, lang_tag)) {
633 /* different languages, so result is simple literal */
634 RASQAL_FREE(char*, lang_tag); lang_tag = NULL;
635 mode = 2;
636 }
637 } else if (mode == 0) {
638 /* mode 0: mixture of xsd:string and language literals,
639 * so result is simple literal
640 */
641 mode = 2;
642 } /* otherwise mode 2: No change */
643 } else {
644 if(lang_tag) {
645 /* mode 1: language but this literal has none, so result is
646 * simple literal */
647 RASQAL_FREE(char*, lang_tag); lang_tag = NULL;
648 }
649 mode = 2;
650 }
651 }
652
653 /* FIXME - check that altering the flags this way to allow
654 * concat of URIs is OK
655 */
656 s = rasqal_literal_as_string_flags(arg_literal,
657 (eval_context->flags & ~RASQAL_COMPARE_XQUERY),
658 error_p);
659 rasqal_free_literal(arg_literal);
660
661
662 if((error_p && *error_p) || !s)
663 goto failed;
664
665 raptor_stringbuffer_append_string(sb, s, 1);
666 }
667
668
669 len = raptor_stringbuffer_length(sb);
670 result_str = RASQAL_MALLOC(unsigned char*, len + 1);
671 if(!result_str)
672 goto failed;
673
674 if(raptor_stringbuffer_copy_to_string(sb, result_str, len))
675 goto failed;
676
677 raptor_free_stringbuffer(sb);
678
679 if(mode == 0)
680 dt = raptor_uri_copy(xsd_string_uri);
681
682 /* result_str and lang and dt (if set) becomes owned by result */
683 result_l = rasqal_new_string_literal(world, result_str, lang_tag, dt, NULL);
684 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
685 RASQAL_DEBUG1("Concat result literal: ");
686 rasqal_literal_print(result_l, stderr);
687 fprintf(stderr, " with mode %d\n", mode);
688 #endif
689
690 return result_l;
691
692 failed:
693 if(error_p)
694 *error_p = 1;
695
696 null_literal:
697 if(dt)
698 raptor_free_uri(dt);
699 if(lang_tag)
700 RASQAL_FREE(char*, lang_tag);
701 if(result_str)
702 RASQAL_FREE(char*, result_str);
703 if(sb)
704 raptor_free_stringbuffer(sb);
705
706 return NULL;
707 }
708
709
710 /*
711 * rasqal_expression_evaluate_langmatches:
712 * @e: The expression to evaluate.
713 * @eval_context: Evaluation context
714 *
715 * INTERNAL - Evaluate RASQAL_EXPR_LANGMATCHES(lang tag, lang tag range) expression.
716 *
717 * Return value: A #rasqal_literal boolean value or NULL on failure.
718 */
719 rasqal_literal*
rasqal_expression_evaluate_langmatches(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)720 rasqal_expression_evaluate_langmatches(rasqal_expression *e,
721 rasqal_evaluation_context *eval_context,
722 int *error_p)
723 {
724 rasqal_world* world = eval_context->world;
725 rasqal_literal *l1 = NULL;
726 rasqal_literal *l2 = NULL;
727 const unsigned char *tag;
728 const unsigned char *range;
729 int b;
730
731 l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
732 if((error_p && *error_p) || !l1)
733 goto failed;
734
735 l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
736 if((error_p && *error_p) || !l2)
737 goto failed;
738
739 tag = rasqal_literal_as_string_flags(l1, eval_context->flags, error_p);
740 if(error_p && *error_p)
741 goto failed;
742
743 range = rasqal_literal_as_string_flags(l2, eval_context->flags, error_p);
744 if(error_p && *error_p)
745 goto failed;
746
747
748 b = rasqal_language_matches(tag, range);
749
750 rasqal_free_literal(l1);
751 rasqal_free_literal(l2);
752
753 return rasqal_new_boolean_literal(world, b);
754
755 failed:
756 if(error_p)
757 *error_p = 1;
758
759 if(l1)
760 rasqal_free_literal(l1);
761 if(l2)
762 rasqal_free_literal(l2);
763
764 return NULL;
765 }
766
767
768 /*
769 * rasqal_expression_evaluate_strmatch:
770 * @e: The expression to evaluate.
771 * @eval_context: Evaluation context
772 *
773 * INTERNAL - Evaluate RASQAL_EXPR_STR_MATCH, RASQAL_EXPR_STR_NMATCH and
774 * RASQAL_EXPR_REGEX expressions.
775 *
776 * Return value: A #rasqal_literal value or NULL on failure.
777 */
778 rasqal_literal*
rasqal_expression_evaluate_strmatch(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)779 rasqal_expression_evaluate_strmatch(rasqal_expression *e,
780 rasqal_evaluation_context *eval_context,
781 int *error_p)
782 {
783 rasqal_world* world = eval_context->world;
784 int b = 0;
785 const unsigned char *l1_str;
786 const char *match_string;
787 const char *pattern;
788 const char *regex_flags;
789 rasqal_literal *l1, *l2, *l3;
790 int rc = 0;
791 size_t match_len;
792
793 l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
794 if((error_p && *error_p) || !l1)
795 goto failed;
796
797 l1_str = rasqal_literal_as_counted_string(l1, &match_len,
798 eval_context->flags, error_p);
799 match_string = RASQAL_GOOD_CAST(const char*, l1_str);
800 if((error_p && *error_p) || !match_string) {
801 rasqal_free_literal(l1);
802 goto failed;
803 }
804
805 l3 = NULL;
806 regex_flags = NULL;
807 if(e->op == RASQAL_EXPR_REGEX) {
808 l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
809 if((error_p && *error_p) || !l2) {
810 rasqal_free_literal(l1);
811 goto failed;
812 }
813
814 if(e->arg3) {
815 l3 = rasqal_expression_evaluate2(e->arg3, eval_context, error_p);
816 if((error_p && *error_p) || !l3) {
817 rasqal_free_literal(l1);
818 rasqal_free_literal(l2);
819 goto failed;
820 }
821 regex_flags = RASQAL_GOOD_CAST(const char*, l3->string);
822 }
823
824 } else {
825 l2 = e->literal;
826 regex_flags = RASQAL_GOOD_CAST(const char*, l2->flags);
827 }
828 pattern = RASQAL_GOOD_CAST(const char*, l2->string);
829
830 rc = rasqal_regex_match(world, eval_context->locator,
831 pattern, regex_flags,
832 match_string, match_len);
833
834 #ifdef RASQAL_DEBUG
835 if(rc >= 0)
836 RASQAL_DEBUG5("regex match returned %s for '%s' against '%s' (flags=%s)\n", rc ? "true" : "false", match_string, pattern, l2->flags ? RASQAL_GOOD_CAST(char*, l2->flags) : "");
837 else
838 RASQAL_DEBUG4("regex match returned failed for '%s' against '%s' (flags=%s)\n", match_string, pattern, l2->flags ? RASQAL_GOOD_CAST(char*, l2->flags) : "");
839 #endif
840
841 rasqal_free_literal(l1);
842 if(e->op == RASQAL_EXPR_REGEX) {
843 rasqal_free_literal(l2);
844 if(l3)
845 rasqal_free_literal(l3);
846 }
847
848 if(rc < 0)
849 goto failed;
850
851 b = rc;
852 if(e->op == RASQAL_EXPR_STR_NMATCH)
853 b = 1 - b;
854
855 return rasqal_new_boolean_literal(world, b);
856
857 failed:
858 if(error_p)
859 *error_p = 1;
860
861 return NULL;
862 }
863
864
865 /*
866 * rasqal_expression_evaluate_strbefore:
867 * @e: The expression to evaluate.
868 * @eval_context: Evaluation context
869 *
870 * INTERNAL - Evaluate RASQAL_EXPR_STRBEFORE(string, needle) expression.
871 *
872 * Return value: A #rasqal_literal string value or NULL on failure.
873 */
874 rasqal_literal*
rasqal_expression_evaluate_strbefore(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)875 rasqal_expression_evaluate_strbefore(rasqal_expression *e,
876 rasqal_evaluation_context *eval_context,
877 int *error_p)
878 {
879 rasqal_world* world = eval_context->world;
880 rasqal_literal* l1 = NULL;
881 rasqal_literal* l2 = NULL;
882 const unsigned char *haystack;
883 const unsigned char *needle;
884 size_t haystack_len;
885 size_t needle_len;
886 const char *ptr;
887 unsigned char* result;
888 size_t result_len;
889 char* new_lang = NULL;
890
891 /* haystack string */
892 l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
893 if((error_p && *error_p) || !l1)
894 goto failed;
895
896 /* needle string */
897 l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
898 if((error_p && *error_p) || !l2)
899 goto failed;
900
901 if(!rasqal_literal_is_string(l1) || !rasqal_literal_is_string(l2)) {
902 /* not strings */
903 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
904 RASQAL_DEBUG1("Cannot strbefore haystack ");
905 rasqal_literal_print(l1, stderr);
906 fputs( " to needle ", stderr);
907 rasqal_literal_print(l2, stderr);
908 fputs(" - both not string", stderr);
909 #endif
910 goto failed;
911 }
912
913 if(l2->language && rasqal_literal_string_languages_compare(l1, l2)) {
914 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
915 RASQAL_DEBUG1("Cannot strbefore haystack ");
916 rasqal_literal_print(l1, stderr);
917 fputs( " to language needle ", stderr);
918 rasqal_literal_print(l2, stderr);
919 fputs(" - languages mismatch", stderr);
920 #endif
921 goto failed;
922 }
923
924 haystack = rasqal_literal_as_counted_string(l1, &haystack_len,
925 eval_context->flags, error_p);
926 if((error_p && *error_p) || !haystack)
927 goto failed;
928
929 needle = rasqal_literal_as_counted_string(l2, &needle_len,
930 eval_context->flags, error_p);
931 if((error_p && *error_p) || !needle)
932 goto failed;
933
934 ptr = strstr(RASQAL_GOOD_CAST(const char*, haystack),
935 RASQAL_GOOD_CAST(const char*, needle));
936 if(ptr) {
937 result_len = RASQAL_GOOD_CAST(size_t, ptr - RASQAL_GOOD_CAST(const char*, haystack));
938
939 if(l1->language) {
940 size_t len = strlen(RASQAL_GOOD_CAST(const char*, l1->language));
941 new_lang = RASQAL_MALLOC(char*, len + 1);
942 if(!new_lang)
943 goto failed;
944
945 memcpy(new_lang, l1->language, len + 1);
946 }
947 } else {
948 result_len = 0;
949 haystack = RASQAL_GOOD_CAST(const unsigned char *, "");
950 }
951
952 rasqal_free_literal(l1); l1 = NULL;
953 rasqal_free_literal(l2); l2 = NULL;
954
955 result = RASQAL_MALLOC(unsigned char*, result_len + 1);
956 if(!result)
957 goto failed;
958
959 if(result_len)
960 memcpy(result, haystack, result_len);
961 result[result_len] = '\0';
962
963 return rasqal_new_string_literal(world, result,
964 new_lang,
965 /* datatype */ NULL,
966 /* qname */ NULL);
967
968 failed:
969 if(l1)
970 rasqal_free_literal(l1);
971
972 if(l2)
973 rasqal_free_literal(l2);
974
975 if(error_p)
976 *error_p = 1;
977
978 return NULL;
979 }
980
981
982 /*
983 * rasqal_expression_evaluate_strafter:
984 * @e: The expression to evaluate.
985 * @eval_context: Evaluation context
986 *
987 * INTERNAL - Evaluate RASQAL_EXPR_STRAFTER(string, needle) expression.
988 *
989 * Return value: A #rasqal_literal string value or NULL on failure.
990 */
991 rasqal_literal*
rasqal_expression_evaluate_strafter(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)992 rasqal_expression_evaluate_strafter(rasqal_expression *e,
993 rasqal_evaluation_context *eval_context,
994 int *error_p)
995 {
996 rasqal_world* world = eval_context->world;
997 rasqal_literal* l1 = NULL;
998 rasqal_literal* l2 = NULL;
999 const unsigned char *haystack;
1000 const unsigned char *needle;
1001 size_t haystack_len;
1002 size_t needle_len;
1003 const char *ptr;
1004 unsigned char* result;
1005 size_t result_len;
1006 char* new_lang = NULL;
1007
1008 /* haystack string */
1009 l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
1010 if((error_p && *error_p) || !l1)
1011 goto failed;
1012
1013 /* needle string */
1014 l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
1015 if((error_p && *error_p) || !l2)
1016 goto failed;
1017
1018 if(!rasqal_literal_is_string(l1) || !rasqal_literal_is_string(l2)) {
1019 /* not strings */
1020 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
1021 RASQAL_DEBUG1("Cannot strafter haystack ");
1022 rasqal_literal_print(l1, stderr);
1023 fputs( " to needle ", stderr);
1024 rasqal_literal_print(l2, stderr);
1025 fputs(" - both not string", stderr);
1026 #endif
1027 goto failed;
1028 }
1029
1030 if(l2->language && rasqal_literal_string_languages_compare(l1, l2)) {
1031 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
1032 RASQAL_DEBUG1("Cannot strafter haystack ");
1033 rasqal_literal_print(l1, stderr);
1034 fputs( " to language needle ", stderr);
1035 rasqal_literal_print(l2, stderr);
1036 fputs(" - languages mismatch", stderr);
1037 #endif
1038 goto failed;
1039 }
1040
1041
1042 haystack = rasqal_literal_as_counted_string(l1, &haystack_len,
1043 eval_context->flags, error_p);
1044 if((error_p && *error_p) || !haystack)
1045 goto failed;
1046
1047 needle = rasqal_literal_as_counted_string(l2, &needle_len,
1048 eval_context->flags, error_p);
1049 if((error_p && *error_p) || !needle)
1050 goto failed;
1051
1052 ptr = strstr(RASQAL_GOOD_CAST(const char*, haystack),
1053 RASQAL_GOOD_CAST(const char*, needle));
1054 if(ptr) {
1055 ptr += needle_len;
1056 result_len = haystack_len - RASQAL_GOOD_CAST(size_t, (ptr - RASQAL_GOOD_CAST(const char*, haystack)));
1057
1058 if(l1->language) {
1059 size_t len = strlen(RASQAL_GOOD_CAST(const char*, l1->language));
1060 new_lang = RASQAL_MALLOC(char*, len + 1);
1061 if(!new_lang)
1062 goto failed;
1063
1064 memcpy(new_lang, l1->language, len + 1);
1065 }
1066 } else {
1067 ptr = (const char *)"";
1068 result_len = 0;
1069 }
1070
1071 rasqal_free_literal(l1); l1 = NULL;
1072 rasqal_free_literal(l2); l2 = NULL;
1073
1074 result = RASQAL_MALLOC(unsigned char*, result_len + 1);
1075 if(!result)
1076 goto failed;
1077
1078 if(result_len)
1079 memcpy(result, ptr, result_len);
1080 result[result_len] = '\0';
1081
1082 return rasqal_new_string_literal(world, result,
1083 new_lang,
1084 /* datatype */ NULL,
1085 /* qname */ NULL);
1086
1087 failed:
1088 if(l1)
1089 rasqal_free_literal(l1);
1090
1091 if(l2)
1092 rasqal_free_literal(l2);
1093
1094 if(error_p)
1095 *error_p = 1;
1096
1097 return NULL;
1098 }
1099
1100
1101 /*
1102 * rasqal_expression_evaluate_replace:
1103 * @e: The expression to evaluate.
1104 * @eval_context: Evaluation context
1105 *
1106 * INTERNAL - Evaluate RASQAL_EXPR_REPLACE(input, pattern, replacement[, flags]) expression.
1107 *
1108 * Return value: A #rasqal_literal string value or NULL on failure.
1109 */
1110 rasqal_literal*
rasqal_expression_evaluate_replace(rasqal_expression * e,rasqal_evaluation_context * eval_context,int * error_p)1111 rasqal_expression_evaluate_replace(rasqal_expression *e,
1112 rasqal_evaluation_context *eval_context,
1113 int *error_p)
1114 {
1115 rasqal_world* world = eval_context->world;
1116 const unsigned char *tmp_str;
1117 const char *match;
1118 const char *pattern;
1119 const char *replace;
1120 const char *regex_flags = NULL;
1121 size_t match_len;
1122 size_t replace_len;
1123 rasqal_literal* l1 = NULL;
1124 rasqal_literal* l2 = NULL;
1125 rasqal_literal* l3 = NULL;
1126 rasqal_literal* l4 = NULL;
1127 char* result_s = NULL;
1128 size_t result_len = 0;
1129 rasqal_literal* result = NULL;
1130
1131 l1 = rasqal_expression_evaluate2(e->arg1, eval_context, error_p);
1132 if((error_p && *error_p) || !l1)
1133 goto failed;
1134 tmp_str = rasqal_literal_as_counted_string(l1, &match_len,
1135 eval_context->flags,
1136 error_p);
1137 match = RASQAL_GOOD_CAST(const char*, tmp_str);
1138 if((error_p && *error_p) || !match)
1139 goto failed;
1140
1141 l2 = rasqal_expression_evaluate2(e->arg2, eval_context, error_p);
1142 if((error_p && *error_p) || !l2)
1143 goto failed;
1144 pattern = RASQAL_GOOD_CAST(const char*, l2->string);
1145
1146 l3 = rasqal_expression_evaluate2(e->arg3, eval_context, error_p);
1147 if((error_p && *error_p) || !l3)
1148 goto failed;
1149
1150 if(l1->type != RASQAL_LITERAL_STRING && l1->type != RASQAL_LITERAL_XSD_STRING)
1151 /* Not a string so cannot do string operations */
1152 goto failed;
1153
1154 tmp_str = rasqal_literal_as_counted_string(l3, &replace_len,
1155 eval_context->flags,
1156 error_p);
1157 replace = RASQAL_GOOD_CAST(const char*, tmp_str);
1158 if((error_p && *error_p) || !replace)
1159 goto failed;
1160
1161 if(e->arg4) {
1162 l4 = rasqal_expression_evaluate2(e->arg4, eval_context, error_p);
1163 if((error_p && *error_p) || !l4)
1164 goto failed;
1165
1166 regex_flags = RASQAL_GOOD_CAST(const char*, l4->string);
1167 }
1168
1169 result_s = rasqal_regex_replace(world, eval_context->locator,
1170 pattern,
1171 regex_flags,
1172 match, match_len,
1173 replace, replace_len,
1174 &result_len);
1175
1176 RASQAL_DEBUG6("regex replace returned %s for '%s' from '%s' to '%s' (flags=%s)\n", result_s ? result_s : "NULL", match, pattern, replace, regex_flags ? RASQAL_GOOD_CAST(char*, regex_flags) : "");
1177
1178 if(!result_s)
1179 goto failed;
1180
1181 result = rasqal_new_string_literal(world,
1182 RASQAL_GOOD_CAST(const unsigned char*, result_s),
1183 l1->language, l1->datatype, NULL);
1184 l1->language = NULL;
1185 l1->datatype = NULL;
1186
1187 rasqal_free_literal(l1);
1188 rasqal_free_literal(l2);
1189 rasqal_free_literal(l3);
1190 if(l4)
1191 rasqal_free_literal(l4);
1192
1193 return result;
1194
1195
1196 failed:
1197 if(l1)
1198 rasqal_free_literal(l1);
1199
1200 if(l2)
1201 rasqal_free_literal(l2);
1202
1203 if(l3)
1204 rasqal_free_literal(l3);
1205
1206 if(l4)
1207 rasqal_free_literal(l4);
1208
1209 if(error_p)
1210 *error_p = 1;
1211
1212 return NULL;
1213 }
1214