1 /*
2 * Copyright 2006-2008 The FLWOR Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "stdafx.h"
17
18 #include <iostream>
19
20 #include "common/common.h"
21
22 #include "zorbamisc/ns_consts.h"
23 #include "diagnostics/assert.h"
24 #include "diagnostics/xquery_diagnostics.h"
25
26 #include "zorbatypes/numconversions.h"
27
28 #include "system/globalenv.h"
29
30 #include "context/static_context.h"
31
32 #include "compiler/api/compilercb.h"
33
34 #include "runtime/strings/strings.h"
35 #include "runtime/visitors/planiter_visitor.h"
36
37 #include "store/api/item.h"
38 #include "store/api/item_factory.h"
39
40 #include "zorbautils/string_util.h"
41
42 #include "util/regex.h"
43 #include "util/utf8_util.h"
44 #include "util/utf8_string.h"
45 #include "util/string_util.h"
46 #include "util/uri_util.h"
47 #include "util/xml_util.h"
48
49
50 using namespace std;
51
52 namespace zorba {
53
54
55 /**
56 *______________________________________________________________________
57 *
58 * 7.2.1 fn:codepoints-to-string
59 *
60 * fn:codepoints-to-string($arg as xs:integer*) as xs:string
61 *_______________________________________________________________________*/
62 bool
nextImpl(store::Item_t & result,PlanState & planState) const63 CodepointsToStringIterator::nextImpl(store::Item_t& result, PlanState& planState) const
64 {
65 store::Item_t item;
66 zstring resStr;
67
68 PlanIteratorState* state;
69 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
70
71 while(true)
72 {
73 if (consumeNext(item, theChildren [0].getp(), planState ))
74 {
75 {
76 zstring lUtf8Code = item->getIntegerValue().toString();
77 try
78 {
79 xs_unsignedInt lCode = ztd::aton<xs_unsignedInt>(lUtf8Code.c_str());
80 if (!xml::is_valid(lCode))
81 throw std::invalid_argument( lUtf8Code.str() );
82 utf8::encode( lCode, &resStr );
83 }
84 catch ( std::exception const& )
85 {
86 throw XQUERY_EXCEPTION(
87 err::FOCH0001, ERROR_PARAMS( lUtf8Code ), ERROR_LOC( loc )
88 );
89 }
90 }
91 }
92 else
93 {
94 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state );
95 break;
96 }
97 }
98 STACK_END (state);
99 }
100
101 /**
102 *______________________________________________________________________
103 *
104 * 7.2.2 fn:string-to-codepoints
105 *
106 * fn:string-to-codepoints($arg as xs:string?) as xs:integer*
107 *_______________________________________________________________________
108 */
nextImpl(store::Item_t & result,PlanState & planState) const109 bool StringToCodepointsIterator::nextImpl(
110 store::Item_t& result,
111 PlanState& planState) const
112 {
113 // TODO Optimization for large strings: large strings mean that a large
114 // integer vector should be stored in the state that is not good.
115 store::Item_t item;
116 zstring inputStr;
117
118 StringToCodepointsIteratorState* state;
119 DEFAULT_STACK_INIT(StringToCodepointsIteratorState, state, planState);
120
121 if (consumeNext(item, theChildren [0].getp(), planState ))
122 {
123 if(!item->isStreamable())
124 {
125 item->getStringValue2(inputStr);
126 }
127 else
128 {
129 state->theStreamItem = item;
130 state->theStream = &item->getStream();
131 }
132 }
133
134 if ( state->theStream )
135 {
136 while ( true )
137 {
138 utf8::encoded_char_type ec;
139 memset( ec, 0, sizeof( ec ) );
140 utf8::storage_type *p;
141 p = ec;
142
143 if ( utf8::read( *state->theStream, ec ) == utf8::npos )
144 {
145 if ( state->theStream->eof() )
146 break;
147 if ( state->theStream->good() ) {
148 //
149 // If read() failed but the stream state is good, it means that an
150 // invalid byte was encountered.
151 //
152 char buf[ 6 /* bytes at most */ * 5 /* chars per byte */ ], *b = buf;
153 bool first = true;
154 for ( ; *p; ++p ) {
155 if ( first )
156 first = false;
157 else
158 *b++ = ',';
159 ::strcpy( b, "0x" ); b += 2;
160 ::sprintf( b, "%0hhX", *p ); b += 2;
161 }
162 throw XQUERY_EXCEPTION(
163 zerr::ZXQD0006_INVALID_UTF8_BYTE_SEQUENCE,
164 ERROR_PARAMS( buf ),
165 ERROR_LOC( loc )
166 );
167 } else {
168 throw XQUERY_EXCEPTION(
169 zerr::ZOSE0003_STREAM_READ_FAILURE, ERROR_LOC( loc )
170 );
171 }
172 }
173 state->theResult.clear();
174 state->theResult.push_back( utf8::next_char( p ) );
175
176 GENV_ITEMFACTORY->createInteger(
177 result,
178 Integer(state->theResult[0])
179 );
180
181 STACK_PUSH(true, state );
182 state->theIterator = state->theIterator + 1;
183 }
184 }
185 else if (!inputStr.empty())
186 {
187 utf8::to_codepoints(inputStr, &state->theResult);
188
189 while (state->theIterator < state->theResult.size())
190 {
191 GENV_ITEMFACTORY->createInteger(
192 result,
193 Integer(state->theResult[state->theIterator])
194 );
195
196 STACK_PUSH(true, state );
197 state->theIterator = state->theIterator + 1;
198 }
199 }
200 STACK_END (state);
201 }
202
203
init(PlanState & planState)204 void StringToCodepointsIteratorState::init(PlanState& planState)
205 {
206 PlanIteratorState::init(planState);
207 theIterator = 0;
208 theStream = 0;
209 theResult.clear();
210 }
211
212
reset(PlanState & planState)213 void StringToCodepointsIteratorState::reset(PlanState& planState)
214 {
215 PlanIteratorState::reset(planState);
216 theIterator = 0;
217 theResult.clear();
218 theStreamItem = 0;
219 }
220
221
222 /**
223 *______________________________________________________________________
224 *
225 * 7.3.2 fn:compare
226 *
227 * fn:compare($comparand1 as xs:string?,
228 * $comparand2 as xs:string?) as xs:integer
229 * fn:compare($comparand1 as xs:string?,
230 * $comparand2 as xs:string?,
231 * $collation as xs:string) as xs:integer?
232 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const233 bool CompareStrIterator::nextImpl(
234 store::Item_t& result,
235 PlanState& planState) const
236 {
237 store::Item_t n0;
238 store::Item_t n1;
239 store::Item_t n2;
240 int res;
241
242 PlanIteratorState* state;
243 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
244
245 if (consumeNext(n0, theChildren[0].getp(), planState ))
246 {
247 if (consumeNext(n1, theChildren[1].getp(), planState ))
248 {
249 XQPCollator* coll;
250
251 if (theChildren.size() == 3)
252 {
253 consumeNext(n2, theChildren[2].getp(), planState);
254
255 coll = theSctx->get_collator(n2->getStringValue().str(), loc);
256 }
257 else
258 {
259 coll = theSctx->get_default_collator(loc);
260 }
261
262 res = utf8::compare(n0->getStringValue(), n1->getStringValue(), coll);
263
264 res = (res < 0 ? -1 : (res > 0 ? 1 : 0));
265
266 GENV_ITEMFACTORY->createInteger(result, Integer(res));
267
268 STACK_PUSH(true, state);
269 }
270 }
271
272 STACK_END (state);
273 }
274
275
276 /**
277 *______________________________________________________________________
278 *
279 * 7.3.3 fn:codepoint-equal
280 *
281 * fn:codepoint-equal($comparand1 as xs:string?,
282 * $comparand2 as xs:string?) as xs:boolean?
283 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const284 bool CodepointEqualIterator::nextImpl(
285 store::Item_t& result,
286 PlanState& planState) const
287 {
288 store::Item_t item0;
289 store::Item_t item1;
290
291 PlanIteratorState* state;
292 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
293
294 if (consumeNext(item0, theChildren [0].getp(), planState ))
295 {
296 if (consumeNext(item1, theChildren [1].getp(), planState ))
297 {
298 GENV_ITEMFACTORY->createBoolean(result,
299 item0->getStringValue() == item1->getStringValue());
300 STACK_PUSH(true, state);
301 }
302 }
303 STACK_END(state);
304 }
305
306
307 /**
308 *______________________________________________________________________
309 *
310 * 7.4.1 fn:concat
311 *
312 * fn:concat($arg1 as xs:anyAtomicType?,
313 * $arg2 as xs:anyAtomicType?,
314 * ... ) as xs:string
315 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const316 bool ConcatStrIterator::nextImpl(
317 store::Item_t& result,
318 PlanState& planState) const
319 {
320 store::Item_t lItem;
321 std::stringstream lResStream;
322 zstring tmp;
323
324 checked_vector<PlanIter_t>::const_iterator iter = theChildren.begin();
325 checked_vector<PlanIter_t>::const_iterator end = theChildren.end();
326
327 PlanIteratorState* state;
328 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
329
330 for(; iter != end; ++iter )
331 {
332 if (consumeNext(lItem, *iter, planState))
333 {
334 lResStream << lItem->getStringValue();
335
336 if (consumeNext(lItem, *iter, planState))
337 {
338 throw XQUERY_EXCEPTION(
339 err::XPTY0004,
340 ERROR_PARAMS( ZED( NoSeqForConcat ) ),
341 ERROR_LOC( loc )
342 );
343 }
344 }
345 }
346
347 tmp = lResStream.str();
348 STACK_PUSH(GENV_ITEMFACTORY->createString(result, tmp), state);
349
350 STACK_END (state);
351 }
352
353
354 /**
355 *______________________________________________________________________
356 *
357 * 7.4.2 fn:string-join
358 *
359 * fn:string-join($arg1 as xs:string*,
360 * $arg2 as xs:string) as xs:string
361 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const362 bool StringJoinIterator::nextImpl(
363 store::Item_t& result,
364 PlanState& planState) const
365 {
366 store::Item_t item;
367 zstring resStr;
368 zstring separator;
369 bool lFirst;
370
371 PlanIteratorState* state;
372 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
373
374 if(theChildren.size() > 1)
375 {
376 consumeNext(item, theChildren[1].getp(), planState);
377 item->getStringValue2(separator);
378 }
379
380 if (separator.empty())
381 {
382 while(true)
383 {
384 if (consumeNext(item, theChildren[0].getp(), planState))
385 {
386 item->appendStringValue(resStr);
387 }
388 else
389 {
390 GENV_ITEMFACTORY->createString(result, resStr);
391 STACK_PUSH(true, state);
392 break;
393 }
394 }
395 }
396 else
397 {
398 lFirst = true;
399
400 while(true)
401 {
402 if (consumeNext(item, theChildren[0].getp(), planState))
403 {
404 if (!lFirst)
405 {
406 resStr += separator;
407 item->appendStringValue(resStr);
408 }
409 else
410 {
411 item->getStringValue2(resStr);
412 lFirst = false;
413 }
414 }
415 else
416 {
417 GENV_ITEMFACTORY->createString(result, resStr);
418 STACK_PUSH(true, state);
419 break;
420 }
421 }
422 }
423
424 STACK_END (state);
425 }
426
427
428 /**
429 *______________________________________________________________________
430 *
431 * 7.4.3 fn:substring
432 *
433 *fn:substring($sourceString as xs:string?,
434 * $startingLoc as xs:double) as xs:string
435 *fn:substring($sourceString as xs:string?,
436 * $startingLoc as xs:double,
437 * $length as xs:double) as xs:string
438 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const439 bool SubstringIterator::nextImpl(
440 store::Item_t& result,
441 PlanState& planState) const
442 {
443 store::Item_t stringItem;
444 store::Item_t startItem;
445 store::Item_t lenItem;
446 zstring strval;
447 zstring resStr;
448 xs_double start;
449 xs_double len;
450 xs_int istart;
451 xs_int ilen;
452
453 PlanIteratorState* state;
454 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
455
456 if (consumeNext(stringItem, theChildren[0].getp(), planState ))
457 {
458 stringItem->getStringValue2(strval);
459
460 if (!strval.empty())
461 {
462 bool startExists = consumeNext(startItem, theChildren[1], planState);
463
464 ZORBA_ASSERT(startExists);
465
466 // note: The first character of a string is located at position 1,
467 // not position 0.
468
469 start = startItem->getDoubleValue();
470
471 if (!start.isNaN())
472 {
473 if (start.isFinite())
474 {
475 try
476 {
477 istart = to_xs_int(start.round());
478 }
479 catch ( std::range_error const& )
480 {
481 throw XQUERY_EXCEPTION(
482 zerr::ZXQD0004_INVALID_PARAMETER,
483 ERROR_PARAMS(ZED(ZXQD0004_NOT_WITHIN_RANGE),
484 start),
485 ERROR_LOC( loc )
486 );
487 }
488 }
489 else
490 {
491 istart = (xs_int)utf8_string<zstring>(strval).length();
492 }
493
494 if( theChildren.size() == 2)
495 {
496 if (istart <= 0)
497 {
498 resStr = strval;
499 }
500 else
501 {
502 try
503 {
504 resStr = utf8_string<zstring>(strval).substr(istart-1);
505 }
506 catch (...)
507 {
508 zstring::size_type numChars = utf8_string<zstring>(strval).length();
509 if (static_cast<zstring::size_type>(istart) > numChars)
510 {
511 // result is the empty string
512 }
513 else
514 {
515 throw;
516 }
517 }
518 }
519 }
520 else
521 {
522 bool lenItemExists = consumeNext(lenItem, theChildren[2], planState);
523
524 ZORBA_ASSERT(lenItemExists);
525
526 len = lenItem->getDoubleValue();
527
528 if (!len.isNaN())
529 {
530 if (len.isFinite())
531 {
532 try
533 {
534 ilen = to_xs_int(len.round());
535 }
536 catch ( std::range_error const& )
537 {
538 throw XQUERY_EXCEPTION(
539 zerr::ZXQD0004_INVALID_PARAMETER,
540 ERROR_PARAMS(ZED(ZXQD0004_NOT_WITHIN_RANGE),
541 len),
542 ERROR_LOC( loc )
543 );
544 }
545 }
546 else
547 {
548 ilen = (xs_int)(utf8_string<zstring>(strval).length() - istart + 1);
549 }
550
551 if( !(start + len).isNaN())
552 {
553 if (ilen >= 0)
554 {
555 if (istart <= 0)
556 {
557 if ((ilen + istart - 1) >= 0)
558 resStr = utf8_string<zstring>(strval).substr(0, istart - 1 + ilen);
559 }
560 else
561 {
562 try
563 {
564 resStr = utf8_string<zstring>(strval).substr(istart-1, ilen);
565 }
566 catch (...)
567 {
568 zstring::size_type numChars = utf8_string<zstring>(strval).length();
569 if (static_cast<zstring::size_type>(istart) > numChars)
570 {
571 // result is the empty string
572 }
573 else
574 {
575 throw;
576 }
577 }
578 }
579 }
580 }
581 }
582 }
583 } // non NaN start arg
584 } // non empty string arg
585 } // non NULL string arg
586
587 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
588
589 STACK_END (state);
590 }
591
592
593 /**
594 *______________________________________________________________________
595 *
596 * 7.4.3.1 fn:substring optimized for int arguments
597 *
598 *fn:substring($sourceString as xs:string?,
599 * $startingLoc as xs:integer) as xs:string
600 *fn:substring($sourceString as xs:string?,
601 * $startingLoc as xs:integer,
602 * $length as xs:integer) as xs:string
603 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const604 bool SubstringIntOptIterator::nextImpl(
605 store::Item_t& result,
606 PlanState& planState) const
607 {
608 store::Item_t stringItem;
609 store::Item_t startItem;
610 store::Item_t lenItem;
611 zstring strval;
612 zstring resStr;
613 xs_int start;
614 xs_int len;
615
616 PlanIteratorState* state;
617 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
618
619 if (consumeNext(stringItem, theChildren[0].getp(), planState ))
620 {
621 stringItem->getStringValue2(strval);
622
623 if (!strval.empty())
624 {
625 bool startExists = consumeNext(startItem, theChildren[1], planState);
626
627 ZORBA_ASSERT(startExists);
628
629 // note: The first character of a string is located at position 1,
630 // not position 0.
631
632 try
633 {
634 start = to_xs_int(startItem->getIntegerValue());
635 }
636 catch ( std::range_error const& )
637 {
638 throw XQUERY_EXCEPTION(
639 zerr::ZXQD0004_INVALID_PARAMETER,
640 ERROR_PARAMS(ZED(ZXQD0004_NOT_WITHIN_RANGE),
641 lenItem->getIntegerValue()),
642 ERROR_LOC( loc )
643 );
644 }
645
646 if( theChildren.size() == 2)
647 {
648 if (start <= 0)
649 {
650 resStr = strval;
651 }
652 else
653 {
654 try
655 {
656 resStr = utf8_string<zstring>(strval).substr(start-1);
657 }
658 catch (...)
659 {
660 zstring::size_type numChars = utf8_string<zstring>(strval).length();
661 if (static_cast<zstring::size_type>(start) > numChars)
662 {
663 // result is the empty string
664 }
665 else
666 {
667 throw;
668 }
669 }
670 }
671 }
672 else
673 {
674 bool lenItemExists = consumeNext(lenItem, theChildren[2], planState);
675
676 ZORBA_ASSERT(lenItemExists);
677
678 try
679 {
680 len = to_xs_int(lenItem->getIntegerValue());
681 }
682 catch ( std::range_error const& )
683 {
684 throw XQUERY_EXCEPTION(
685 zerr::ZXQD0004_INVALID_PARAMETER,
686 ERROR_PARAMS(ZED(ZXQD0004_NOT_WITHIN_RANGE),
687 lenItem->getIntegerValue()),
688 ERROR_LOC( loc )
689 );
690 }
691
692 if (len >= 0)
693 {
694 if (start <= 0)
695 {
696 if ((len + start - 1) >= 0)
697 resStr = utf8_string<zstring>(strval).substr(0, start - 1 + len);
698 }
699 else
700 {
701 try
702 {
703 resStr = utf8_string<zstring>(strval).substr(start-1, len);
704 }
705 catch (...)
706 {
707 zstring::size_type numChars = utf8_string<zstring>(strval).length();
708 if (static_cast<zstring::size_type>(start) > numChars)
709 {
710 // result is the empty string
711 }
712 else
713 {
714 throw;
715 }
716 }
717 }
718 }
719 }
720 } // non empty string arg
721 } // non NULL string arg
722
723 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
724
725 STACK_END (state);
726 }
727
728
729 /**
730 *______________________________________________________________________
731 *
732 * 7.4.4 fn:string-length
733 *
734 *fn:string-length() as xs:integer
735 *fn:string-length($arg as xs:string?) as xs:integer
736 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const737 bool StringLengthIterator::nextImpl(
738 store::Item_t& result,
739 PlanState& planState) const
740 {
741 store::Item_t item;
742 zstring strval;
743
744 PlanIteratorState* state;
745 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
746
747 if (consumeNext(item, theChildren [0].getp(), planState))
748 {
749 item->getStringValue2(strval);
750
751 STACK_PUSH(GENV_ITEMFACTORY->createInteger(result, Integer(utf8::length(strval))),
752 state);
753 }
754 else
755 {
756 STACK_PUSH(GENV_ITEMFACTORY->createInteger(result, Integer::zero()),
757 state);
758 }
759 STACK_END(state);
760 }
761
762
763 /**
764 *______________________________________________________________________
765 *
766 * 7.4.5 fn:normalize-space
767 *
768 *fn:normalize-space() as xs:string
769 *fn:normalize-space($arg as xs:string?) as xs:string
770 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const771 bool NormalizeSpaceIterator::nextImpl(
772 store::Item_t& result,
773 PlanState& planState) const
774 {
775 store::Item_t item;
776 zstring resStr;
777
778 PlanIteratorState* state;
779 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
780
781 if (consumeNext(item, theChildren [0].getp(), planState))
782 {
783 item->getStringValue2(resStr);
784 ascii::normalize_whitespace(resStr);
785 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
786 }
787 else
788 {
789 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
790 }
791 STACK_END (state);
792 }
793
794
795 /**
796 *______________________________________________________________________
797 *
798 * 7.4.6 fn:normalize-unicode
799 *
800 *fn:normalize-unicode($arg as xs:string?) as xs:string
801 *fn:normalize-unicode($arg as xs:string?,
802 * $normalizationForm as xs:string) as xs:string
803 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const804 bool NormalizeUnicodeIterator::nextImpl(
805 store::Item_t& result,
806 PlanState& planState) const
807 {
808 store::Item_t item0;
809 store::Item_t item1;
810 zstring normForm;
811 zstring resStr;
812 unicode::normalization::type normType;
813 #ifndef ZORBA_NO_ICU
814 bool success;
815 #endif /* ZORBA_NO_ICU */
816
817 PlanIteratorState* state;
818 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
819
820 if (consumeNext(item0, theChildren[0], planState ))
821 {
822 if(theChildren.size() == 2)
823 {
824 if (!consumeNext(item1, theChildren[1].getp(), planState ))
825 ZORBA_ASSERT(false);
826
827 item1->getStringValue2(normForm);
828 ascii::trim_whitespace(normForm);
829 zstring tmp(normForm);
830 utf8::to_upper(tmp, &normForm);
831 }
832 else
833 {
834 normForm = "NFC";
835 }
836
837 if(normForm.empty())
838 {
839 normType = unicode::normalization::none;
840 }
841 else if (ZSTREQ(normForm, "NFC"))
842 {
843 normType = unicode::normalization::NFC;
844 }
845 else if (ZSTREQ(normForm, "NFKC"))
846 {
847 normType = unicode::normalization::NFKC;
848 }
849 else if (ZSTREQ(normForm, "NFD"))
850 {
851 normType = unicode::normalization::NFD;
852 }
853 else if (ZSTREQ(normForm, "NFKD"))
854 {
855 normType = unicode::normalization::NFKD;
856 }
857 else
858 {
859 throw XQUERY_EXCEPTION(
860 err::FOCH0003, ERROR_PARAMS( normForm ), ERROR_LOC( loc )
861 );
862 }
863
864 item0->getStringValue2(resStr);
865 #ifndef ZORBA_NO_ICU
866 success = utf8::normalize(resStr, normType, &resStr);
867 ZORBA_ASSERT(success);
868 #endif//#ifndef ZORBA_NO_ICU
869 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state );
870 }
871 else
872 {
873 // must push empty string due to return type of function
874 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
875 }
876
877 STACK_END (state);
878 }
879
880
881 /**
882 *______________________________________________________________________
883 *
884 * 7.4.7 fn:upper-case
885 *
886 *fn:upper-case($arg as xs:string?) as xs:string
887 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const888 bool UpperCaseIterator::nextImpl(
889 store::Item_t& result,
890 PlanState& planState) const
891 {
892 store::Item_t item;
893 zstring resStr;
894 zstring strval;
895
896 PlanIteratorState* state;
897 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
898
899 if (consumeNext(item, theChildren [0].getp(), planState))
900 {
901 item->getStringValue2(strval);
902
903 utf8::to_upper(strval, &resStr);
904
905 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
906 }
907 else
908 {
909 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
910 }
911 STACK_END (state);
912 }
913
914
915 /**
916 *______________________________________________________________________
917 *
918 * 7.4.8 fn:lower-case
919 *
920 *fn:lower-case($arg as xs:string?) as xs:string
921 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const922 bool LowerCaseIterator::nextImpl(
923 store::Item_t& result,
924 PlanState& planState) const
925 {
926 store::Item_t item;
927 zstring resStr;
928 zstring strval;
929
930 PlanIteratorState* state;
931 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
932
933 if (consumeNext(item, theChildren [0].getp(), planState))
934 {
935 item->getStringValue2(strval);
936
937 utf8::to_lower(strval, &resStr);
938
939 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
940 }
941 else
942 {
943 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
944 }
945 STACK_END (state);
946 }
947
948
949 /**
950 *______________________________________________________________________
951 *
952 * 7.4.9 fn:translate
953 *
954 *fn:translate($arg as xs:string?,
955 * $mapString as xs:string,
956 * $transString as xs:string) as xs:string
957 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const958 bool TranslateIterator::nextImpl(
959 store::Item_t& result,
960 PlanState& planState) const
961 {
962 store::Item_t arg_item, map_item, trans_item;
963 bool res = false;
964 zstring arg_string;
965 zstring map_string;
966 zstring trans_string;
967 zstring result_string;
968
969 PlanIteratorState* state;
970 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
971
972 if ( consumeNext( arg_item , theChildren[0].getp(), planState ) &&
973 consumeNext( map_item , theChildren[1].getp(), planState ) &&
974 consumeNext( trans_item, theChildren[2].getp(), planState ) ) {
975
976 arg_string = arg_item ->getStringValue().str();
977 map_string = map_item ->getStringValue().str();
978 trans_string = trans_item->getStringValue().str();
979
980 typedef std::map<unicode::code_point,unicode::code_point> cp_map_type;
981 cp_map_type trans_map;
982
983 if ( !map_string.empty() ) {
984 utf8_string<zstring const> const u_map_string ( map_string );
985 utf8_string<zstring const> const u_trans_string( trans_string );
986
987 utf8_string<zstring const>::const_iterator
988 map_i = u_map_string .begin(),
989 map_end = u_map_string .end (),
990 trans_i = u_trans_string.begin(),
991 trans_end = u_trans_string.end ();
992
993 for ( ; map_i != map_end && trans_i != trans_end; ++map_i, ++trans_i )
994 trans_map[ *map_i ] = *trans_i;
995
996 for ( ; map_i != map_end; ++map_i )
997 trans_map[ *map_i ] = static_cast<unicode::code_point>( ~0 );
998 }
999
1000 utf8_string<zstring> u_result_string( result_string );
1001 utf8_string<zstring const> const u_arg_string( arg_string );
1002
1003 utf8_string<zstring const>::const_iterator
1004 arg_i = u_arg_string.begin(),
1005 arg_end = u_arg_string.end ();
1006
1007 for ( ; arg_i != arg_end; ++arg_i ) {
1008 unicode::code_point cp = *arg_i;
1009 cp_map_type::const_iterator const found_i = trans_map.find( cp );
1010 if ( found_i != trans_map.end() ) {
1011 cp = found_i->second;
1012 if ( cp == static_cast<unicode::code_point>( ~0 ) )
1013 continue;
1014 }
1015 u_result_string += cp;
1016 }
1017
1018 res = GENV_ITEMFACTORY->createString(result, result_string);
1019 }
1020
1021 if (!res)
1022 {
1023 res = GENV_ITEMFACTORY->createString(result, result_string);
1024 }
1025
1026 STACK_PUSH( res, state );
1027 STACK_END (state);
1028 }
1029
1030
1031 /**
1032 *______________________________________________________________________
1033 *
1034 * 7.4.10 fn:encode-for-uri
1035 *
1036 *fn:encode-for-uri($uri-part as xs:string?) as xs:string
1037 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1038 bool EncodeForUriIterator::nextImpl(
1039 store::Item_t& result,
1040 PlanState& planState) const
1041 {
1042 store::Item_t item;
1043 zstring resStr;
1044 zstring strval;
1045
1046 PlanIteratorState* state;
1047 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1048
1049 if (consumeNext(item, theChildren [0].getp(), planState))
1050 {
1051 item->getStringValue2(strval);
1052 uri::encode(strval, &resStr, true);
1053 }
1054
1055 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
1056 STACK_END (state);
1057 }
1058
1059
1060 /**
1061 *______________________________________________________________________
1062 *
1063 * 7.4.11 fn:iri-to-uri
1064 *
1065 *fn:iri-to-uri($iri as xs:string?) as xs:string
1066 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1067 bool IriToUriIterator::nextImpl(
1068 store::Item_t& result,
1069 PlanState& planState) const
1070 {
1071 store::Item_t item;
1072 zstring lStrIri;
1073 zstring lStrRes;
1074
1075 PlanIteratorState* state;
1076 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1077
1078 if (consumeNext(item, theChildren [0].getp(), planState))
1079 {
1080 item->getStringValue2(lStrIri);
1081
1082 utf8::iri_to_uri(lStrIri, &lStrRes);
1083
1084 STACK_PUSH(GENV_ITEMFACTORY->createString(result, lStrRes), state);
1085 }
1086 else
1087 {
1088 STACK_PUSH(GENV_ITEMFACTORY->createString(result, lStrRes), state);
1089 }
1090 STACK_END (state);
1091 }
1092
1093
1094 /**
1095 *______________________________________________________________________
1096 *
1097 * 7.4.12 fn:escape-html-uri
1098 *
1099 *fn:escape-html-uri($uri as xs:string?) as xs:string
1100 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1101 bool EscapeHtmlUriIterator::nextImpl(
1102 store::Item_t& result,
1103 PlanState& planState) const
1104 {
1105 store::Item_t item;
1106 zstring lStrUri;
1107 zstring lStrRes;
1108
1109 PlanIteratorState* state;
1110 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1111
1112 if (consumeNext(item, theChildren [0].getp(), planState))
1113 {
1114 item->getStringValue2(lStrUri);
1115
1116 utf8::to_html_uri(lStrUri, &lStrRes);
1117
1118 STACK_PUSH(GENV_ITEMFACTORY->createString(result, lStrRes), state);
1119 }
1120 else
1121 {
1122 STACK_PUSH(GENV_ITEMFACTORY->createString(result, lStrRes), state);
1123 }
1124 STACK_END (state);
1125 }
1126
1127
1128 /**
1129 *______________________________________________________________________
1130 *
1131 * 7.5.1 fn:contains
1132 *
1133 *fn:contains( $arg1 as xs:string?,
1134 * $arg2 as xs:string?) as xs:boolean
1135 *fn:contains( $arg1 as xs:string?,
1136 * $arg2 as xs:string?,
1137 * $collation as xs:string) as xs:boolean
1138 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1139 bool ContainsIterator::nextImpl(
1140 store::Item_t& result,
1141 PlanState& planState) const
1142 {
1143 store::Item_t item0;
1144 store::Item_t item1;
1145 store::Item_t itemColl;
1146 bool resBool = false;
1147 zstring arg1;
1148 zstring arg2;
1149
1150 PlanIteratorState* state;
1151 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1152
1153 if (consumeNext(item0, theChildren[0].getp(), planState ))
1154 {
1155 item0->getStringValue2(arg1);
1156 }
1157
1158 if (consumeNext(item1, theChildren[1].getp(), planState ))
1159 {
1160 item1->getStringValue2(arg2);
1161 }
1162
1163 if (arg2.empty())
1164 {
1165 STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, true), state );
1166 }
1167 else if (arg1.empty())
1168 {
1169 STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, false), state );
1170 }
1171 else
1172 {
1173 if (theChildren.size() == 2)
1174 {
1175 resBool = (arg1.find(arg2) != zstring::npos);
1176 }
1177 else
1178 {
1179 if (consumeNext(itemColl, theChildren[2].getp(), planState ))
1180 {
1181 XQPCollator* coll = theSctx->get_collator(itemColl->getStringValue().str(), loc);
1182 resBool = (utf8::find(arg1, arg2, coll) != zstring::npos);
1183 }
1184 }
1185 STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, resBool), state );
1186 }
1187
1188 STACK_END (state);
1189 }
1190 /*end class ContainsIterator*/
1191
1192 /**
1193 *______________________________________________________________________
1194 *
1195 * 7.5.2 fn:starts-with
1196 *
1197 *fn:starts-with($arg1 as xs:string?,
1198 * $arg2 as xs:string?) as xs:boolean
1199 *fn:starts-with($arg1 as xs:string?,
1200 * $arg2 as xs:string?,
1201 * $collation as xs:string) as xs:boolean
1202 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1203 bool StartsWithIterator::nextImpl(
1204 store::Item_t& result,
1205 PlanState& planState) const
1206 {
1207 store::Item_t item0;
1208 store::Item_t item1;
1209 store::Item_t itemColl;
1210 bool resBool = false;
1211 zstring arg1;
1212 zstring arg2;
1213
1214 PlanIteratorState* state;
1215 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1216
1217 if (theChildren.size() == 2 || theChildren.size()==3)
1218 {
1219 if (consumeNext(item0, theChildren[0].getp(), planState ))
1220 {
1221 item0->getStringValue2(arg1);
1222 }
1223
1224 if (consumeNext(item1, theChildren[1].getp(), planState ))
1225 {
1226 item1->getStringValue2(arg2);
1227 }
1228
1229 if (arg2.empty())
1230 {
1231 STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, true), state );
1232 }
1233 else if (arg1.empty())
1234 {
1235 STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, false), state );
1236 }
1237 else
1238 {
1239 if (theChildren.size() == 2)
1240 {
1241 resBool = (arg1.find(arg2) == 0);
1242 }
1243 else
1244 {
1245 if (consumeNext(itemColl, theChildren[2].getp(), planState ))
1246 {
1247 XQPCollator* coll = theSctx->get_collator(itemColl->getStringValue().str(), loc);
1248 resBool = (utf8::find(arg1, arg2, coll) == 0);
1249 }
1250 }
1251 STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, resBool), state );
1252 }
1253 }
1254 STACK_END (state);
1255 }
1256 /*end class StartsWithIterator*/
1257
1258 /**
1259 *______________________________________________________________________
1260 *
1261 * 7.5.3 fn:ends-with
1262 *
1263 *fn:ends-with($arg1 as xs:string?,
1264 * $arg2 as xs:string?) as xs:boolean
1265 *fn:ends-with($arg1 as xs:string?,
1266 * $arg2 as xs:string?,
1267 * $collation as xs:string) as xs:boolean
1268 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1269 bool EndsWithIterator::nextImpl(
1270 store::Item_t& result,
1271 PlanState& planState) const
1272 {
1273 store::Item_t item0;
1274 store::Item_t item1;
1275 store::Item_t itemColl;
1276 bool resBool = false;
1277 zstring arg1;
1278 zstring arg2;
1279
1280 PlanIteratorState* state;
1281 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1282
1283 if (consumeNext(item0, theChildren[0].getp(), planState ))
1284 {
1285 item0->getStringValue2(arg1);
1286 }
1287
1288 if (consumeNext(item1, theChildren[1].getp(), planState ))
1289 {
1290 item1->getStringValue2(arg2);
1291 }
1292
1293 if (arg2.empty())
1294 {
1295 STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, true), state );
1296 }
1297 else if (arg1.empty())
1298 {
1299 STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, false), state );
1300 }
1301 else
1302 {
1303 if (theChildren.size() == 2)
1304 {
1305 resBool = utf8::ends_with(arg1, arg2);
1306 }
1307 else
1308 {
1309 if (consumeNext(itemColl, theChildren[2].getp(), planState ))
1310 {
1311 XQPCollator* coll = theSctx->get_collator(itemColl->getStringValue().str(), loc);
1312
1313 resBool = utf8::ends_with(arg1, arg2, coll);
1314 }
1315 }
1316 STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, resBool), state );
1317 }
1318
1319 STACK_END (state);
1320 }
1321
1322
1323 /**
1324 *______________________________________________________________________
1325 *
1326 * 7.5.4 fn:substring-before
1327 *
1328 *fn:substring-before( $arg1 as xs:string?,
1329 * $arg2 as xs:string?) as xs:string
1330 *fn:substring-before( $arg1 as xs:string?,
1331 * $arg2 as xs:string?,
1332 * $collation as xs:string) as xs:string
1333 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1334 bool SubstringBeforeIterator::nextImpl(
1335 store::Item_t& result,
1336 PlanState& planState) const
1337 {
1338 store::Item_t item0;
1339 store::Item_t item1;
1340 store::Item_t itemColl;
1341 zstring::size_type index = zstring::npos;
1342 zstring arg1;
1343 zstring arg2;
1344 zstring resStr;
1345
1346 PlanIteratorState* state;
1347 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1348
1349 if(theChildren.size() == 2 || theChildren.size()==3)
1350 {
1351 if (consumeNext(item0, theChildren[0].getp(), planState ))
1352 {
1353 item0->getStringValue2(arg1);
1354 }
1355
1356 if (consumeNext(item1, theChildren[1].getp(), planState ))
1357 {
1358 item1->getStringValue2(arg2);
1359 }
1360
1361 if (arg1.empty() || arg2.empty())
1362 {
1363 STACK_PUSH( GENV_ITEMFACTORY->createString(result, resStr), state );
1364 }
1365 else
1366 {
1367 if (theChildren.size() == 2)
1368 {
1369 index = arg1.find(arg2);
1370 }
1371 else
1372 {
1373 if (consumeNext(itemColl, theChildren[2].getp(), planState ))
1374 {
1375 XQPCollator* coll = 0;
1376 coll = theSctx->get_collator(itemColl->getStringValue().str(), loc);
1377 index = utf8::find(arg1, arg2, coll);
1378 }
1379 }
1380
1381 if (index != zstring::npos)
1382 resStr = arg1.substr(0, index);
1383
1384 STACK_PUSH( GENV_ITEMFACTORY->createString(result, resStr), state );
1385 }
1386 }
1387 STACK_END (state);
1388 }
1389
1390
1391 /**
1392 *______________________________________________________________________
1393 *
1394 * 7.5.5 fn:substring-after
1395 *
1396 *fn:substring-after($arg1 as xs:string?,
1397 * $arg2 as xs:string?) as xs:string
1398 *fn:substring-after($arg1 as xs:string?,
1399 * $arg2 as xs:string?,
1400 * $collation as xs:string) as xs:string
1401 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1402 bool SubstringAfterIterator::nextImpl(
1403 store::Item_t& result,
1404 PlanState& planState) const
1405 {
1406 store::Item_t item0;
1407 store::Item_t item1;
1408 store::Item_t itemColl;
1409 zstring::size_type startPos = zstring::npos;
1410 zstring arg1;
1411 zstring arg2;
1412 zstring resStr;
1413
1414 PlanIteratorState* state;
1415 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1416
1417 if (theChildren.size() == 2 || theChildren.size() == 3)
1418 {
1419 if (consumeNext(item0, theChildren[0].getp(), planState ))
1420 {
1421 item0->getStringValue2(arg1);
1422 }
1423
1424 if (consumeNext(item1, theChildren[1].getp(), planState ))
1425 {
1426 item1->getStringValue2(arg2);
1427 }
1428
1429 if (arg1.empty())
1430 {
1431 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
1432 }
1433 else if (arg2.empty())
1434 {
1435 resStr = arg1;
1436 STACK_PUSH( GENV_ITEMFACTORY->createString(result, resStr), state );
1437 }
1438 else
1439 {
1440 if (theChildren.size() == 2)
1441 {
1442 startPos = arg1.find(arg2);
1443 }
1444 else
1445 {
1446 if (consumeNext(itemColl, theChildren[2].getp(), planState))
1447 {
1448 XQPCollator* coll = theSctx->get_collator(itemColl->getStringValue().str(), loc);
1449 startPos = utf8::find(arg1, arg2, coll);
1450 }
1451 }
1452
1453 if (startPos != zstring::npos)
1454 {
1455 startPos += arg2.size();
1456 resStr = arg1.substr(startPos, arg1.size() - startPos);
1457 }
1458
1459 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
1460 }
1461 }
1462 STACK_END (state);
1463 }
1464
1465
1466 /**
1467 *______________________________________________________________________
1468 *
1469 * 7.6.2 fn:matches
1470 *
1471 *fn:matches($input as xs:string?,
1472 * $pattern as xs:string) as xs:boolean
1473 *fn:matches($input as xs:string?,
1474 * $pattern as xs:string,
1475 * $flags as xs:string) as xs:boolean
1476 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1477 bool FnMatchesIterator::nextImpl(
1478 store::Item_t& result,
1479 PlanState& planState) const
1480 {
1481 zstring input;
1482 zstring xquery_pattern;
1483 zstring flags;
1484 store::Item_t item;
1485 bool res = false;
1486
1487 PlanIteratorState* state;
1488 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1489
1490 if (consumeNext(item, theChildren[0].getp(), planState))
1491 item->getStringValue2(input);
1492
1493 if (!consumeNext(item, theChildren[1].getp(), planState))
1494 ZORBA_ASSERT (false);
1495
1496 item->getStringValue2(xquery_pattern);
1497
1498 if(theChildren.size() == 3)
1499 {
1500 if (!consumeNext(item, theChildren[2].getp(), planState))
1501 ZORBA_ASSERT (false);
1502
1503 item->getStringValue2(flags);
1504 }
1505
1506 try
1507 {
1508 zstring lib_pattern;
1509 convert_xquery_re( xquery_pattern, &lib_pattern, flags.c_str() );
1510 res = utf8::match_part(input, lib_pattern, flags.c_str());
1511 }
1512 catch(XQueryException& ex)
1513 {
1514 set_source( ex, loc );
1515 throw;
1516 }
1517
1518 STACK_PUSH(GENV_ITEMFACTORY->createBoolean(result, res), state);
1519
1520 STACK_END(state);
1521 }
1522
1523
1524 /**
1525 *______________________________________________________________________
1526 *
1527 * 7.6.3 fn:replace
1528 *
1529 *fn:replace($input as xs:string?,
1530 * $pattern as xs:string,
1531 * $replacement as xs:string) as xs:string
1532 *fn:replace($input as xs:string?,
1533 * $pattern as xs:string,
1534 * $replacement as xs:string,
1535 * $flags as xs:string) as xs:string
1536 *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1537 bool FnReplaceIterator::nextImpl(
1538 store::Item_t& result,
1539 PlanState& planState) const
1540 {
1541 zstring input;
1542 zstring flags;
1543 zstring pattern;
1544 zstring replacement;
1545 zstring resStr;
1546 store::Item_t item;
1547 bool tmp;
1548
1549 PlanIteratorState* state;
1550 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1551
1552 if (consumeNext(item, theChildren[0].getp(), planState))
1553 item->getStringValue2(input);
1554
1555 if (!consumeNext(item, theChildren[1].getp(), planState))
1556 ZORBA_ASSERT (false);
1557
1558 item->getStringValue2(pattern);
1559
1560 if (!consumeNext(item, theChildren[2].getp(), planState))
1561 ZORBA_ASSERT (false);
1562
1563 item->getStringValue2(replacement);
1564
1565 if(theChildren.size() == 4)
1566 {
1567 if (!consumeNext(item, theChildren[3].getp(), planState))
1568 ZORBA_ASSERT (false);
1569
1570 item->getStringValue2(flags);
1571 }
1572
1573 try
1574 {
1575 tmp = utf8::match_part(zstring(), pattern, flags.c_str());
1576 }
1577 catch(XQueryException& ex)
1578 {
1579 set_source( ex, loc );
1580 throw;
1581 }
1582
1583 if (tmp)
1584 throw XQUERY_EXCEPTION(
1585 err::FORX0003, ERROR_PARAMS( pattern ), ERROR_LOC( loc )
1586 );
1587
1588 if ( flags.find( 'q' ) == zstring::npos ) {
1589
1590 // count the number of capturing groups
1591 bool got_paren = false;
1592 int num_capturing_groups = 0;
1593 FOR_EACH( zstring, c, pattern ) {
1594 if ( got_paren && *c != '?' )
1595 ++num_capturing_groups;
1596 got_paren = *c == '(';
1597 }
1598
1599 bool got_backslash = false;
1600 bool got_dollar = false;
1601 zstring temp_replacement;
1602 FOR_EACH( zstring, c, replacement ) {
1603 if ( got_backslash ) {
1604 switch ( *c ) {
1605 case '\\':
1606 case '$':
1607 temp_replacement += '\\';
1608 temp_replacement += *c;
1609 got_backslash = false;
1610 continue;
1611 default:
1612 throw XQUERY_EXCEPTION(
1613 err::FORX0004,
1614 ERROR_PARAMS( replacement, ZED( BadCharAfter_34 ), *c, '\\' ),
1615 ERROR_LOC( loc )
1616 );
1617 }
1618 }
1619 if ( got_dollar ) {
1620 if ( !ascii::is_digit( *c ) )
1621 throw XQUERY_EXCEPTION(
1622 err::FORX0004,
1623 ERROR_PARAMS( replacement, ZED( BadCharAfter_34 ), *c, '$' ),
1624 ERROR_LOC( loc )
1625 );
1626 if ( *c - '0' <= num_capturing_groups ) {
1627 temp_replacement += '$';
1628 temp_replacement += *c;
1629 }
1630 got_dollar = false;
1631 continue;
1632 }
1633 switch ( *c ) {
1634 case '\\':
1635 got_backslash = true;
1636 break;
1637 case '$':
1638 got_dollar = true;
1639 break;
1640 default:
1641 temp_replacement += *c;
1642 break;
1643 }
1644 } // FOR_EACH
1645 if ( got_backslash )
1646 throw XQUERY_EXCEPTION(
1647 err::FORX0004,
1648 ERROR_PARAMS( replacement, ZED( TrailingChar_3 ), '\\' ),
1649 ERROR_LOC( loc )
1650 );
1651 if ( got_dollar )
1652 throw XQUERY_EXCEPTION(
1653 err::FORX0004,
1654 ERROR_PARAMS( replacement, ZED( TrailingChar_3 ), '$' ),
1655 ERROR_LOC( loc )
1656 );
1657 replacement = temp_replacement;
1658 }
1659
1660 try
1661 {
1662 zstring lib_pattern;
1663 convert_xquery_re( pattern, &lib_pattern, flags.c_str() );
1664 utf8::replace_all(input, lib_pattern, flags.c_str(), replacement, &resStr);
1665 }
1666 catch(XQueryException& ex)
1667 {
1668 set_source( ex, loc );
1669 throw;
1670 }
1671
1672 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
1673
1674 STACK_END (state);
1675 }
1676
1677
1678 /**
1679 *______________________________________________________________________
1680 *
1681 * 7.6.4 fn:tokenize
1682 *
1683 *fn:tokenize($input as xs:string?,
1684 * $pattern as xs:string) as xs:string*
1685 *fn:tokenize($input as xs:string?,
1686 * $pattern as xs:string,
1687 * $flags as xs:string) as xs:string*
1688 *_______________________________________________________________________
1689 */
reset(PlanState & planState)1690 void FnTokenizeIteratorState::reset(PlanState& planState)
1691 {
1692 PlanIteratorState::reset(planState);
1693 theString.clear();
1694 start_pos = 0;
1695 hasmatched = false;
1696 thePattern.clear();
1697 theFlags.clear();
1698 }
1699
1700
nextImpl(store::Item_t & result,PlanState & planState) const1701 bool FnTokenizeIterator::nextImpl(
1702 store::Item_t& result,
1703 PlanState& planState) const
1704 {
1705 zstring token;
1706 store::Item_t item;
1707 bool tmp;
1708 zstring strval;
1709 unicode::string u_string;
1710
1711 FnTokenizeIteratorState* state;
1712 DEFAULT_STACK_INIT(FnTokenizeIteratorState, state, planState);
1713
1714 if (consumeNext(item, theChildren[0].getp(), planState))
1715 {
1716 item->getStringValue2(strval);
1717 state->theString = strval.str();
1718 }
1719
1720 if (!consumeNext(item, theChildren[1].getp(), planState))
1721 ZORBA_ASSERT(false);
1722
1723 item->getStringValue2(strval);
1724 state->thePattern = strval.str();
1725
1726 if(theChildren.size() == 3)
1727 {
1728 if (!consumeNext(item, theChildren[2].getp(), planState))
1729 ZORBA_ASSERT (false);
1730
1731 item->getStringValue2(strval);
1732
1733 state->theFlags = strval.str();
1734 }
1735
1736 try
1737 {
1738 static zstring const empty;
1739 tmp = utf8::match_part( empty, state->thePattern, state->theFlags );
1740 }
1741 catch(XQueryException& ex)
1742 {
1743 set_source( ex, loc );
1744 throw;
1745 }
1746
1747 if(tmp)
1748 throw XQUERY_EXCEPTION(
1749 err::FORX0003, ERROR_PARAMS( state->thePattern ), ERROR_LOC( loc )
1750 );
1751
1752
1753 while ((xs_unsignedInt)state->start_pos < state->theString.length ())
1754 {
1755 try
1756 {
1757 unicode::regex re;
1758 //
1759 // The RE needs to be compiled every time due to the weird stack macros.
1760 //
1761 re.compile( state->thePattern, state->theFlags );
1762 unicode::string u_token;
1763 bool const got_next = re.next_token(
1764 state->theString, &state->start_pos, &u_token, &state->hasmatched
1765 );
1766 utf8::to_string( u_token, &token );
1767 if ( !got_next )
1768 break;
1769 }
1770 catch(XQueryException& ex)
1771 {
1772 set_source( ex, loc );
1773 throw;
1774 }
1775
1776 STACK_PUSH(GENV_ITEMFACTORY->createString(result, token), state);
1777 }
1778
1779 if(state->hasmatched)
1780 {
1781 //the last token is empty (is after the last match)
1782 token.clear();
1783 STACK_PUSH(GENV_ITEMFACTORY->createString(result, token), state);
1784 }
1785 STACK_END(state);
1786 }
1787
1788 /**
1789 *______________________________________________________________________
1790 *
1791 * 5.6.5 fn:analyze-string
1792 *
1793 *fn:analyze-string( $input as xs:string?,
1794 * $pattern as xs:string) as element(fn:analyze-string-result)
1795 *fn:analyze-string( $input as xs:string?,
1796 * $pattern as xs:string,
1797 * $flags as xs:string) as element(fn:analyze-string-result)
1798 *_______________________________________________________________________*/
1799
copyUtf8Chars(const char * & sin,int & utf8start,unsigned int & bytestart,int utf8end,unsigned int byteend,zstring & out)1800 static void copyUtf8Chars(const char *&sin,
1801 int &utf8start,
1802 unsigned int &bytestart,
1803 int utf8end,
1804 unsigned int byteend,
1805 zstring &out)
1806 {
1807 #ifndef ZORBA_NO_ICU
1808 utf8::size_type clen;
1809 if(utf8end)
1810 {
1811 while(utf8start < utf8end)
1812 {
1813 clen = utf8::char_length(*sin);
1814 if(clen == 0)
1815 clen = 1;
1816 out.append(sin, clen);
1817 utf8start++;
1818 bytestart += clen;
1819 sin += clen;
1820 }
1821 }
1822 else
1823 #endif
1824 {
1825 if(!utf8end)
1826 utf8end = byteend;
1827 out.append(sin, utf8end-bytestart);
1828 sin += utf8end-bytestart;
1829 utf8start = utf8end;
1830 bytestart = utf8end;
1831 }
1832 }
1833
addNonMatchElement(store::Item_t & parent,int & match_end1,unsigned int & match_end1_bytes,int match_start2,unsigned int match_start2_bytes,const char * & strin)1834 static void addNonMatchElement(store::Item_t &parent,
1835 int &match_end1,
1836 unsigned int &match_end1_bytes,
1837 int match_start2,
1838 unsigned int match_start2_bytes,
1839 const char *&strin)
1840 {
1841 store::Item_t non_match_elem;
1842 store::Item_t non_match_element_name;
1843 store::Item_t untyped_type_name;
1844 store::NsBindings ns_binding;
1845 zstring baseURI;
1846 GENV_ITEMFACTORY->createQName(untyped_type_name,
1847 XML_SCHEMA_NS, XML_SCHEMA_PREFIX, "untyped");
1848 GENV_ITEMFACTORY->createQName(non_match_element_name,
1849 static_context::W3C_FN_NS, "fn", "non-match");
1850 GENV_ITEMFACTORY->createElementNode(non_match_elem, parent, non_match_element_name, untyped_type_name, false, false, ns_binding, baseURI);
1851 //utf8_it += (match_start2 - match_end1);
1852 zstring non_match_str;
1853 //utf8_string<zstring> non_match_utf8(non_match_str);
1854 //while(match_end1 < match_start2)
1855 //{
1856 // non_match_utf8 += *utf8_it;
1857 // utf8_it++;
1858 // match_end1++;
1859 //}
1860 copyUtf8Chars(strin, match_end1, match_end1_bytes, match_start2, match_start2_bytes, non_match_str);
1861 store::Item_t non_match_text_item;
1862 GENV_ITEMFACTORY->createTextNode(non_match_text_item, non_match_elem, non_match_str);
1863 }
1864
addGroupElement(store::Item_t & parent,store::Item_t & untyped_type_name,store::NsBindings & ns_binding,zstring & baseURI,int match_start2,int match_end2,unsigned int & match_end1_bytes,const char * & sin,unicode::regex & rx,int gparent,std::vector<int> & group_parent,int nr_pattern_groups,int & i)1865 static void addGroupElement(store::Item_t &parent,
1866 store::Item_t &untyped_type_name,
1867 store::NsBindings &ns_binding,
1868 zstring &baseURI,
1869 int match_start2,
1870 int match_end2,
1871 unsigned int &match_end1_bytes,
1872 const char *&sin,
1873 unicode::regex &rx,
1874 int gparent,
1875 std::vector<int> &group_parent,
1876 int nr_pattern_groups,
1877 int &i)
1878 {
1879 int match_startg = match_start2;
1880 int match_endg = match_start2;
1881 int match_endgood = match_start2;
1882 store::Item_t group_element_name;
1883 store::Item_t nr_attrib_name;
1884 for(i=i+1;i<nr_pattern_groups;i++)
1885 {
1886 if(group_parent[i] < gparent)
1887 {
1888 i--;
1889 break;
1890 }
1891 #ifndef ZORBA_NO_ICU
1892 match_startg = rx.get_match_start(i+1);
1893 if((match_startg < 0) && (gparent < 0))
1894 continue;
1895 #else
1896 int temp_endg;
1897 match_startg = -1;
1898 temp_endg = -1;
1899 if(!rx.get_match_start_end_bytes(i+1, &match_startg, &temp_endg) && (gparent < 0))
1900 continue;
1901 #endif
1902 if(match_endgood < match_startg)
1903 {
1904 //add non-group match text
1905 zstring non_group_str;
1906
1907 copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_startg, 0, non_group_str);
1908 store::Item_t non_group_text_item;
1909 GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent.getp(), non_group_str);
1910 }
1911 #ifndef ZORBA_NO_ICU
1912 match_endg = rx.get_match_end(i+1);
1913 #else
1914 match_endg = temp_endg;
1915 #endif
1916 //add group match text
1917 GENV_ITEMFACTORY->createQName(group_element_name,
1918 static_context::W3C_FN_NS, "fn", "group");
1919 GENV_ITEMFACTORY->createQName(nr_attrib_name,
1920 "", "", "nr");
1921 store::Item_t group_elem;
1922 GENV_ITEMFACTORY->createElementNode(group_elem, parent, group_element_name, untyped_type_name, false, false, ns_binding, baseURI);
1923 char strid[40];
1924 sprintf(strid, "%d", i+1);
1925 zstring zstrid(strid);
1926 store::Item_t strid_item;
1927 GENV_ITEMFACTORY->createString(strid_item, zstrid);
1928 store::Item_t id_attrib_item;
1929 GENV_ITEMFACTORY->createAttributeNode(id_attrib_item, group_elem.getp(), nr_attrib_name, untyped_type_name, strid_item);
1930 if((match_startg < 0) || (match_startg < match_endgood))
1931 continue;
1932 match_endgood = match_endg;
1933 if((i+1)<nr_pattern_groups)
1934 {
1935 if(group_parent[i+1] > gparent)
1936 {
1937 addGroupElement(group_elem, untyped_type_name, ns_binding, baseURI,
1938 match_startg, match_endg, match_end1_bytes,
1939 sin, rx,
1940 i, group_parent, nr_pattern_groups, i);
1941 continue;
1942 }
1943 }
1944 zstring group_str;
1945
1946 copyUtf8Chars(sin, match_startg, match_end1_bytes, match_endg, 0, group_str);
1947 store::Item_t group_text_item;
1948 GENV_ITEMFACTORY->createTextNode(group_text_item, group_elem.getp(), group_str);
1949 }
1950 //add last non-group match
1951 if(match_endgood < match_end2)
1952 {
1953 zstring non_group_str;
1954
1955 copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_end2, 0, non_group_str);
1956 store::Item_t non_group_text_item;
1957 GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent, non_group_str);
1958 }
1959 }
1960
addMatchElement(store::Item_t & parent,int match_start2,unsigned int & match_end1_bytes,int match_end2,const char * & sin,unicode::regex & rx,std::vector<int> & group_parent,int nr_pattern_groups)1961 static void addMatchElement(store::Item_t &parent,
1962 int match_start2,
1963 unsigned int &match_end1_bytes,
1964 int match_end2,
1965 //utf8_string<zstring_p>::const_iterator& utf8_it,
1966 const char *&sin,
1967 unicode::regex &rx,
1968 std::vector<int> &group_parent,
1969 int nr_pattern_groups)
1970 {
1971 store::Item_t match_element_name;
1972 store::Item_t untyped_type_name;
1973 store::NsBindings ns_binding;
1974 zstring baseURI;
1975 GENV_ITEMFACTORY->createQName(untyped_type_name,
1976 XML_SCHEMA_NS, XML_SCHEMA_PREFIX, "untyped");
1977 GENV_ITEMFACTORY->createQName(match_element_name,
1978 static_context::W3C_FN_NS, "fn", "match");
1979 store::Item_t match_elem;
1980 GENV_ITEMFACTORY->createElementNode(match_elem, parent, match_element_name, untyped_type_name, false, false, ns_binding, baseURI);
1981 int i = -1;
1982 addGroupElement(match_elem, untyped_type_name, ns_binding, baseURI, match_start2, match_end2, match_end1_bytes, sin, rx, -1, group_parent, nr_pattern_groups, i);
1983 }
1984
computePatternGroupsParents(zstring & xquery_pattern,std::vector<int> & group_parent)1985 static void computePatternGroupsParents(zstring &xquery_pattern, std::vector<int> &group_parent)
1986 {
1987 utf8_string<zstring> utf8_pattern(xquery_pattern);
1988 utf8_string<zstring>::const_iterator c;
1989 std::list<int> parents;
1990 int i = 0;
1991
1992 for(c = utf8_pattern.begin(); c != utf8_pattern.end(); c++)
1993 {
1994 if(*c == '\\')
1995 {
1996 c++;
1997 continue;
1998 }
1999 if(*c == '(')
2000 {
2001 //begin group
2002 if(parents.size())
2003 group_parent.push_back(parents.back());
2004 else
2005 group_parent.push_back(-1);
2006 parents.push_back(i);
2007 i++;
2008 }
2009 else if(*c == ')')
2010 {
2011 if(parents.size())
2012 parents.pop_back();
2013 }
2014 }
2015 }
2016
nextImpl(store::Item_t & result,PlanState & planState) const2017 bool FnAnalyzeStringIterator::nextImpl(
2018 store::Item_t& result,
2019 PlanState& planState) const
2020 {
2021 bool is_input_stream = false;
2022 zstring input;
2023 std::istream *instream = NULL;
2024 #define STREAMBUF_CHUNK_SIZE 4*1024
2025 class SmartCharPtr
2026 {
2027 public:
2028 char *ptr;
2029 SmartCharPtr() : ptr(NULL) {}
2030 ~SmartCharPtr() {if(ptr) ::free(ptr);}
2031 };
2032 SmartCharPtr streambuf;
2033 zstring::size_type streambuf_allocated_size = 0;
2034 zstring::size_type streambuf_read = 0;
2035 //zstring::size_type streambuf_beg = 0;
2036 zstring xquery_pattern;
2037 zstring flags;
2038 store::Item_t item;
2039
2040 PlanIteratorState* state;
2041 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
2042
2043 if (consumeNext(item, theChildren[0].getp(), planState))
2044 {
2045 if(!item->isStreamable())
2046 {
2047 item->getStringValue2(input);
2048 }
2049 else
2050 {
2051 instream = &item->getStream();
2052 is_input_stream = true;
2053 }
2054 }
2055
2056 if (!consumeNext(item, theChildren[1].getp(), planState))
2057 ZORBA_ASSERT (false);
2058
2059 item->getStringValue2(xquery_pattern);
2060
2061 if(theChildren.size() == 3)
2062 {
2063 if (!consumeNext(item, theChildren[2].getp(), planState))
2064 ZORBA_ASSERT (false);
2065
2066 item->getStringValue2(flags);
2067 }
2068
2069 try
2070 {
2071 zstring lib_pattern;
2072 convert_xquery_re( xquery_pattern, &lib_pattern, flags.c_str() );
2073
2074 if(is_input_stream)
2075 {
2076 streambuf.ptr = (char*)malloc(STREAMBUF_CHUNK_SIZE);
2077 streambuf_allocated_size = STREAMBUF_CHUNK_SIZE;
2078 instream->read(streambuf.ptr, streambuf_allocated_size);
2079 streambuf_read = (unsigned int)instream->gcount();
2080 if(streambuf_read == STREAMBUF_CHUNK_SIZE)
2081 {
2082 // Note: const_reverse_iterator would work here, but does not
2083 // compile with gcc 4.0.1 (which is the version in Xcode on MacOS 10.5).
2084 zstring::reverse_iterator xqit = xquery_pattern.rbegin();
2085 if((xqit != xquery_pattern.rend()) && (flags.find('m') == std::string::npos))
2086 {
2087 if(*xqit == '$')
2088 {
2089 xqit++;
2090 int bslashes = 0;
2091 while(xqit != xquery_pattern.rend())
2092 {
2093 if(*xqit == '\\')
2094 bslashes++;
2095 else
2096 break;
2097 }
2098 if(bslashes%2 == 0)
2099 {
2100 //better read all instream
2101 do{
2102 streambuf.ptr = (char*)realloc(streambuf.ptr, streambuf_allocated_size+STREAMBUF_CHUNK_SIZE);
2103 streambuf_allocated_size += STREAMBUF_CHUNK_SIZE;
2104 instream->read(streambuf.ptr + streambuf_read, STREAMBUF_CHUNK_SIZE);
2105 streambuf_read += (unsigned int)instream->gcount();
2106 }while(instream->gcount() == STREAMBUF_CHUNK_SIZE);
2107 }
2108 }
2109 }
2110 }
2111 }
2112
2113 unicode::regex rx;
2114 rx.compile(lib_pattern, flags.c_str());
2115 int nr_pattern_groups = rx.get_pattern_group_count();
2116 std::vector<int> group_parent;
2117 computePatternGroupsParents(xquery_pattern, group_parent);
2118
2119 //see if regex can match empty strings
2120 bool reachedEnd = false;
2121 rx.set_string("", 0);
2122 if(rx.find_next_match(&reachedEnd))
2123 {
2124 throw XQUERY_EXCEPTION(
2125 err::FORX0003, ERROR_PARAMS( lib_pattern )
2126 );
2127
2128 }
2129
2130 store::Item_t null_parent;
2131 store::Item_t result_element_name;
2132 store::Item_t untyped_type_name;
2133 store::NsBindings ns_binding;
2134 zstring baseURI;
2135 GENV_ITEMFACTORY->createQName(untyped_type_name,
2136 XML_SCHEMA_NS, XML_SCHEMA_PREFIX, "untyped");
2137 GENV_ITEMFACTORY->createQName(result_element_name,
2138 static_context::W3C_FN_NS, "fn", "analyze-string-result");
2139 GENV_ITEMFACTORY->createElementNode(result, NULL, result_element_name, untyped_type_name, false, false, ns_binding, baseURI);
2140
2141 int nr_retry = 0;
2142 reachedEnd = false;
2143 do
2144 {
2145 const char *instr;
2146 if(!is_input_stream)
2147 {
2148 rx.set_string(input.data(), input.size());
2149 instr = input.c_str();
2150 streambuf_read = input.size();
2151 }
2152 else
2153 {
2154 unsigned int reducebytes = 0;
2155 if(!instream->eof())
2156 {
2157 //check the last bytes, maybe it is a truncated utf8 char
2158 unsigned int maxbytes = 6;
2159 if(maxbytes > streambuf_read)
2160 maxbytes = streambuf_read;
2161 for(reducebytes=1;reducebytes<=maxbytes;reducebytes++)
2162 {
2163 utf8::size_type clen = utf8::char_length(streambuf.ptr[streambuf_read-reducebytes]);
2164 if((clen > 1) && (clen > reducebytes))
2165 break;
2166 }
2167 if(reducebytes == (maxbytes+1))
2168 reducebytes = 0;
2169 }
2170 rx.set_string(streambuf.ptr, streambuf_read-reducebytes);
2171 instr = streambuf.ptr;
2172 }
2173 //zstring_p zinstr(instr);
2174 //utf8_string<zstring_p> utf8_instr(zinstr);
2175 //utf8_string<zstring_p>::const_iterator utf8_it = utf8_instr.begin();
2176
2177 //int match_start1 = 0;
2178 int match_end1 = 0;
2179 unsigned int match_end1_bytes = 0;
2180 reachedEnd = false;
2181 while(rx.find_next_match(&reachedEnd))
2182 {
2183 int match_start2;
2184 int match_end2;
2185 #ifndef ZORBA_NO_ICU
2186 match_start2 = rx.get_match_start();
2187 match_end2 = rx.get_match_end();
2188 #else
2189 rx.get_match_start_end_bytes(0, &match_start2, &match_end2);
2190 #endif
2191 ZORBA_ASSERT(match_start2 >= 0);
2192
2193 if(is_input_stream && reachedEnd && !instream->eof())
2194 {
2195 //load some more data, maybe the match will be different
2196 break;
2197 }
2198
2199 //construct the fn:non-match
2200 if(match_start2 > match_end1)
2201 {
2202 addNonMatchElement(result, match_end1, match_end1_bytes, match_start2, 0, instr);
2203 }
2204
2205 //construct the fn:match
2206 addMatchElement(result, match_start2, match_end1_bytes, match_end2, instr, rx, group_parent, nr_pattern_groups);
2207 match_end1 = match_end2;
2208 }
2209
2210 if(is_input_stream && !instream->eof())
2211 {
2212 //load some more data, maybe the match will be different
2213 if(match_end1_bytes)
2214 {
2215 memmove(streambuf.ptr, streambuf.ptr+match_end1_bytes, streambuf_read-match_end1_bytes);
2216 streambuf_read -= match_end1_bytes;
2217 nr_retry = 0;
2218 }
2219 else
2220 nr_retry++;
2221 if(!match_end1_bytes && (nr_retry == 2))
2222 {
2223 if(streambuf_allocated_size > streambuf_read)
2224 {
2225 instream->read(streambuf.ptr + streambuf_read, streambuf_allocated_size - streambuf_read);
2226 streambuf_read += (unsigned int)instream->gcount();
2227 }
2228 //better read all instream
2229 while(!instream->eof())
2230 {
2231 streambuf.ptr = (char*)realloc(streambuf.ptr, streambuf_allocated_size+STREAMBUF_CHUNK_SIZE);
2232 instream->read(streambuf.ptr + streambuf_read, STREAMBUF_CHUNK_SIZE);
2233 streambuf_read += (unsigned int)instream->gcount();
2234 streambuf_allocated_size += STREAMBUF_CHUNK_SIZE;
2235 }
2236 }
2237 else
2238 {
2239 //read some more data from instream
2240 if(streambuf_allocated_size > streambuf_read)
2241 {
2242 instream->read(streambuf.ptr + streambuf_read, streambuf_allocated_size - streambuf_read);
2243 streambuf_read += (unsigned int)instream->gcount();
2244 }
2245 else
2246 {
2247 streambuf.ptr = (char*)realloc(streambuf.ptr, streambuf_allocated_size+STREAMBUF_CHUNK_SIZE);
2248 instream->read(streambuf.ptr + streambuf_read, STREAMBUF_CHUNK_SIZE);
2249 streambuf_read += (unsigned int)instream->gcount();
2250 streambuf_allocated_size += STREAMBUF_CHUNK_SIZE;
2251 }
2252 }
2253 reachedEnd = false;
2254 }
2255 else
2256 {
2257 if(match_end1_bytes < streambuf_read)
2258 addNonMatchElement(result, match_end1, match_end1_bytes, 0, streambuf_read, instr);
2259 if(is_input_stream && instream->eof())
2260 reachedEnd = true;
2261 }
2262
2263 }while(is_input_stream && !reachedEnd);
2264 }
2265 catch(XQueryException& ex)
2266 {
2267 set_source( ex, loc );
2268 throw;
2269 }
2270
2271 STACK_PUSH(true, state);
2272
2273 STACK_END(state);
2274 }
2275
2276
2277 /**
2278 *______________________________________________________________________
2279 *
2280 * http://www.zorba-xquery.com/modules/string
2281 * string:materialize
2282 */
2283
nextImpl(store::Item_t & result,PlanState & planState) const2284 bool StringMaterializeIterator::nextImpl(
2285 store::Item_t& result,
2286 PlanState& planState) const
2287 {
2288 store::Item_t item;
2289 zstring lString;
2290
2291 PlanIteratorState* state;
2292 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
2293
2294 #ifndef NDEBUG
2295 assert(consumeNext(item, theChildren[0].getp(), planState));
2296 #else
2297 consumeNext(item, theChildren[0].getp(), planState);
2298 #endif
2299 if (item->isStreamable()) {
2300 lString = item->getString();
2301 STACK_PUSH(GENV_ITEMFACTORY->createString(result, lString), state);
2302 } else {
2303 result = item;
2304 STACK_PUSH(result != 0 , state);
2305 }
2306
2307 STACK_END(state);
2308 }
2309
2310 /**
2311 *______________________________________________________________________
2312 *
2313 * http://www.zorba-xquery.com/modules/string
2314 * string:is-streamable
2315 */
nextImpl(store::Item_t & result,PlanState & planState) const2316 bool StringIsStreamableIterator::nextImpl(
2317 store::Item_t& result,
2318 PlanState& planState) const
2319 {
2320 store::Item_t item;
2321
2322 PlanIteratorState* state;
2323 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
2324
2325 #ifndef NDEBUG
2326 assert(consumeNext(item, theChildren[0].getp(), planState));
2327 #else
2328 consumeNext(item, theChildren[0].getp(), planState);
2329 #endif
2330 STACK_PUSH(GENV_ITEMFACTORY->createBoolean(result, item->isStreamable()), state);
2331
2332 STACK_END(state);
2333 }
2334
2335 /**
2336 *______________________________________________________________________
2337 *
2338 * http://www.zorba-xquery.com/modules/string
2339 * string:split
2340 */
nextImpl(store::Item_t & result,PlanState & planState) const2341 bool StringSplitIterator::nextImpl(
2342 store::Item_t& result,
2343 PlanState& planState) const
2344 {
2345 store::Item_t item;
2346 size_t lNewPos = 0;
2347 zstring lToken;
2348 zstring lPartialMatch;
2349
2350 StringSplitIteratorState* state;
2351 DEFAULT_STACK_INIT(StringSplitIteratorState, state, planState);
2352
2353 // init phase, get input string and tokens
2354 consumeNext(item, theChildren[0].getp(), planState);
2355
2356 if (item->isStreamable())
2357 {
2358 state->theIStream = &item->getStream();
2359 }
2360 else
2361 {
2362 state->theIStream = 0;
2363 item->getStringValue2(state->theInput);
2364 }
2365
2366 consumeNext(item, theChildren[1].getp(), planState);
2367
2368 item->getStringValue2(state->theSeparator);
2369
2370 // working phase, do the tokenization
2371 if (state->theIStream)
2372 {
2373 while ( !state->theIStream->eof() )
2374 {
2375 utf8::encoded_char_type ec;
2376 memset( ec, '\0' , sizeof(ec) );
2377 utf8::storage_type *p;
2378 p = ec;
2379
2380 if ( utf8::read( *state->theIStream, ec ) != utf8::npos )
2381 {
2382 if (state->theSeparator.compare(lNewPos, 1, ec) == 0)
2383 {
2384 if (++lNewPos == state->theSeparator.length())
2385 {
2386 GENV_ITEMFACTORY->createString(result, lToken);
2387 STACK_PUSH(true, state);
2388 }
2389 else
2390 {
2391 lPartialMatch.append(ec);
2392 }
2393 }
2394 else
2395 {
2396 lToken.append(lPartialMatch);
2397 lToken.append(ec);
2398 }
2399 }
2400 else
2401 {
2402 if (state->theIStream->good())
2403 {
2404 char buf[ 6 /* bytes at most */ * 5 /* chars per byte */ ], *b = buf;
2405 bool first = true;
2406 for ( ; *p; ++p ) {
2407 if ( first )
2408 first = false;
2409 else
2410 *b++ = ',';
2411 ::strcpy( b, "0x" ); b += 2;
2412 ::sprintf( b, "%0hhX", *p ); b += 2;
2413 }
2414 throw XQUERY_EXCEPTION(
2415 zerr::ZXQD0006_INVALID_UTF8_BYTE_SEQUENCE,
2416 ERROR_PARAMS( buf ),
2417 ERROR_LOC( loc )
2418 );
2419 }
2420 if (!lToken.empty())
2421 {
2422 GENV_ITEMFACTORY->createString(result, lToken);
2423 STACK_PUSH(true, state);
2424 }
2425 break;
2426 }
2427 }
2428 }
2429 else
2430 {
2431 while (true)
2432 {
2433 if (state->theNextStartPos == zstring::npos)
2434 {
2435 break;
2436 }
2437
2438 lNewPos =
2439 state->theInput.find(state->theSeparator, state->theNextStartPos);
2440 if (lNewPos != zstring::npos)
2441 {
2442 zstring lSubStr = state->theInput.substr(
2443 state->theNextStartPos,
2444 lNewPos - state->theNextStartPos);
2445 GENV_ITEMFACTORY->createString(result, lSubStr);
2446 state->theNextStartPos =
2447 lNewPos==state->theInput.length() - state->theSeparator.length()
2448 ? zstring::npos
2449 : lNewPos + state->theSeparator.length();
2450 }
2451 else
2452 {
2453 zstring lSubStr = state->theInput.substr(state->theNextStartPos);
2454 GENV_ITEMFACTORY->createString(result, lSubStr);
2455 state->theNextStartPos = zstring::npos;
2456 }
2457 STACK_PUSH(true, state);
2458 }
2459 }
2460
2461 STACK_END(state);
2462 }
2463 } // namespace zorba
2464 /* vim:set et sw=2 ts=2: */
2465