1 /*
2  * Copyright 2006-2008 The FLWOR Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "stdafx.h"
17 
18 #include <iostream>
19 
20 #include "common/common.h"
21 
22 #include "zorbamisc/ns_consts.h"
23 #include "diagnostics/assert.h"
24 #include "diagnostics/xquery_diagnostics.h"
25 
26 #include "zorbatypes/numconversions.h"
27 
28 #include "system/globalenv.h"
29 
30 #include "context/static_context.h"
31 
32 #include "compiler/api/compilercb.h"
33 
34 #include "runtime/strings/strings.h"
35 #include "runtime/visitors/planiter_visitor.h"
36 
37 #include "store/api/item.h"
38 #include "store/api/item_factory.h"
39 
40 #include "zorbautils/string_util.h"
41 
42 #include "util/regex.h"
43 #include "util/utf8_util.h"
44 #include "util/utf8_string.h"
45 #include "util/string_util.h"
46 #include "util/uri_util.h"
47 #include "util/xml_util.h"
48 
49 
50 using namespace std;
51 
52 namespace zorba {
53 
54 
55 /**
56   *______________________________________________________________________
57   *
58   *  7.2.1 fn:codepoints-to-string
59   *
60   *  fn:codepoints-to-string($arg as xs:integer*) as xs:string
61   *_______________________________________________________________________*/
62 bool
nextImpl(store::Item_t & result,PlanState & planState) const63 CodepointsToStringIterator::nextImpl(store::Item_t& result, PlanState& planState) const
64 {
65   store::Item_t item;
66   zstring resStr;
67 
68   PlanIteratorState* state;
69   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
70 
71   while(true)
72   {
73     if (consumeNext(item, theChildren [0].getp(), planState ))
74     {
75       {
76         zstring lUtf8Code = item->getIntegerValue().toString();
77         try
78         {
79           xs_unsignedInt lCode = ztd::aton<xs_unsignedInt>(lUtf8Code.c_str());
80           if (!xml::is_valid(lCode))
81             throw std::invalid_argument( lUtf8Code.str() );
82           utf8::encode( lCode, &resStr );
83         }
84         catch ( std::exception const& )
85         {
86           throw XQUERY_EXCEPTION(
87             err::FOCH0001, ERROR_PARAMS( lUtf8Code ), ERROR_LOC( loc )
88           );
89         }
90       }
91     }
92     else
93     {
94       STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state );
95       break;
96     }
97   }
98   STACK_END (state);
99 }
100 
101 /**
102  *______________________________________________________________________
103  *
104  *  7.2.2 fn:string-to-codepoints
105  *
106  *  fn:string-to-codepoints($arg as xs:string?) as xs:integer*
107  *_______________________________________________________________________
108  */
nextImpl(store::Item_t & result,PlanState & planState) const109 bool StringToCodepointsIterator::nextImpl(
110     store::Item_t& result,
111     PlanState& planState) const
112 {
113   // TODO Optimization for large strings: large strings mean that a large
114   // integer vector should be stored in the state that is not good.
115   store::Item_t item;
116   zstring inputStr;
117 
118   StringToCodepointsIteratorState* state;
119   DEFAULT_STACK_INIT(StringToCodepointsIteratorState, state, planState);
120 
121   if (consumeNext(item, theChildren [0].getp(), planState ))
122   {
123     if(!item->isStreamable())
124     {
125       item->getStringValue2(inputStr);
126     }
127     else
128     {
129       state->theStreamItem = item;
130       state->theStream = &item->getStream();
131     }
132   }
133 
134   if ( state->theStream )
135   {
136     while ( true )
137     {
138       utf8::encoded_char_type ec;
139       memset( ec, 0, sizeof( ec ) );
140       utf8::storage_type *p;
141       p = ec;
142 
143       if ( utf8::read( *state->theStream, ec ) == utf8::npos )
144       {
145         if ( state->theStream->eof() )
146           break;
147         if ( state->theStream->good() ) {
148           //
149           // If read() failed but the stream state is good, it means that an
150           // invalid byte was encountered.
151           //
152           char buf[ 6 /* bytes at most */ * 5 /* chars per byte */ ], *b = buf;
153           bool first = true;
154           for ( ; *p; ++p ) {
155             if ( first )
156               first = false;
157             else
158               *b++ = ',';
159             ::strcpy( b, "0x" );          b += 2;
160             ::sprintf( b, "%0hhX", *p );  b += 2;
161           }
162           throw XQUERY_EXCEPTION(
163             zerr::ZXQD0006_INVALID_UTF8_BYTE_SEQUENCE,
164             ERROR_PARAMS( buf ),
165             ERROR_LOC( loc )
166           );
167         } else {
168           throw XQUERY_EXCEPTION(
169             zerr::ZOSE0003_STREAM_READ_FAILURE, ERROR_LOC( loc )
170           );
171         }
172       }
173       state->theResult.clear();
174       state->theResult.push_back( utf8::next_char( p ) );
175 
176       GENV_ITEMFACTORY->createInteger(
177         result,
178         Integer(state->theResult[0])
179       );
180 
181       STACK_PUSH(true, state );
182       state->theIterator = state->theIterator + 1;
183     }
184   }
185   else if (!inputStr.empty())
186   {
187     utf8::to_codepoints(inputStr, &state->theResult);
188 
189     while (state->theIterator < state->theResult.size())
190     {
191       GENV_ITEMFACTORY->createInteger(
192         result,
193         Integer(state->theResult[state->theIterator])
194       );
195 
196       STACK_PUSH(true, state );
197       state->theIterator = state->theIterator + 1;
198     }
199   }
200   STACK_END (state);
201 }
202 
203 
init(PlanState & planState)204 void StringToCodepointsIteratorState::init(PlanState& planState)
205 {
206   PlanIteratorState::init(planState);
207   theIterator = 0;
208   theStream   = 0;
209   theResult.clear();
210 }
211 
212 
reset(PlanState & planState)213 void StringToCodepointsIteratorState::reset(PlanState& planState)
214 {
215   PlanIteratorState::reset(planState);
216   theIterator = 0;
217   theResult.clear();
218   theStreamItem = 0;
219 }
220 
221 
222 /**
223   *______________________________________________________________________
224   *
225   *  7.3.2 fn:compare
226   *
227   * fn:compare($comparand1 as xs:string?,
228   *            $comparand2 as xs:string?) as xs:integer
229   * fn:compare($comparand1 as xs:string?,
230   *            $comparand2 as xs:string?,
231   *            $collation  as xs:string) as xs:integer?
232   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const233 bool CompareStrIterator::nextImpl(
234     store::Item_t& result,
235     PlanState& planState) const
236 {
237   store::Item_t n0;
238   store::Item_t n1;
239   store::Item_t n2;
240   int res;
241 
242   PlanIteratorState* state;
243   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
244 
245   if (consumeNext(n0, theChildren[0].getp(), planState ))
246   {
247     if (consumeNext(n1, theChildren[1].getp(), planState ))
248     {
249       XQPCollator* coll;
250 
251       if (theChildren.size() == 3)
252       {
253         consumeNext(n2, theChildren[2].getp(), planState);
254 
255         coll = theSctx->get_collator(n2->getStringValue().str(), loc);
256       }
257       else
258       {
259         coll = theSctx->get_default_collator(loc);
260       }
261 
262       res  = utf8::compare(n0->getStringValue(), n1->getStringValue(), coll);
263 
264       res = (res < 0 ? -1 : (res > 0 ? 1 : 0));
265 
266       GENV_ITEMFACTORY->createInteger(result, Integer(res));
267 
268       STACK_PUSH(true, state);
269     }
270   }
271 
272   STACK_END (state);
273 }
274 
275 
276 /**
277   *______________________________________________________________________
278   *
279   *  7.3.3 fn:codepoint-equal
280   *
281   *  fn:codepoint-equal($comparand1 as xs:string?,
282   *                     $comparand2 as xs:string?) as xs:boolean?
283   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const284 bool CodepointEqualIterator::nextImpl(
285     store::Item_t& result,
286     PlanState& planState) const
287 {
288   store::Item_t item0;
289   store::Item_t item1;
290 
291   PlanIteratorState* state;
292   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
293 
294   if (consumeNext(item0, theChildren [0].getp(), planState ))
295   {
296     if (consumeNext(item1, theChildren [1].getp(), planState ))
297     {
298       GENV_ITEMFACTORY->createBoolean(result,
299                                       item0->getStringValue() == item1->getStringValue());
300       STACK_PUSH(true, state);
301     }
302   }
303   STACK_END(state);
304 }
305 
306 
307 /**
308   *______________________________________________________________________
309   *
310   *  7.4.1 fn:concat
311   *
312   * fn:concat($arg1 as xs:anyAtomicType?,
313   *           $arg2    as xs:anyAtomicType?,
314   *           ...                          ) as xs:string
315   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const316 bool ConcatStrIterator::nextImpl(
317     store::Item_t& result,
318     PlanState& planState) const
319 {
320   store::Item_t lItem;
321   std::stringstream lResStream;
322   zstring tmp;
323 
324   checked_vector<PlanIter_t>::const_iterator iter = theChildren.begin();
325   checked_vector<PlanIter_t>::const_iterator end  = theChildren.end();
326 
327   PlanIteratorState* state;
328   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
329 
330   for(; iter != end;  ++iter )
331   {
332     if (consumeNext(lItem, *iter, planState))
333     {
334       lResStream << lItem->getStringValue();
335 
336       if (consumeNext(lItem, *iter, planState))
337       {
338         throw XQUERY_EXCEPTION(
339           err::XPTY0004,
340           ERROR_PARAMS( ZED( NoSeqForConcat ) ),
341           ERROR_LOC( loc )
342         );
343       }
344     }
345   }
346 
347   tmp = lResStream.str();
348   STACK_PUSH(GENV_ITEMFACTORY->createString(result, tmp), state);
349 
350   STACK_END (state);
351 }
352 
353 
354 /**
355   *______________________________________________________________________
356   *
357   *  7.4.2 fn:string-join
358   *
359   * fn:string-join($arg1 as xs:string*,
360   *                $arg2 as xs:string) as xs:string
361   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const362 bool StringJoinIterator::nextImpl(
363     store::Item_t& result,
364     PlanState& planState) const
365 {
366   store::Item_t item;
367   zstring resStr;
368   zstring separator;
369   bool lFirst;
370 
371   PlanIteratorState* state;
372   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
373 
374   if(theChildren.size() > 1)
375   {
376     consumeNext(item, theChildren[1].getp(), planState);
377     item->getStringValue2(separator);
378   }
379 
380   if (separator.empty())
381   {
382     while(true)
383     {
384       if (consumeNext(item, theChildren[0].getp(), planState))
385       {
386         item->appendStringValue(resStr);
387       }
388       else
389       {
390         GENV_ITEMFACTORY->createString(result, resStr);
391         STACK_PUSH(true, state);
392         break;
393       }
394     }
395   }
396   else
397   {
398     lFirst = true;
399 
400     while(true)
401     {
402       if (consumeNext(item, theChildren[0].getp(), planState))
403       {
404         if (!lFirst)
405         {
406           resStr += separator;
407           item->appendStringValue(resStr);
408         }
409         else
410         {
411           item->getStringValue2(resStr);
412           lFirst = false;
413         }
414       }
415       else
416       {
417         GENV_ITEMFACTORY->createString(result, resStr);
418         STACK_PUSH(true, state);
419         break;
420       }
421     }
422   }
423 
424   STACK_END (state);
425 }
426 
427 
428 /**
429   *______________________________________________________________________
430   *
431   *  7.4.3 fn:substring
432   *
433   *fn:substring($sourceString   as xs:string?,
434   *             $startingLoc    as xs:double) as xs:string
435   *fn:substring($sourceString as xs:string?,
436   *             $startingLoc  as xs:double,
437   *             $length       as xs:double)   as xs:string
438   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const439 bool SubstringIterator::nextImpl(
440     store::Item_t& result,
441     PlanState& planState) const
442 {
443   store::Item_t stringItem;
444   store::Item_t startItem;
445   store::Item_t lenItem;
446   zstring strval;
447   zstring resStr;
448   xs_double start;
449   xs_double len;
450   xs_int istart;
451   xs_int ilen;
452 
453   PlanIteratorState* state;
454   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
455 
456   if (consumeNext(stringItem, theChildren[0].getp(), planState ))
457   {
458     stringItem->getStringValue2(strval);
459 
460     if (!strval.empty())
461     {
462       bool startExists = consumeNext(startItem, theChildren[1], planState);
463 
464       ZORBA_ASSERT(startExists);
465 
466       // note: The first character of a string is located at position 1,
467       // not position 0.
468 
469       start = startItem->getDoubleValue();
470 
471       if (!start.isNaN())
472       {
473         if (start.isFinite())
474         {
475           try
476           {
477             istart = to_xs_int(start.round());
478           }
479           catch ( std::range_error const& )
480           {
481             throw XQUERY_EXCEPTION(
482               zerr::ZXQD0004_INVALID_PARAMETER,
483               ERROR_PARAMS(ZED(ZXQD0004_NOT_WITHIN_RANGE),
484                           start),
485               ERROR_LOC( loc )
486             );
487           }
488         }
489         else
490         {
491           istart = (xs_int)utf8_string<zstring>(strval).length();
492         }
493 
494         if( theChildren.size() == 2)
495         {
496           if (istart <= 0)
497           {
498             resStr = strval;
499           }
500           else
501           {
502             try
503             {
504               resStr = utf8_string<zstring>(strval).substr(istart-1);
505             }
506             catch (...)
507             {
508               zstring::size_type numChars = utf8_string<zstring>(strval).length();
509               if (static_cast<zstring::size_type>(istart) > numChars)
510               {
511                 // result is the empty string
512               }
513               else
514               {
515                 throw;
516               }
517             }
518           }
519         }
520         else
521         {
522           bool lenItemExists = consumeNext(lenItem, theChildren[2], planState);
523 
524           ZORBA_ASSERT(lenItemExists);
525 
526           len = lenItem->getDoubleValue();
527 
528           if (!len.isNaN())
529           {
530             if (len.isFinite())
531             {
532               try
533               {
534                 ilen = to_xs_int(len.round());
535               }
536               catch ( std::range_error const& )
537               {
538                 throw XQUERY_EXCEPTION(
539                   zerr::ZXQD0004_INVALID_PARAMETER,
540                   ERROR_PARAMS(ZED(ZXQD0004_NOT_WITHIN_RANGE),
541                               len),
542                   ERROR_LOC( loc )
543                 );
544               }
545             }
546             else
547             {
548               ilen = (xs_int)(utf8_string<zstring>(strval).length() - istart + 1);
549             }
550 
551             if( !(start + len).isNaN())
552             {
553               if (ilen >= 0)
554               {
555                 if (istart <= 0)
556                 {
557                   if ((ilen + istart - 1) >= 0)
558                     resStr = utf8_string<zstring>(strval).substr(0,  istart - 1 + ilen);
559                 }
560                 else
561                 {
562                   try
563                   {
564                     resStr = utf8_string<zstring>(strval).substr(istart-1, ilen);
565                   }
566                   catch (...)
567                   {
568                     zstring::size_type numChars = utf8_string<zstring>(strval).length();
569                     if (static_cast<zstring::size_type>(istart) > numChars)
570                     {
571                       // result is the empty string
572                     }
573                     else
574                     {
575                       throw;
576                     }
577                   }
578                 }
579               }
580             }
581           }
582         }
583       } // non NaN start arg
584     } // non empty string arg
585   } // non NULL string arg
586 
587   STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
588 
589   STACK_END (state);
590 }
591 
592 
593 /**
594   *______________________________________________________________________
595   *
596   *  7.4.3.1  fn:substring optimized for int arguments
597   *
598   *fn:substring($sourceString   as xs:string?,
599   *             $startingLoc    as xs:integer) as xs:string
600   *fn:substring($sourceString as xs:string?,
601   *             $startingLoc  as xs:integer,
602   *             $length       as xs:integer)   as xs:string
603   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const604 bool SubstringIntOptIterator::nextImpl(
605     store::Item_t& result,
606     PlanState& planState) const
607 {
608   store::Item_t stringItem;
609   store::Item_t startItem;
610   store::Item_t lenItem;
611   zstring strval;
612   zstring resStr;
613   xs_int start;
614   xs_int len;
615 
616   PlanIteratorState* state;
617   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
618 
619   if (consumeNext(stringItem, theChildren[0].getp(), planState ))
620   {
621     stringItem->getStringValue2(strval);
622 
623     if (!strval.empty())
624     {
625       bool startExists = consumeNext(startItem, theChildren[1], planState);
626 
627       ZORBA_ASSERT(startExists);
628 
629       // note: The first character of a string is located at position 1,
630       // not position 0.
631 
632       try
633       {
634         start = to_xs_int(startItem->getIntegerValue());
635       }
636       catch ( std::range_error const& )
637       {
638         throw XQUERY_EXCEPTION(
639           zerr::ZXQD0004_INVALID_PARAMETER,
640           ERROR_PARAMS(ZED(ZXQD0004_NOT_WITHIN_RANGE),
641                       lenItem->getIntegerValue()),
642           ERROR_LOC( loc )
643         );
644       }
645 
646       if( theChildren.size() == 2)
647       {
648         if (start <= 0)
649         {
650           resStr = strval;
651         }
652         else
653         {
654           try
655           {
656             resStr = utf8_string<zstring>(strval).substr(start-1);
657           }
658           catch (...)
659           {
660             zstring::size_type numChars = utf8_string<zstring>(strval).length();
661             if (static_cast<zstring::size_type>(start) > numChars)
662             {
663               // result is the empty string
664             }
665             else
666             {
667               throw;
668             }
669           }
670         }
671       }
672       else
673       {
674         bool lenItemExists = consumeNext(lenItem, theChildren[2], planState);
675 
676         ZORBA_ASSERT(lenItemExists);
677 
678         try
679         {
680           len = to_xs_int(lenItem->getIntegerValue());
681         }
682         catch ( std::range_error const& )
683         {
684           throw XQUERY_EXCEPTION(
685             zerr::ZXQD0004_INVALID_PARAMETER,
686             ERROR_PARAMS(ZED(ZXQD0004_NOT_WITHIN_RANGE),
687                         lenItem->getIntegerValue()),
688             ERROR_LOC( loc )
689           );
690         }
691 
692         if (len >= 0)
693         {
694           if (start <= 0)
695           {
696             if ((len + start - 1) >= 0)
697               resStr = utf8_string<zstring>(strval).substr(0,  start - 1 + len);
698           }
699           else
700           {
701             try
702             {
703               resStr = utf8_string<zstring>(strval).substr(start-1, len);
704             }
705             catch (...)
706             {
707               zstring::size_type numChars = utf8_string<zstring>(strval).length();
708               if (static_cast<zstring::size_type>(start) > numChars)
709               {
710                 // result is the empty string
711               }
712               else
713               {
714                 throw;
715               }
716             }
717           }
718         }
719       }
720     } // non empty string arg
721   } // non NULL string arg
722 
723 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
724 
725 STACK_END (state);
726 }
727 
728 
729 /**
730   *______________________________________________________________________
731   *
732   *  7.4.4 fn:string-length
733   *
734   *fn:string-length()                   as xs:integer
735   *fn:string-length($arg as xs:string?) as xs:integer
736   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const737 bool StringLengthIterator::nextImpl(
738     store::Item_t& result,
739     PlanState& planState) const
740 {
741   store::Item_t item;
742   zstring strval;
743 
744   PlanIteratorState* state;
745   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
746 
747   if (consumeNext(item, theChildren [0].getp(), planState))
748   {
749     item->getStringValue2(strval);
750 
751     STACK_PUSH(GENV_ITEMFACTORY->createInteger(result, Integer(utf8::length(strval))),
752                state);
753   }
754   else
755   {
756     STACK_PUSH(GENV_ITEMFACTORY->createInteger(result, Integer::zero()),
757                state);
758   }
759   STACK_END(state);
760 }
761 
762 
763 /**
764   *______________________________________________________________________
765   *
766   *  7.4.5 fn:normalize-space
767   *
768   *fn:normalize-space()                   as xs:string
769   *fn:normalize-space($arg as xs:string?) as xs:string
770   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const771 bool NormalizeSpaceIterator::nextImpl(
772     store::Item_t& result,
773     PlanState& planState) const
774 {
775   store::Item_t item;
776   zstring resStr;
777 
778   PlanIteratorState* state;
779   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
780 
781   if (consumeNext(item, theChildren [0].getp(), planState))
782   {
783     item->getStringValue2(resStr);
784     ascii::normalize_whitespace(resStr);
785     STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
786   }
787   else
788   {
789     STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
790   }
791   STACK_END (state);
792 }
793 
794 
795 /**
796   *______________________________________________________________________
797   *
798   *  7.4.6 fn:normalize-unicode
799   *
800   *fn:normalize-unicode($arg as xs:string?)              as xs:string
801   *fn:normalize-unicode($arg as xs:string?,
802   *                     $normalizationForm as xs:string) as xs:string
803   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const804 bool NormalizeUnicodeIterator::nextImpl(
805     store::Item_t& result,
806     PlanState& planState) const
807 {
808   store::Item_t item0;
809   store::Item_t item1;
810   zstring normForm;
811   zstring resStr;
812   unicode::normalization::type normType;
813 #ifndef ZORBA_NO_ICU
814   bool success;
815 #endif /* ZORBA_NO_ICU */
816 
817   PlanIteratorState* state;
818   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
819 
820   if (consumeNext(item0, theChildren[0], planState ))
821   {
822     if(theChildren.size() == 2)
823     {
824       if (!consumeNext(item1, theChildren[1].getp(), planState ))
825         ZORBA_ASSERT(false);
826 
827       item1->getStringValue2(normForm);
828       ascii::trim_whitespace(normForm);
829       zstring tmp(normForm);
830       utf8::to_upper(tmp, &normForm);
831     }
832     else
833     {
834       normForm = "NFC";
835     }
836 
837     if(normForm.empty())
838     {
839       normType = unicode::normalization::none;
840     }
841     else if (ZSTREQ(normForm, "NFC"))
842     {
843       normType = unicode::normalization::NFC;
844     }
845     else if (ZSTREQ(normForm, "NFKC"))
846     {
847       normType = unicode::normalization::NFKC;
848     }
849     else if (ZSTREQ(normForm, "NFD"))
850     {
851       normType = unicode::normalization::NFD;
852     }
853     else if (ZSTREQ(normForm, "NFKD"))
854     {
855       normType = unicode::normalization::NFKD;
856     }
857     else
858     {
859       throw XQUERY_EXCEPTION(
860         err::FOCH0003, ERROR_PARAMS( normForm ), ERROR_LOC( loc )
861       );
862     }
863 
864     item0->getStringValue2(resStr);
865 #ifndef ZORBA_NO_ICU
866     success = utf8::normalize(resStr, normType, &resStr);
867     ZORBA_ASSERT(success);
868 #endif//#ifndef ZORBA_NO_ICU
869     STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state );
870   }
871   else
872   {
873     // must push empty string due to return type of function
874     STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
875   }
876 
877   STACK_END (state);
878 }
879 
880 
881 /**
882   *______________________________________________________________________
883   *
884   *  7.4.7 fn:upper-case
885   *
886   *fn:upper-case($arg as xs:string?) as xs:string
887   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const888 bool UpperCaseIterator::nextImpl(
889     store::Item_t& result,
890     PlanState& planState) const
891 {
892   store::Item_t item;
893   zstring resStr;
894   zstring strval;
895 
896   PlanIteratorState* state;
897   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
898 
899   if (consumeNext(item, theChildren [0].getp(), planState))
900   {
901     item->getStringValue2(strval);
902 
903     utf8::to_upper(strval, &resStr);
904 
905     STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
906   }
907   else
908   {
909     STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
910   }
911   STACK_END (state);
912 }
913 
914 
915 /**
916   *______________________________________________________________________
917   *
918   *  7.4.8 fn:lower-case
919   *
920   *fn:lower-case($arg as xs:string?) as xs:string
921   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const922 bool LowerCaseIterator::nextImpl(
923     store::Item_t& result,
924     PlanState& planState) const
925 {
926   store::Item_t item;
927   zstring resStr;
928   zstring strval;
929 
930   PlanIteratorState* state;
931   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
932 
933   if (consumeNext(item, theChildren [0].getp(), planState))
934   {
935     item->getStringValue2(strval);
936 
937     utf8::to_lower(strval, &resStr);
938 
939     STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
940   }
941   else
942   {
943     STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
944   }
945   STACK_END (state);
946 }
947 
948 
949 /**
950   *______________________________________________________________________
951   *
952   *  7.4.9 fn:translate
953   *
954   *fn:translate($arg          as xs:string?,
955   *             $mapString    as xs:string,
956   *             $transString  as xs:string) as xs:string
957   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const958 bool TranslateIterator::nextImpl(
959     store::Item_t& result,
960     PlanState& planState) const
961 {
962   store::Item_t arg_item, map_item, trans_item;
963   bool res = false;
964   zstring arg_string;
965   zstring map_string;
966   zstring trans_string;
967   zstring result_string;
968 
969   PlanIteratorState* state;
970   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
971 
972   if ( consumeNext( arg_item  , theChildren[0].getp(), planState ) &&
973        consumeNext( map_item  , theChildren[1].getp(), planState ) &&
974        consumeNext( trans_item, theChildren[2].getp(), planState ) ) {
975 
976     arg_string   = arg_item  ->getStringValue().str();
977     map_string   = map_item  ->getStringValue().str();
978     trans_string = trans_item->getStringValue().str();
979 
980     typedef std::map<unicode::code_point,unicode::code_point> cp_map_type;
981     cp_map_type trans_map;
982 
983     if ( !map_string.empty() ) {
984       utf8_string<zstring const> const u_map_string  ( map_string   );
985       utf8_string<zstring const> const u_trans_string( trans_string );
986 
987       utf8_string<zstring const>::const_iterator
988         map_i     = u_map_string  .begin(),
989         map_end   = u_map_string  .end  (),
990         trans_i   = u_trans_string.begin(),
991         trans_end = u_trans_string.end  ();
992 
993       for ( ; map_i != map_end && trans_i != trans_end; ++map_i, ++trans_i )
994         trans_map[ *map_i ] = *trans_i;
995 
996       for ( ; map_i != map_end; ++map_i )
997         trans_map[ *map_i ] = static_cast<unicode::code_point>( ~0 );
998     }
999 
1000     utf8_string<zstring> u_result_string( result_string );
1001     utf8_string<zstring const> const u_arg_string( arg_string );
1002 
1003     utf8_string<zstring const>::const_iterator
1004       arg_i   = u_arg_string.begin(),
1005       arg_end = u_arg_string.end  ();
1006 
1007     for ( ; arg_i != arg_end; ++arg_i ) {
1008       unicode::code_point cp = *arg_i;
1009       cp_map_type::const_iterator const found_i = trans_map.find( cp );
1010       if ( found_i != trans_map.end() ) {
1011         cp = found_i->second;
1012         if ( cp == static_cast<unicode::code_point>( ~0 ) )
1013           continue;
1014       }
1015       u_result_string += cp;
1016     }
1017 
1018     res = GENV_ITEMFACTORY->createString(result, result_string);
1019   }
1020 
1021   if (!res)
1022   {
1023     res = GENV_ITEMFACTORY->createString(result, result_string);
1024   }
1025 
1026   STACK_PUSH( res, state );
1027   STACK_END (state);
1028 }
1029 
1030 
1031 /**
1032   *______________________________________________________________________
1033   *
1034   *  7.4.10 fn:encode-for-uri
1035   *
1036   *fn:encode-for-uri($uri-part as xs:string?) as xs:string
1037   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1038 bool EncodeForUriIterator::nextImpl(
1039     store::Item_t& result,
1040     PlanState& planState) const
1041 {
1042   store::Item_t item;
1043   zstring resStr;
1044   zstring strval;
1045 
1046   PlanIteratorState* state;
1047   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1048 
1049   if (consumeNext(item, theChildren [0].getp(), planState))
1050   {
1051     item->getStringValue2(strval);
1052     uri::encode(strval, &resStr, true);
1053   }
1054 
1055   STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
1056   STACK_END (state);
1057 }
1058 
1059 
1060 /**
1061   *______________________________________________________________________
1062   *
1063   *  7.4.11 fn:iri-to-uri
1064   *
1065   *fn:iri-to-uri($iri as xs:string?) as xs:string
1066   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1067 bool IriToUriIterator::nextImpl(
1068     store::Item_t& result,
1069     PlanState& planState) const
1070 {
1071   store::Item_t item;
1072   zstring lStrIri;
1073   zstring lStrRes;
1074 
1075   PlanIteratorState* state;
1076   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1077 
1078   if (consumeNext(item, theChildren [0].getp(), planState))
1079   {
1080     item->getStringValue2(lStrIri);
1081 
1082     utf8::iri_to_uri(lStrIri, &lStrRes);
1083 
1084     STACK_PUSH(GENV_ITEMFACTORY->createString(result, lStrRes), state);
1085   }
1086   else
1087   {
1088     STACK_PUSH(GENV_ITEMFACTORY->createString(result, lStrRes), state);
1089   }
1090   STACK_END (state);
1091 }
1092 
1093 
1094 /**
1095   *______________________________________________________________________
1096   *
1097   *  7.4.12 fn:escape-html-uri
1098   *
1099   *fn:escape-html-uri($uri as xs:string?) as xs:string
1100   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1101 bool EscapeHtmlUriIterator::nextImpl(
1102     store::Item_t& result,
1103     PlanState& planState) const
1104 {
1105   store::Item_t item;
1106   zstring lStrUri;
1107   zstring lStrRes;
1108 
1109   PlanIteratorState* state;
1110   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1111 
1112   if (consumeNext(item, theChildren [0].getp(), planState))
1113   {
1114     item->getStringValue2(lStrUri);
1115 
1116     utf8::to_html_uri(lStrUri, &lStrRes);
1117 
1118     STACK_PUSH(GENV_ITEMFACTORY->createString(result, lStrRes), state);
1119   }
1120   else
1121   {
1122     STACK_PUSH(GENV_ITEMFACTORY->createString(result, lStrRes), state);
1123   }
1124   STACK_END (state);
1125 }
1126 
1127 
1128 /**
1129   *______________________________________________________________________
1130   *
1131   *  7.5.1 fn:contains
1132   *
1133   *fn:contains( $arg1       as xs:string?,
1134   *             $arg2       as xs:string?) as xs:boolean
1135   *fn:contains( $arg1       as xs:string?,
1136   *             $arg2       as xs:string?,
1137   *             $collation  as xs:string) as xs:boolean
1138   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1139 bool ContainsIterator::nextImpl(
1140     store::Item_t& result,
1141     PlanState& planState) const
1142 {
1143   store::Item_t item0;
1144   store::Item_t item1;
1145   store::Item_t itemColl;
1146   bool resBool = false;
1147   zstring arg1;
1148   zstring arg2;
1149 
1150   PlanIteratorState* state;
1151   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1152 
1153   if (consumeNext(item0, theChildren[0].getp(), planState ))
1154   {
1155     item0->getStringValue2(arg1);
1156   }
1157 
1158   if (consumeNext(item1, theChildren[1].getp(), planState ))
1159   {
1160     item1->getStringValue2(arg2);
1161   }
1162 
1163   if (arg2.empty())
1164   {
1165     STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, true), state );
1166   }
1167   else if (arg1.empty())
1168   {
1169     STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, false), state );
1170   }
1171   else
1172   {
1173     if (theChildren.size() == 2)
1174     {
1175       resBool = (arg1.find(arg2) != zstring::npos);
1176     }
1177     else
1178     {
1179       if (consumeNext(itemColl, theChildren[2].getp(), planState ))
1180       {
1181         XQPCollator* coll = theSctx->get_collator(itemColl->getStringValue().str(), loc);
1182         resBool = (utf8::find(arg1, arg2, coll) != zstring::npos);
1183       }
1184     }
1185     STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, resBool), state );
1186   }
1187 
1188   STACK_END (state);
1189 }
1190 /*end class ContainsIterator*/
1191 
1192 /**
1193   *______________________________________________________________________
1194   *
1195   *  7.5.2 fn:starts-with
1196   *
1197   *fn:starts-with($arg1       as xs:string?,
1198   *               $arg2       as xs:string?) as xs:boolean
1199   *fn:starts-with($arg1       as xs:string?,
1200   *               $arg2       as xs:string?,
1201   *               $collation  as xs:string) as xs:boolean
1202   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1203 bool StartsWithIterator::nextImpl(
1204     store::Item_t& result,
1205     PlanState& planState) const
1206 {
1207   store::Item_t item0;
1208   store::Item_t item1;
1209   store::Item_t itemColl;
1210   bool resBool = false;
1211   zstring arg1;
1212   zstring arg2;
1213 
1214   PlanIteratorState* state;
1215   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1216 
1217   if (theChildren.size() == 2 || theChildren.size()==3)
1218   {
1219     if (consumeNext(item0, theChildren[0].getp(), planState ))
1220     {
1221       item0->getStringValue2(arg1);
1222     }
1223 
1224     if (consumeNext(item1, theChildren[1].getp(), planState ))
1225     {
1226       item1->getStringValue2(arg2);
1227     }
1228 
1229     if (arg2.empty())
1230     {
1231       STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, true), state );
1232     }
1233     else if (arg1.empty())
1234     {
1235       STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, false), state );
1236     }
1237     else
1238     {
1239       if (theChildren.size() == 2)
1240       {
1241         resBool = (arg1.find(arg2) == 0);
1242       }
1243       else
1244       {
1245         if (consumeNext(itemColl, theChildren[2].getp(), planState ))
1246         {
1247           XQPCollator* coll = theSctx->get_collator(itemColl->getStringValue().str(), loc);
1248           resBool = (utf8::find(arg1, arg2, coll) == 0);
1249         }
1250       }
1251       STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, resBool), state );
1252     }
1253   }
1254   STACK_END (state);
1255 }
1256 /*end class StartsWithIterator*/
1257 
1258 /**
1259   *______________________________________________________________________
1260   *
1261   *  7.5.3 fn:ends-with
1262   *
1263   *fn:ends-with($arg1       as xs:string?,
1264   *             $arg2       as xs:string?)  as xs:boolean
1265   *fn:ends-with($arg1       as xs:string?,
1266   *             $arg2       as xs:string?,
1267   *             $collation  as xs:string)   as xs:boolean
1268   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1269 bool EndsWithIterator::nextImpl(
1270     store::Item_t& result,
1271     PlanState& planState) const
1272 {
1273   store::Item_t item0;
1274   store::Item_t item1;
1275   store::Item_t itemColl;
1276   bool resBool = false;
1277   zstring arg1;
1278   zstring arg2;
1279 
1280   PlanIteratorState* state;
1281   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1282 
1283   if (consumeNext(item0, theChildren[0].getp(), planState ))
1284   {
1285     item0->getStringValue2(arg1);
1286   }
1287 
1288   if (consumeNext(item1, theChildren[1].getp(), planState ))
1289   {
1290     item1->getStringValue2(arg2);
1291   }
1292 
1293   if (arg2.empty())
1294   {
1295     STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, true), state );
1296   }
1297   else if (arg1.empty())
1298   {
1299     STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, false), state );
1300   }
1301   else
1302   {
1303     if (theChildren.size() == 2)
1304     {
1305       resBool = utf8::ends_with(arg1, arg2);
1306     }
1307     else
1308     {
1309       if (consumeNext(itemColl, theChildren[2].getp(), planState ))
1310       {
1311         XQPCollator* coll = theSctx->get_collator(itemColl->getStringValue().str(), loc);
1312 
1313         resBool = utf8::ends_with(arg1, arg2, coll);
1314       }
1315     }
1316     STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, resBool), state );
1317   }
1318 
1319   STACK_END (state);
1320 }
1321 
1322 
1323 /**
1324   *______________________________________________________________________
1325   *
1326   *  7.5.4 fn:substring-before
1327   *
1328   *fn:substring-before( $arg1       as xs:string?,
1329   *                     $arg2       as xs:string?)  as xs:string
1330   *fn:substring-before( $arg1       as xs:string?,
1331   *                     $arg2       as xs:string?,
1332   *                     $collation  as xs:string)   as xs:string
1333   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1334 bool SubstringBeforeIterator::nextImpl(
1335     store::Item_t& result,
1336     PlanState& planState) const
1337 {
1338   store::Item_t item0;
1339   store::Item_t item1;
1340   store::Item_t itemColl;
1341   zstring::size_type index = zstring::npos;
1342   zstring arg1;
1343   zstring arg2;
1344   zstring resStr;
1345 
1346   PlanIteratorState* state;
1347   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1348 
1349   if(theChildren.size() == 2 || theChildren.size()==3)
1350   {
1351     if (consumeNext(item0, theChildren[0].getp(), planState ))
1352     {
1353       item0->getStringValue2(arg1);
1354     }
1355 
1356     if (consumeNext(item1, theChildren[1].getp(), planState ))
1357     {
1358       item1->getStringValue2(arg2);
1359     }
1360 
1361     if (arg1.empty() || arg2.empty())
1362     {
1363       STACK_PUSH( GENV_ITEMFACTORY->createString(result, resStr), state );
1364     }
1365     else
1366     {
1367       if (theChildren.size() == 2)
1368       {
1369         index = arg1.find(arg2);
1370       }
1371       else
1372       {
1373         if (consumeNext(itemColl, theChildren[2].getp(), planState ))
1374         {
1375           XQPCollator* coll = 0;
1376           coll = theSctx->get_collator(itemColl->getStringValue().str(), loc);
1377           index = utf8::find(arg1, arg2, coll);
1378         }
1379       }
1380 
1381       if (index != zstring::npos)
1382         resStr = arg1.substr(0, index);
1383 
1384       STACK_PUSH( GENV_ITEMFACTORY->createString(result, resStr), state );
1385     }
1386   }
1387   STACK_END (state);
1388 }
1389 
1390 
1391 /**
1392   *______________________________________________________________________
1393   *
1394   *  7.5.5 fn:substring-after
1395   *
1396   *fn:substring-after($arg1       as xs:string?,
1397   *                   $arg2       as xs:string?)  as xs:string
1398   *fn:substring-after($arg1       as xs:string?,
1399   *                   $arg2       as xs:string?,
1400   *                   $collation  as xs:string)   as xs:string
1401   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1402 bool SubstringAfterIterator::nextImpl(
1403     store::Item_t& result,
1404     PlanState& planState) const
1405 {
1406   store::Item_t item0;
1407   store::Item_t item1;
1408   store::Item_t itemColl;
1409   zstring::size_type startPos = zstring::npos;
1410   zstring arg1;
1411   zstring arg2;
1412   zstring resStr;
1413 
1414   PlanIteratorState* state;
1415   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1416 
1417   if (theChildren.size() == 2 || theChildren.size() == 3)
1418   {
1419     if (consumeNext(item0, theChildren[0].getp(), planState ))
1420     {
1421       item0->getStringValue2(arg1);
1422     }
1423 
1424     if (consumeNext(item1, theChildren[1].getp(), planState ))
1425     {
1426       item1->getStringValue2(arg2);
1427     }
1428 
1429     if (arg1.empty())
1430     {
1431       STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
1432     }
1433     else if (arg2.empty())
1434     {
1435       resStr = arg1;
1436       STACK_PUSH( GENV_ITEMFACTORY->createString(result, resStr), state );
1437     }
1438     else
1439     {
1440       if (theChildren.size() == 2)
1441       {
1442         startPos = arg1.find(arg2);
1443       }
1444       else
1445       {
1446         if (consumeNext(itemColl, theChildren[2].getp(), planState))
1447         {
1448           XQPCollator* coll = theSctx->get_collator(itemColl->getStringValue().str(), loc);
1449           startPos = utf8::find(arg1, arg2, coll);
1450         }
1451       }
1452 
1453       if (startPos != zstring::npos)
1454       {
1455         startPos += arg2.size();
1456         resStr = arg1.substr(startPos, arg1.size() - startPos);
1457       }
1458 
1459       STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
1460     }
1461   }
1462   STACK_END (state);
1463 }
1464 
1465 
1466 /**
1467   *______________________________________________________________________
1468   *
1469   *  7.6.2 fn:matches
1470   *
1471   *fn:matches($input   as xs:string?,
1472   *           $pattern as xs:string) as xs:boolean
1473   *fn:matches($input   as xs:string?,
1474   *           $pattern as xs:string,
1475   *           $flags   as xs:string) as xs:boolean
1476   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1477 bool FnMatchesIterator::nextImpl(
1478     store::Item_t& result,
1479     PlanState& planState) const
1480 {
1481   zstring input;
1482   zstring xquery_pattern;
1483   zstring flags;
1484   store::Item_t item;
1485   bool res = false;
1486 
1487   PlanIteratorState* state;
1488   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1489 
1490   if (consumeNext(item, theChildren[0].getp(), planState))
1491     item->getStringValue2(input);
1492 
1493   if (!consumeNext(item, theChildren[1].getp(), planState))
1494     ZORBA_ASSERT (false);
1495 
1496   item->getStringValue2(xquery_pattern);
1497 
1498   if(theChildren.size() == 3)
1499   {
1500     if (!consumeNext(item, theChildren[2].getp(), planState))
1501       ZORBA_ASSERT (false);
1502 
1503     item->getStringValue2(flags);
1504   }
1505 
1506   try
1507   {
1508     zstring lib_pattern;
1509     convert_xquery_re( xquery_pattern, &lib_pattern, flags.c_str() );
1510     res = utf8::match_part(input, lib_pattern, flags.c_str());
1511   }
1512   catch(XQueryException& ex)
1513   {
1514     set_source( ex, loc );
1515     throw;
1516   }
1517 
1518   STACK_PUSH(GENV_ITEMFACTORY->createBoolean(result, res), state);
1519 
1520   STACK_END(state);
1521 }
1522 
1523 
1524 /**
1525   *______________________________________________________________________
1526   *
1527   *  7.6.3 fn:replace
1528   *
1529   *fn:replace($input       as xs:string?,
1530   *           $pattern     as xs:string,
1531   *           $replacement as xs:string) as xs:string
1532   *fn:replace($input       as xs:string?,
1533   *           $pattern     as xs:string,
1534   *           $replacement as xs:string,
1535   *           $flags       as xs:string) as xs:string
1536   *_______________________________________________________________________*/
nextImpl(store::Item_t & result,PlanState & planState) const1537 bool FnReplaceIterator::nextImpl(
1538     store::Item_t& result,
1539     PlanState& planState) const
1540 {
1541   zstring input;
1542   zstring flags;
1543   zstring pattern;
1544   zstring replacement;
1545   zstring resStr;
1546   store::Item_t item;
1547   bool tmp;
1548 
1549   PlanIteratorState* state;
1550   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1551 
1552   if (consumeNext(item, theChildren[0].getp(), planState))
1553     item->getStringValue2(input);
1554 
1555   if (!consumeNext(item, theChildren[1].getp(), planState))
1556     ZORBA_ASSERT (false);
1557 
1558   item->getStringValue2(pattern);
1559 
1560   if (!consumeNext(item, theChildren[2].getp(), planState))
1561     ZORBA_ASSERT (false);
1562 
1563   item->getStringValue2(replacement);
1564 
1565   if(theChildren.size() == 4)
1566   {
1567     if (!consumeNext(item, theChildren[3].getp(), planState))
1568       ZORBA_ASSERT (false);
1569 
1570     item->getStringValue2(flags);
1571   }
1572 
1573   try
1574   {
1575     tmp = utf8::match_part(zstring(), pattern, flags.c_str());
1576   }
1577   catch(XQueryException& ex)
1578   {
1579     set_source( ex, loc );
1580     throw;
1581   }
1582 
1583   if (tmp)
1584     throw XQUERY_EXCEPTION(
1585       err::FORX0003, ERROR_PARAMS( pattern ), ERROR_LOC( loc )
1586     );
1587 
1588   if ( flags.find( 'q' ) == zstring::npos ) {
1589 
1590     // count the number of capturing groups
1591     bool got_paren = false;
1592     int num_capturing_groups = 0;
1593     FOR_EACH( zstring, c, pattern ) {
1594       if ( got_paren && *c != '?' )
1595         ++num_capturing_groups;
1596       got_paren = *c == '(';
1597     }
1598 
1599     bool got_backslash = false;
1600     bool got_dollar = false;
1601     zstring temp_replacement;
1602     FOR_EACH( zstring, c, replacement ) {
1603       if ( got_backslash ) {
1604         switch ( *c ) {
1605           case '\\':
1606           case '$':
1607             temp_replacement += '\\';
1608             temp_replacement += *c;
1609             got_backslash = false;
1610             continue;
1611           default:
1612             throw XQUERY_EXCEPTION(
1613               err::FORX0004,
1614               ERROR_PARAMS( replacement, ZED( BadCharAfter_34 ), *c, '\\' ),
1615               ERROR_LOC( loc )
1616             );
1617         }
1618       }
1619       if ( got_dollar ) {
1620         if ( !ascii::is_digit( *c ) )
1621           throw XQUERY_EXCEPTION(
1622             err::FORX0004,
1623             ERROR_PARAMS( replacement, ZED( BadCharAfter_34 ), *c, '$' ),
1624             ERROR_LOC( loc )
1625           );
1626         if ( *c - '0' <= num_capturing_groups ) {
1627           temp_replacement += '$';
1628           temp_replacement += *c;
1629         }
1630         got_dollar = false;
1631         continue;
1632       }
1633       switch ( *c ) {
1634         case '\\':
1635           got_backslash = true;
1636           break;
1637         case '$':
1638           got_dollar = true;
1639           break;
1640         default:
1641           temp_replacement += *c;
1642           break;
1643       }
1644     } // FOR_EACH
1645     if ( got_backslash )
1646       throw XQUERY_EXCEPTION(
1647         err::FORX0004,
1648         ERROR_PARAMS( replacement, ZED( TrailingChar_3 ), '\\' ),
1649         ERROR_LOC( loc )
1650       );
1651     if ( got_dollar )
1652       throw XQUERY_EXCEPTION(
1653         err::FORX0004,
1654         ERROR_PARAMS( replacement, ZED( TrailingChar_3 ), '$' ),
1655         ERROR_LOC( loc )
1656       );
1657     replacement = temp_replacement;
1658   }
1659 
1660   try
1661   {
1662     zstring lib_pattern;
1663     convert_xquery_re( pattern, &lib_pattern, flags.c_str() );
1664     utf8::replace_all(input, lib_pattern, flags.c_str(), replacement, &resStr);
1665   }
1666   catch(XQueryException& ex)
1667   {
1668     set_source( ex, loc );
1669     throw;
1670   }
1671 
1672   STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
1673 
1674   STACK_END (state);
1675 }
1676 
1677 
1678 /**
1679  *______________________________________________________________________
1680  *
1681  *  7.6.4 fn:tokenize
1682  *
1683  *fn:tokenize($input    as xs:string?,
1684  *            $pattern  as xs:string) as xs:string*
1685  *fn:tokenize($input    as xs:string?,
1686  *            $pattern  as xs:string,
1687  *            $flags    as xs:string) as xs:string*
1688  *_______________________________________________________________________
1689  */
reset(PlanState & planState)1690 void FnTokenizeIteratorState::reset(PlanState& planState)
1691 {
1692   PlanIteratorState::reset(planState);
1693   theString.clear();
1694   start_pos = 0;
1695   hasmatched = false;
1696   thePattern.clear();
1697   theFlags.clear();
1698 }
1699 
1700 
nextImpl(store::Item_t & result,PlanState & planState) const1701 bool FnTokenizeIterator::nextImpl(
1702     store::Item_t& result,
1703     PlanState& planState) const
1704 {
1705   zstring token;
1706   store::Item_t item;
1707   bool tmp;
1708   zstring strval;
1709   unicode::string u_string;
1710 
1711   FnTokenizeIteratorState* state;
1712   DEFAULT_STACK_INIT(FnTokenizeIteratorState, state, planState);
1713 
1714   if (consumeNext(item, theChildren[0].getp(), planState))
1715   {
1716     item->getStringValue2(strval);
1717     state->theString = strval.str();
1718   }
1719 
1720   if (!consumeNext(item, theChildren[1].getp(), planState))
1721     ZORBA_ASSERT(false);
1722 
1723   item->getStringValue2(strval);
1724   state->thePattern = strval.str();
1725 
1726   if(theChildren.size() == 3)
1727   {
1728     if (!consumeNext(item, theChildren[2].getp(), planState))
1729       ZORBA_ASSERT (false);
1730 
1731     item->getStringValue2(strval);
1732 
1733     state->theFlags = strval.str();
1734   }
1735 
1736   try
1737   {
1738     static zstring const empty;
1739     tmp = utf8::match_part( empty, state->thePattern, state->theFlags );
1740   }
1741   catch(XQueryException& ex)
1742   {
1743     set_source( ex, loc );
1744     throw;
1745   }
1746 
1747   if(tmp)
1748     throw XQUERY_EXCEPTION(
1749       err::FORX0003, ERROR_PARAMS( state->thePattern ), ERROR_LOC( loc )
1750     );
1751 
1752 
1753   while ((xs_unsignedInt)state->start_pos < state->theString.length ())
1754   {
1755     try
1756     {
1757       unicode::regex re;
1758       //
1759       // The RE needs to be compiled every time due to the weird stack macros.
1760       //
1761       re.compile( state->thePattern, state->theFlags );
1762       unicode::string u_token;
1763       bool const got_next = re.next_token(
1764         state->theString, &state->start_pos, &u_token, &state->hasmatched
1765       );
1766       utf8::to_string( u_token, &token );
1767       if ( !got_next )
1768         break;
1769     }
1770     catch(XQueryException& ex)
1771     {
1772       set_source( ex, loc );
1773       throw;
1774     }
1775 
1776     STACK_PUSH(GENV_ITEMFACTORY->createString(result, token), state);
1777   }
1778 
1779   if(state->hasmatched)
1780   {
1781     //the last token is empty (is after the last match)
1782     token.clear();
1783     STACK_PUSH(GENV_ITEMFACTORY->createString(result, token), state);
1784   }
1785   STACK_END(state);
1786 }
1787 
1788 /**
1789   *______________________________________________________________________
1790   *
1791   *  5.6.5 fn:analyze-string
1792   *
1793   *fn:analyze-string( $input   as xs:string?,
1794   *                   $pattern as xs:string) as element(fn:analyze-string-result)
1795   *fn:analyze-string( $input   as xs:string?,
1796   *                   $pattern as xs:string,
1797   *                   $flags   as xs:string) as element(fn:analyze-string-result)
1798   *_______________________________________________________________________*/
1799 
copyUtf8Chars(const char * & sin,int & utf8start,unsigned int & bytestart,int utf8end,unsigned int byteend,zstring & out)1800 static void copyUtf8Chars(const char *&sin,
1801                           int &utf8start,
1802                           unsigned int &bytestart,
1803                           int utf8end,
1804                           unsigned int byteend,
1805                           zstring &out)
1806 {
1807 #ifndef ZORBA_NO_ICU
1808   utf8::size_type clen;
1809   if(utf8end)
1810   {
1811     while(utf8start < utf8end)
1812     {
1813       clen = utf8::char_length(*sin);
1814       if(clen == 0)
1815         clen = 1;
1816       out.append(sin, clen);
1817       utf8start++;
1818       bytestart += clen;
1819       sin += clen;
1820     }
1821   }
1822   else
1823 #endif
1824   {
1825     if(!utf8end)
1826       utf8end = byteend;
1827     out.append(sin, utf8end-bytestart);
1828     sin += utf8end-bytestart;
1829     utf8start = utf8end;
1830     bytestart = utf8end;
1831   }
1832 }
1833 
addNonMatchElement(store::Item_t & parent,int & match_end1,unsigned int & match_end1_bytes,int match_start2,unsigned int match_start2_bytes,const char * & strin)1834 static void addNonMatchElement(store::Item_t &parent,
1835                                int &match_end1,
1836                                unsigned int &match_end1_bytes,
1837                                int match_start2,
1838                                unsigned int match_start2_bytes,
1839                                const char *&strin)
1840 {
1841   store::Item_t non_match_elem;
1842   store::Item_t non_match_element_name;
1843   store::Item_t untyped_type_name;
1844   store::NsBindings   ns_binding;
1845   zstring baseURI;
1846   GENV_ITEMFACTORY->createQName(untyped_type_name,
1847                                 XML_SCHEMA_NS, XML_SCHEMA_PREFIX, "untyped");
1848   GENV_ITEMFACTORY->createQName(non_match_element_name,
1849                                 static_context::W3C_FN_NS, "fn", "non-match");
1850   GENV_ITEMFACTORY->createElementNode(non_match_elem, parent, non_match_element_name, untyped_type_name, false, false, ns_binding, baseURI);
1851   //utf8_it += (match_start2 - match_end1);
1852   zstring                non_match_str;
1853   //utf8_string<zstring>   non_match_utf8(non_match_str);
1854   //while(match_end1 < match_start2)
1855   //{
1856   //  non_match_utf8 += *utf8_it;
1857   //  utf8_it++;
1858   //  match_end1++;
1859   //}
1860   copyUtf8Chars(strin, match_end1, match_end1_bytes, match_start2, match_start2_bytes, non_match_str);
1861   store::Item_t non_match_text_item;
1862   GENV_ITEMFACTORY->createTextNode(non_match_text_item, non_match_elem, non_match_str);
1863 }
1864 
// Adds, under 'parent', the content of one match (or of one group of a
// match): fn:group child elements for the pattern groups handled at this
// level, interleaved with text nodes for the stretches of matched text that
// lie between/after those groups.
//
//  parent            - element receiving the text nodes and fn:group elements
//  untyped_type_name - type QName used for every element/attribute created
//  ns_binding,
//  baseURI           - passed through to createElementNode()
//  match_start2      - start position of the text covered by 'parent'
//  match_end2        - end position of the text covered by 'parent'
//  match_end1_bytes  - running byte offset, advanced by copyUtf8Chars()
//  sin               - read cursor into the source text, advanced as text is
//                      copied
//  rx                - regex holding the current match; queried for group
//                      start/end positions
//  gparent           - index of the group whose content is being built, or -1
//                      at the top level of a match
//  group_parent      - group_parent[k] is the parent index recorded for
//                      group k (-1 for none)
//  nr_pattern_groups - number of groups to consider
//  i                 - in/out group index shared with the caller: the loop
//                      resumes at i+1 and leaves i at the last group consumed
//
// NOTE(review): positions appear to be character offsets with ICU and byte
// offsets without it (see the two preprocessor branches) -- confirm against
// unicode::regex.
static void addGroupElement(store::Item_t &parent,
                            store::Item_t &untyped_type_name,
                            store::NsBindings   &ns_binding,
                            zstring &baseURI,
                            int match_start2,
                            int match_end2,
                            unsigned int &match_end1_bytes,
                            const char *&sin,
                            unicode::regex &rx,
                            int gparent,
                            std::vector<int> &group_parent,
                            int nr_pattern_groups,
                            int &i)
{
  int    match_startg = match_start2;
  int    match_endg = match_start2;
  // end of the text already emitted under 'parent'
  int    match_endgood = match_start2;
  store::Item_t group_element_name;
  store::Item_t nr_attrib_name;
  for(i=i+1;i<nr_pattern_groups;i++)
  {
    // A group whose recorded parent is below gparent is not handled at this
    // level: step back so the caller sees it again, and stop.
    if(group_parent[i] < gparent)
    {
      i--;
      break;
    }
#ifndef ZORBA_NO_ICU
    match_startg = rx.get_match_start(i+1);
    // At the top level, a group with a negative start is skipped entirely
    // (no fn:group element is created for it).
    if((match_startg < 0) && (gparent < 0))
      continue;
#else
    int temp_endg;
    match_startg = -1;
    temp_endg = -1;
    if(!rx.get_match_start_end_bytes(i+1, &match_startg, &temp_endg) && (gparent < 0))
      continue;
#endif
    if(match_endgood < match_startg)
    {
      //add non-group match text
      zstring                non_group_str;

      copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_startg, 0, non_group_str);
      store::Item_t non_group_text_item;
      GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent.getp(), non_group_str);
    }
#ifndef ZORBA_NO_ICU
    match_endg = rx.get_match_end(i+1);
#else
    match_endg = temp_endg;
#endif
    //add group match text
    GENV_ITEMFACTORY->createQName(group_element_name,
                                  static_context::W3C_FN_NS, "fn", "group");
    GENV_ITEMFACTORY->createQName(nr_attrib_name,
                                  "", "", "nr");
    store::Item_t group_elem;
    GENV_ITEMFACTORY->createElementNode(group_elem, parent, group_element_name, untyped_type_name, false, false, ns_binding, baseURI);
    // the "nr" attribute carries the 1-based group number
    char strid[40];
    sprintf(strid, "%d", i+1);
    zstring zstrid(strid);
    store::Item_t strid_item;
    GENV_ITEMFACTORY->createString(strid_item, zstrid);
    store::Item_t id_attrib_item;
    GENV_ITEMFACTORY->createAttributeNode(id_attrib_item, group_elem.getp(), nr_attrib_name, untyped_type_name, strid_item);
    // A group with a negative start, or one that starts before text already
    // emitted, keeps its (empty) fn:group element but gets no content.
    if((match_startg < 0) || (match_startg < match_endgood))
      continue;
    match_endgood = match_endg;
    if((i+1)<nr_pattern_groups)
    {
      // The next group has a higher parent index than this level: build the
      // content of group_elem recursively. 'i' is passed both by value (as
      // the new gparent) and by reference (as the shared counter), so it
      // comes back pointing at the last group consumed by the recursion.
      if(group_parent[i+1] > gparent)
      {
        addGroupElement(group_elem, untyped_type_name, ns_binding, baseURI,
                        match_startg, match_endg, match_end1_bytes,
                        sin, rx,
                        i, group_parent, nr_pattern_groups, i);
        continue;
      }
    }
    // No recursion: the group's whole text becomes a single text node.
    zstring                group_str;

    copyUtf8Chars(sin, match_startg, match_end1_bytes, match_endg, 0, group_str);
    store::Item_t group_text_item;
    GENV_ITEMFACTORY->createTextNode(group_text_item, group_elem.getp(), group_str);
  }
  //add last non-group match
  if(match_endgood < match_end2)
  {
    zstring                non_group_str;

    copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_end2, 0, non_group_str);
    store::Item_t non_group_text_item;
    GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent, non_group_str);
  }
}
1960 
addMatchElement(store::Item_t & parent,int match_start2,unsigned int & match_end1_bytes,int match_end2,const char * & sin,unicode::regex & rx,std::vector<int> & group_parent,int nr_pattern_groups)1961 static void addMatchElement(store::Item_t &parent,
1962                     int match_start2,
1963                     unsigned int &match_end1_bytes,
1964                     int match_end2,
1965                     //utf8_string<zstring_p>::const_iterator& utf8_it,
1966                     const char *&sin,
1967                     unicode::regex &rx,
1968                     std::vector<int> &group_parent,
1969                     int nr_pattern_groups)
1970 {
1971   store::Item_t match_element_name;
1972   store::Item_t untyped_type_name;
1973   store::NsBindings   ns_binding;
1974   zstring baseURI;
1975   GENV_ITEMFACTORY->createQName(untyped_type_name,
1976                                 XML_SCHEMA_NS, XML_SCHEMA_PREFIX, "untyped");
1977   GENV_ITEMFACTORY->createQName(match_element_name,
1978                                 static_context::W3C_FN_NS, "fn", "match");
1979   store::Item_t match_elem;
1980   GENV_ITEMFACTORY->createElementNode(match_elem, parent, match_element_name, untyped_type_name, false, false, ns_binding, baseURI);
1981   int i = -1;
1982   addGroupElement(match_elem, untyped_type_name, ns_binding, baseURI, match_start2, match_end2, match_end1_bytes, sin, rx, -1, group_parent, nr_pattern_groups, i);
1983 }
1984 
computePatternGroupsParents(zstring & xquery_pattern,std::vector<int> & group_parent)1985 static void computePatternGroupsParents(zstring &xquery_pattern, std::vector<int> &group_parent)
1986 {
1987   utf8_string<zstring>   utf8_pattern(xquery_pattern);
1988   utf8_string<zstring>::const_iterator    c;
1989   std::list<int>    parents;
1990   int i = 0;
1991 
1992   for(c = utf8_pattern.begin(); c != utf8_pattern.end(); c++)
1993   {
1994     if(*c == '\\')
1995     {
1996       c++;
1997       continue;
1998     }
1999     if(*c == '(')
2000     {
2001       //begin group
2002       if(parents.size())
2003         group_parent.push_back(parents.back());
2004       else
2005         group_parent.push_back(-1);
2006       parents.push_back(i);
2007       i++;
2008     }
2009     else if(*c == ')')
2010     {
2011       if(parents.size())
2012         parents.pop_back();
2013     }
2014   }
2015 }
2016 
nextImpl(store::Item_t & result,PlanState & planState) const2017 bool FnAnalyzeStringIterator::nextImpl(
2018     store::Item_t& result,
2019     PlanState& planState) const
2020 {
2021   bool is_input_stream = false;
2022   zstring input;
2023   std::istream *instream = NULL;
2024 #define STREAMBUF_CHUNK_SIZE    4*1024
2025   class SmartCharPtr
2026   {
2027   public:
2028     char *ptr;
2029     SmartCharPtr() : ptr(NULL) {}
2030     ~SmartCharPtr() {if(ptr) ::free(ptr);}
2031   };
2032   SmartCharPtr    streambuf;
2033   zstring::size_type   streambuf_allocated_size = 0;
2034   zstring::size_type   streambuf_read = 0;
2035   //zstring::size_type   streambuf_beg = 0;
2036   zstring xquery_pattern;
2037   zstring flags;
2038   store::Item_t item;
2039 
2040   PlanIteratorState* state;
2041   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
2042 
2043   if (consumeNext(item, theChildren[0].getp(), planState))
2044   {
2045     if(!item->isStreamable())
2046     {
2047       item->getStringValue2(input);
2048     }
2049     else
2050     {
2051       instream = &item->getStream();
2052       is_input_stream = true;
2053     }
2054   }
2055 
2056   if (!consumeNext(item, theChildren[1].getp(), planState))
2057     ZORBA_ASSERT (false);
2058 
2059   item->getStringValue2(xquery_pattern);
2060 
2061   if(theChildren.size() == 3)
2062   {
2063     if (!consumeNext(item, theChildren[2].getp(), planState))
2064       ZORBA_ASSERT (false);
2065 
2066     item->getStringValue2(flags);
2067   }
2068 
2069   try
2070   {
2071     zstring lib_pattern;
2072     convert_xquery_re( xquery_pattern, &lib_pattern, flags.c_str() );
2073 
2074     if(is_input_stream)
2075     {
2076       streambuf.ptr = (char*)malloc(STREAMBUF_CHUNK_SIZE);
2077       streambuf_allocated_size = STREAMBUF_CHUNK_SIZE;
2078       instream->read(streambuf.ptr, streambuf_allocated_size);
2079       streambuf_read = (unsigned int)instream->gcount();
2080       if(streambuf_read == STREAMBUF_CHUNK_SIZE)
2081       {
2082 	// Note: const_reverse_iterator would work here, but does not
2083 	// compile with gcc 4.0.1 (which is the version in Xcode on MacOS 10.5).
2084         zstring::reverse_iterator xqit = xquery_pattern.rbegin();
2085         if((xqit != xquery_pattern.rend()) && (flags.find('m') == std::string::npos))
2086         {
2087           if(*xqit == '$')
2088           {
2089             xqit++;
2090             int bslashes = 0;
2091             while(xqit != xquery_pattern.rend())
2092             {
2093               if(*xqit == '\\')
2094                 bslashes++;
2095               else
2096                 break;
2097             }
2098             if(bslashes%2 == 0)
2099             {
2100               //better read all instream
2101               do{
2102                 streambuf.ptr = (char*)realloc(streambuf.ptr, streambuf_allocated_size+STREAMBUF_CHUNK_SIZE);
2103                 streambuf_allocated_size += STREAMBUF_CHUNK_SIZE;
2104                 instream->read(streambuf.ptr + streambuf_read, STREAMBUF_CHUNK_SIZE);
2105                 streambuf_read += (unsigned int)instream->gcount();
2106               }while(instream->gcount() == STREAMBUF_CHUNK_SIZE);
2107             }
2108           }
2109         }
2110       }
2111     }
2112 
2113     unicode::regex    rx;
2114     rx.compile(lib_pattern, flags.c_str());
2115     int   nr_pattern_groups = rx.get_pattern_group_count();
2116     std::vector<int>    group_parent;
2117     computePatternGroupsParents(xquery_pattern, group_parent);
2118 
2119     //see if regex can match empty strings
2120     bool   reachedEnd = false;
2121     rx.set_string("", 0);
2122     if(rx.find_next_match(&reachedEnd))
2123     {
2124       throw XQUERY_EXCEPTION(
2125         err::FORX0003, ERROR_PARAMS( lib_pattern )
2126       );
2127 
2128     }
2129 
2130     store::Item_t null_parent;
2131     store::Item_t result_element_name;
2132     store::Item_t untyped_type_name;
2133     store::NsBindings   ns_binding;
2134     zstring baseURI;
2135     GENV_ITEMFACTORY->createQName(untyped_type_name,
2136                                   XML_SCHEMA_NS, XML_SCHEMA_PREFIX, "untyped");
2137     GENV_ITEMFACTORY->createQName(result_element_name,
2138                                   static_context::W3C_FN_NS, "fn", "analyze-string-result");
2139     GENV_ITEMFACTORY->createElementNode(result, NULL, result_element_name, untyped_type_name, false, false, ns_binding, baseURI);
2140 
2141     int nr_retry = 0;
2142     reachedEnd = false;
2143     do
2144     {
2145       const char *instr;
2146       if(!is_input_stream)
2147       {
2148         rx.set_string(input.data(), input.size());
2149         instr = input.c_str();
2150         streambuf_read = input.size();
2151       }
2152       else
2153       {
2154         unsigned int reducebytes = 0;
2155         if(!instream->eof())
2156         {
2157           //check the last bytes, maybe it is a truncated utf8 char
2158           unsigned int maxbytes = 6;
2159           if(maxbytes > streambuf_read)
2160             maxbytes = streambuf_read;
2161           for(reducebytes=1;reducebytes<=maxbytes;reducebytes++)
2162           {
2163             utf8::size_type clen = utf8::char_length(streambuf.ptr[streambuf_read-reducebytes]);
2164             if((clen > 1) && (clen > reducebytes))
2165               break;
2166           }
2167           if(reducebytes == (maxbytes+1))
2168             reducebytes = 0;
2169         }
2170         rx.set_string(streambuf.ptr, streambuf_read-reducebytes);
2171         instr = streambuf.ptr;
2172       }
2173       //zstring_p zinstr(instr);
2174       //utf8_string<zstring_p>  utf8_instr(zinstr);
2175       //utf8_string<zstring_p>::const_iterator    utf8_it = utf8_instr.begin();
2176 
2177       //int    match_start1 = 0;
2178       int    match_end1 = 0;
2179       unsigned int    match_end1_bytes = 0;
2180       reachedEnd = false;
2181       while(rx.find_next_match(&reachedEnd))
2182       {
2183         int    match_start2;
2184         int    match_end2;
2185 #ifndef ZORBA_NO_ICU
2186         match_start2 = rx.get_match_start();
2187         match_end2 = rx.get_match_end();
2188 #else
2189         rx.get_match_start_end_bytes(0, &match_start2, &match_end2);
2190 #endif
2191         ZORBA_ASSERT(match_start2 >= 0);
2192 
2193         if(is_input_stream && reachedEnd && !instream->eof())
2194         {
2195           //load some more data, maybe the match will be different
2196           break;
2197         }
2198 
2199         //construct the fn:non-match
2200         if(match_start2 > match_end1)
2201         {
2202           addNonMatchElement(result, match_end1, match_end1_bytes, match_start2, 0, instr);
2203         }
2204 
2205         //construct the fn:match
2206         addMatchElement(result, match_start2, match_end1_bytes, match_end2, instr, rx, group_parent, nr_pattern_groups);
2207         match_end1 = match_end2;
2208       }
2209 
2210       if(is_input_stream && !instream->eof())
2211       {
2212         //load some more data, maybe the match will be different
2213         if(match_end1_bytes)
2214         {
2215           memmove(streambuf.ptr, streambuf.ptr+match_end1_bytes, streambuf_read-match_end1_bytes);
2216           streambuf_read -= match_end1_bytes;
2217           nr_retry = 0;
2218         }
2219         else
2220           nr_retry++;
2221         if(!match_end1_bytes && (nr_retry == 2))
2222         {
2223           if(streambuf_allocated_size > streambuf_read)
2224           {
2225             instream->read(streambuf.ptr + streambuf_read, streambuf_allocated_size - streambuf_read);
2226             streambuf_read += (unsigned int)instream->gcount();
2227           }
2228           //better read all instream
2229           while(!instream->eof())
2230           {
2231             streambuf.ptr = (char*)realloc(streambuf.ptr, streambuf_allocated_size+STREAMBUF_CHUNK_SIZE);
2232             instream->read(streambuf.ptr + streambuf_read, STREAMBUF_CHUNK_SIZE);
2233             streambuf_read += (unsigned int)instream->gcount();
2234             streambuf_allocated_size += STREAMBUF_CHUNK_SIZE;
2235           }
2236         }
2237         else
2238         {
2239           //read some more data from instream
2240           if(streambuf_allocated_size > streambuf_read)
2241           {
2242             instream->read(streambuf.ptr + streambuf_read, streambuf_allocated_size - streambuf_read);
2243             streambuf_read += (unsigned int)instream->gcount();
2244           }
2245           else
2246           {
2247             streambuf.ptr = (char*)realloc(streambuf.ptr, streambuf_allocated_size+STREAMBUF_CHUNK_SIZE);
2248             instream->read(streambuf.ptr + streambuf_read, STREAMBUF_CHUNK_SIZE);
2249             streambuf_read += (unsigned int)instream->gcount();
2250             streambuf_allocated_size += STREAMBUF_CHUNK_SIZE;
2251           }
2252         }
2253         reachedEnd = false;
2254       }
2255       else
2256       {
2257         if(match_end1_bytes < streambuf_read)
2258           addNonMatchElement(result, match_end1, match_end1_bytes, 0, streambuf_read, instr);
2259         if(is_input_stream && instream->eof())
2260           reachedEnd = true;
2261       }
2262 
2263     }while(is_input_stream && !reachedEnd);
2264   }
2265   catch(XQueryException& ex)
2266   {
2267     set_source( ex, loc );
2268     throw;
2269   }
2270 
2271   STACK_PUSH(true, state);
2272 
2273   STACK_END(state);
2274 }
2275 
2276 
2277 /**
2278  *______________________________________________________________________
2279  *
2280  * http://www.zorba-xquery.com/modules/string
2281  * string:materialize
2282  */
2283 
nextImpl(store::Item_t & result,PlanState & planState) const2284 bool StringMaterializeIterator::nextImpl(
2285     store::Item_t& result,
2286     PlanState& planState) const
2287 {
2288   store::Item_t item;
2289   zstring       lString;
2290 
2291   PlanIteratorState* state;
2292   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
2293 
2294 #ifndef NDEBUG
2295   assert(consumeNext(item, theChildren[0].getp(), planState));
2296 #else
2297   consumeNext(item, theChildren[0].getp(), planState);
2298 #endif
2299   if (item->isStreamable()) {
2300     lString = item->getString();
2301     STACK_PUSH(GENV_ITEMFACTORY->createString(result, lString), state);
2302   } else {
2303     result = item;
2304     STACK_PUSH(result != 0 , state);
2305   }
2306 
2307   STACK_END(state);
2308 }
2309 
2310 /**
2311  *______________________________________________________________________
2312  *
2313  * http://www.zorba-xquery.com/modules/string
2314  * string:is-streamable
2315  */
nextImpl(store::Item_t & result,PlanState & planState) const2316 bool StringIsStreamableIterator::nextImpl(
2317     store::Item_t& result,
2318     PlanState& planState) const
2319 {
2320   store::Item_t item;
2321 
2322   PlanIteratorState* state;
2323   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
2324 
2325 #ifndef NDEBUG
2326   assert(consumeNext(item, theChildren[0].getp(), planState));
2327 #else
2328   consumeNext(item, theChildren[0].getp(), planState);
2329 #endif
2330   STACK_PUSH(GENV_ITEMFACTORY->createBoolean(result, item->isStreamable()), state);
2331 
2332   STACK_END(state);
2333 }
2334 
2335 /**
2336  *______________________________________________________________________
2337  *
2338  * http://www.zorba-xquery.com/modules/string
2339  * string:split
2340  */
nextImpl(store::Item_t & result,PlanState & planState) const2341 bool StringSplitIterator::nextImpl(
2342     store::Item_t& result,
2343     PlanState& planState) const
2344 {
2345   store::Item_t item;
2346   size_t lNewPos = 0;
2347   zstring lToken;
2348   zstring lPartialMatch;
2349 
2350   StringSplitIteratorState* state;
2351   DEFAULT_STACK_INIT(StringSplitIteratorState, state, planState);
2352 
2353   // init phase, get input string and tokens
2354   consumeNext(item, theChildren[0].getp(), planState);
2355 
2356   if (item->isStreamable())
2357   {
2358     state->theIStream = &item->getStream();
2359   }
2360   else
2361   {
2362     state->theIStream = 0;
2363     item->getStringValue2(state->theInput);
2364   }
2365 
2366   consumeNext(item, theChildren[1].getp(), planState);
2367 
2368   item->getStringValue2(state->theSeparator);
2369 
2370   // working phase, do the tokenization
2371   if (state->theIStream)
2372   {
2373     while ( !state->theIStream->eof() )
2374     {
2375       utf8::encoded_char_type ec;
2376       memset( ec, '\0' , sizeof(ec) );
2377       utf8::storage_type *p;
2378       p = ec;
2379 
2380       if ( utf8::read( *state->theIStream, ec ) != utf8::npos )
2381       {
2382         if (state->theSeparator.compare(lNewPos, 1, ec) == 0)
2383         {
2384           if (++lNewPos == state->theSeparator.length())
2385           {
2386             GENV_ITEMFACTORY->createString(result, lToken);
2387             STACK_PUSH(true, state);
2388           }
2389           else
2390           {
2391             lPartialMatch.append(ec);
2392           }
2393         }
2394         else
2395         {
2396           lToken.append(lPartialMatch);
2397           lToken.append(ec);
2398         }
2399       }
2400       else
2401       {
2402         if (state->theIStream->good())
2403         {
2404           char buf[ 6 /* bytes at most */ * 5 /* chars per byte */ ], *b = buf;
2405           bool first = true;
2406           for ( ; *p; ++p ) {
2407             if ( first )
2408               first = false;
2409             else
2410               *b++ = ',';
2411             ::strcpy( b, "0x" );          b += 2;
2412             ::sprintf( b, "%0hhX", *p );  b += 2;
2413           }
2414           throw XQUERY_EXCEPTION(
2415             zerr::ZXQD0006_INVALID_UTF8_BYTE_SEQUENCE,
2416             ERROR_PARAMS( buf ),
2417             ERROR_LOC( loc )
2418           );
2419         }
2420         if (!lToken.empty())
2421         {
2422           GENV_ITEMFACTORY->createString(result, lToken);
2423           STACK_PUSH(true, state);
2424         }
2425         break;
2426       }
2427     }
2428   }
2429   else
2430   {
2431     while (true)
2432     {
2433       if (state->theNextStartPos == zstring::npos)
2434       {
2435         break;
2436       }
2437 
2438       lNewPos =
2439         state->theInput.find(state->theSeparator, state->theNextStartPos);
2440       if (lNewPos != zstring::npos)
2441       {
2442         zstring lSubStr = state->theInput.substr(
2443             state->theNextStartPos,
2444             lNewPos - state->theNextStartPos);
2445         GENV_ITEMFACTORY->createString(result, lSubStr);
2446         state->theNextStartPos =
2447           lNewPos==state->theInput.length() - state->theSeparator.length()
2448           ? zstring::npos
2449           : lNewPos + state->theSeparator.length();
2450       }
2451       else
2452       {
2453         zstring lSubStr = state->theInput.substr(state->theNextStartPos);
2454         GENV_ITEMFACTORY->createString(result, lSubStr);
2455         state->theNextStartPos = zstring::npos;
2456       }
2457       STACK_PUSH(true, state);
2458     }
2459   }
2460 
2461   STACK_END(state);
2462 }
2463 } // namespace zorba
2464 /* vim:set et sw=2 ts=2: */
2465