1 /* Copyright(C) 2004-2005 Brazil
2 
3   This library is free software; you can redistribute it and/or
4   modify it under the terms of the GNU Lesser General Public
5   License as published by the Free Software Foundation; either
6   version 2.1 of the License, or (at your option) any later version.
7 
8   This library is distributed in the hope that it will be useful,
9   but WITHOUT ANY WARRANTY; without even the implied warranty of
10   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11   Lesser General Public License for more details.
12 
13   You should have received a copy of the GNU Lesser General Public
14   License along with this library; if not, write to the Free Software
15   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17 #include "senna_in.h"
18 #include <string.h>
19 #include <stddef.h>
20 #include "snip.h"
21 #include "ctx.h"
22 
/* Larger of a and b. Function-like macro: the selected argument is evaluated
   twice, so do not pass expressions with side effects. Only defined when the
   including environment has not already provided MAX. */
#if !defined MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif

/* Smaller of a and b. Same double-evaluation caveat as MAX; only defined when
   MIN is not already provided. */
#if !defined MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
30 
/*
 * Parity check on the run of high-bit bytes that ends just before offset y.
 *
 * x: start of the byte string.
 * y: number of bytes of x to consider (the offset being tested).
 *
 * Counts how many consecutive bytes >= 0x80 immediately precede x[y] and
 * returns 1 when that count is even (including zero), 0 when it is odd.
 * For text made of single-byte ASCII and two-byte high-bit characters this
 * presumably means "offset y is on a character boundary" when 1 is returned.
 *
 * The scan is index based so that no pointer before x is ever formed, even
 * when y is 0 or every byte in x[0..y) has the high bit set.
 */
static int
sen_bm_check_euc(const unsigned char *x, const size_t y)
{
  size_t high = 0;
  while (high < y && x[y - 1 - high] >= 0x80U) {
    high++;
  }
  return (int) ((high + 1) & 1);
}
38 
/*
 * Parity check on the run of Shift_JIS lead-range bytes that ends just
 * before offset y.
 *
 * x: start of the byte string.
 * y: number of bytes of x to consider (the offset being tested).
 *
 * Walks backwards from x[y - 1] while the byte lies in 0x81..0x9f or
 * 0xe0..0xfc, and returns 1 when the number of such bytes is even
 * (including zero), 0 when it is odd.
 *
 * The scan is index based so that no pointer before x is ever formed, even
 * when y is 0 or every byte in x[0..y) is in the lead-byte range.
 */
static int
sen_bm_check_sjis(const unsigned char *x, const size_t y)
{
  size_t lead = 0;
  while (lead < y) {
    const unsigned char c = x[y - 1 - lead];
    const int in_lead_range =
      (c >= 0x81U && c <= 0x9fU) || (c >= 0xe0U && c <= 0xfcU);
    if (!in_lead_range) {
      break;
    }
    lead++;
  }
  return (int) ((lead + 1) & 1);
}
48 
49 /*
50 static void
51 sen_bm_suffixes(const unsigned char *x, size_t m, size_t *suff)
52 {
53   size_t f, g;
54   intptr_t i;
55   f = 0;
56   suff[m - 1] = m;
57   g = m - 1;
58   for (i = m - 2; i >= 0; --i) {
59     if (i > (intptr_t) g && suff[i + m - 1 - f] < i - g)
60       suff[i] = suff[i + m - 1 - f];
61     else {
62       if (i < (intptr_t) g)
63         g = i;
64       f = i;
65       while (g > 0 && x[g] == x[g + m - 1 - f])
66         --g;
67       suff[i] = f - g;
68     }
69   }
70 }
71 */
72 
73 static void
sen_bm_preBmBc(const unsigned char * x,size_t m,size_t * bmBc)74 sen_bm_preBmBc(const unsigned char *x, size_t m, size_t *bmBc)
75 {
76   size_t i;
77   for (i = 0; i < ASIZE; ++i) {
78     bmBc[i] = m;
79   }
80   for (i = 0; i < m - 1; ++i) {
81     bmBc[(unsigned int) x[i]] = m - (i + 1);
82   }
83 }
84 
/*
 * Accept a candidate match at normalized byte index `found` and translate it
 * into offsets in the original string.
 *
 * Statement macro; it expects these names in the expanding scope:
 *   cond, object, flags - as in sen_bm_tunedbm()
 *   found               - index of the candidate in object's normalized bytes
 *   m                   - keyword length in normalized bytes
 *   shift               - amount added to `found` to get the next restart point
 *   i                   - scratch size_t
 *
 * The candidate is ignored (the macro does nothing) when
 * object->checks[found] is 0. Otherwise the macro:
 *   1. adds every positive checks[] entry in [cond->last_found, found) to the
 *      running offset that starts at cond->last_offset, remembering the last
 *      index that had a positive entry (found_alpha_head);
 *   2. if checks[found] is negative, backs the offset up by
 *      checks[found_alpha_head] and restarts from that index;
 *   3. stores the result in cond->start_offset / cond->last_offset, optionally
 *      skipping leading spaces in object->orig;
 *   4. adds the positive checks[] entries up to found + m to get
 *      cond->end_offset;
 *   5. saves the restart position in cond->found and RETURNS from the
 *      enclosing function.
 *
 * NOTE(review): presumably a positive checks[i] is the original byte length of
 * the character that begins at normalized byte i - confirm against sen_nstr.
 */
#define SEN_BM_COMPARE \
  if (object->checks[found]) { \
    size_t offset = cond->last_offset, found_alpha_head = cond->found_alpha_head; \
    /* accumulate the original-string offset up to the candidate */\
    for (i = cond->last_found; i < found; i++) { \
      if (object->checks[i] > 0) { \
        found_alpha_head = i; \
        offset += object->checks[i]; \
      } \
    } \
    /* if the offset falls inside a character, move it to the head of that character */ \
    if (object->checks[found] < 0) { \
      offset -= object->checks[found_alpha_head]; \
      cond->last_found = found_alpha_head; \
    } else { \
      cond->last_found = found; \
    } \
    cond->start_offset = cond->last_offset = offset; \
    if (flags & SEN_SNIP_SKIP_LEADING_SPACES) { \
      while (cond->start_offset < object->orig_blen && \
             (i = sen_isspace(object->orig + cond->start_offset, \
                              object->encoding))) { cond->start_offset += i; } \
    } \
    for (i = cond->last_found; i < found + m; i++) { \
      if (object->checks[i] > 0) { \
        offset += object->checks[i]; \
      } \
    } \
    cond->end_offset = offset; \
    cond->found = found + shift; \
    cond->found_alpha_head = found_alpha_head; \
    /* printf("bm: cond:%p found:%zd last_found:%zd st_off:%zd ed_off:%zd\n", cond, cond->found,cond->last_found,cond->start_offset,cond->end_offset); */ \
    return; \
  }
119 
/*
 * Verify a Boyer-Moore candidate whose window ends just before p.
 *
 * Expects in the expanding scope: p (one past the candidate's last text
 * byte), cp (one past the keyword's last byte), ck (the keyword's
 * second-to-last byte, cp[-2]), y (start of the text), m (keyword length),
 * i (scratch) and found.
 *
 * The last byte (p[-1]) is not compared here; sen_bm_tunedbm() only expands
 * this macro after bmBc[p[-1]] was found to be 0. The macro checks p[-2]
 * against ck first, then the remaining bytes from the third-to-last backwards.
 * On a full match it sets found = p - y - m and expands SEN_BM_COMPARE, which
 * may return from the enclosing function.
 */
#define SEN_BM_BM_COMPARE \
{ \
  if (p[-2] == ck) { \
    for (i = 3; i <= m && p[-(intptr_t)i] == cp[-(intptr_t)i]; ++i) { \
    } \
    if (i > m) { \
      found = p - y - m; \
      SEN_BM_COMPARE; \
    } \
  } \
}
131 
/*
 * Find the next occurrence of cond->keyword in object, resuming from the
 * position saved in cond->found.
 *
 * cond:   search condition; supplies the keyword, its bad-character table
 *         (cond->bmBc), the shift to apply after a candidate (cond->shift) and
 *         the resume position (cond->found). Updated in place.
 * object: text to search; its normalized bytes (norm / norm_blen) are scanned
 *         and its checks[] / orig fields are read by SEN_BM_COMPARE.
 * flags:  snippet flags; only SEN_SNIP_SKIP_LEADING_SPACES is read (inside
 *         SEN_BM_COMPARE).
 *
 * When a match is accepted, SEN_BM_COMPARE stores start_offset / end_offset /
 * found in cond and returns from this function directly. When the scan runs
 * out of text, cond->stopflag is set to SNIPCOND_STOP.
 */
void
sen_bm_tunedbm(snip_cond *cond, sen_nstr *object, int flags)
{
  register unsigned char *limit, ck;
  register const unsigned char *p, *cp;
  register size_t *bmBc, delta1, i;

  const unsigned char *x;
  unsigned char *y;
  size_t shift, found;

  const size_t n = object->norm_blen, m = cond->keyword->norm_blen;

  y = (unsigned char *) object->norm;
  /* Single-byte keyword: no shift table was built for it, use memchr. */
  if (m == 1) {
    if (n > cond->found) {
      shift = 1;
      p = memchr(y + cond->found, cond->keyword->norm[0], n - cond->found);
      if (p != NULL) {
        found = p - y;
        /* Returns from the function when checks[found] is non-zero. */
        SEN_BM_COMPARE;
        /* NOTE(review): when checks[found] is 0 the hit is rejected and the
           search stops below without trying later occurrences - confirm this
           is intended. */
      }
    }
    cond->stopflag = SNIPCOND_STOP;
    return;
  }

  x = (unsigned char *) cond->keyword->norm;
  bmBc = cond->bmBc;
  shift = cond->shift;

  /* Restart */
  /* p points one past the last byte of the current window; cp one past the
     last keyword byte; ck is the keyword's second-to-last byte. */
  p = y + m + cond->found;
  cp = x + m;
  ck = cp[-2];

  /* 12 means 1(initial offset) + 10 (in loop) + 1 (shift) */
  /* Fast path: while enough text remains that ten maximal shifts plus the
     final shift cannot leave the buffer, skip ahead without bounds checks. */
  if (n - cond->found > 12 * m) {
    limit = y + n - 11 * m;
    while (p <= limit) {
      /* A table entry of 0 marks the keyword's last byte; adding 0 leaves p in
         place, so the next test of delta1 catches the candidate. */
      p += bmBc[p[-1]];
      if(!(delta1 = bmBc[p[-1]])) {
        goto check;
      }
      p += delta1;
      p += bmBc[p[-1]];
      p += bmBc[p[-1]];
      if(!(delta1 = bmBc[p[-1]])) {
        goto check;
      }
      p += delta1;
      p += bmBc[p[-1]];
      p += bmBc[p[-1]];
      if(!(delta1 = bmBc[p[-1]])) {
        goto check;
      }
      p += delta1;
      p += bmBc[p[-1]];
      p += bmBc[p[-1]];
      continue;
    check:
      /* Last byte matches: verify the rest; on mismatch move on by shift. */
      SEN_BM_BM_COMPARE;
      p += shift;
    }
  }
  /* limit check + search */
  /* Tail: one shift per iteration, bounds-checked against the text end. */
  limit = y + n;
  while(p <= limit) {
    if (!(delta1 = bmBc[p[-1]])) {
      SEN_BM_BM_COMPARE;
      p += shift;
    }
    /* delta1 is 0 when the branch above ran, so only one advance applies. */
    p += delta1;
  }
  cond->stopflag = SNIPCOND_STOP;
}
208 
/*
 * Length of the byte range [str, end) once '<', '>', '&' and '"' have been
 * replaced by their HTML entities.
 *
 * '<' and '>' count as 4 bytes ("&lt;" / "&gt;"), '&' as 5 ("&amp;"),
 * '"' as 6 ("&quot;"); every other byte counts as 1. No terminator is
 * included in the result.
 */
static size_t
count_mapped_chars(const char *str, const char *end)
{
  size_t total = 0;
  const char *cur = str;

  while (cur != end) {
    const char c = *cur++;
    if (c == '<' || c == '>') {
      total += 4;
    } else if (c == '&') {
      total += 5;
    } else if (c == '"') {
      total += 6;
    } else {
      total += 1;
    }
  }
  return total;
}
235 
236 sen_rc
sen_snip_cond_close(snip_cond * cond)237 sen_snip_cond_close(snip_cond *cond)
238 {
239   if (!cond) {
240     return sen_invalid_argument;
241   }
242   if (cond->keyword) {
243     sen_nstr_close(cond->keyword);
244   }
245   return sen_success;
246 }
247 
248 sen_rc
sen_snip_cond_init(snip_cond * sc,const char * keyword,unsigned int keyword_len,sen_encoding enc,int flags)249 sen_snip_cond_init(snip_cond *sc, const char *keyword, unsigned int keyword_len,
250                 sen_encoding enc, int flags)
251 {
252   size_t norm_blen;
253   memset(sc, 0, sizeof(snip_cond));
254   if (flags & SEN_SNIP_NORMALIZE) {
255     if (!(sc->keyword = sen_nstr_open(keyword, keyword_len,
256                                       enc, SEN_STR_REMOVEBLANK))) {
257       SEN_LOG(sen_log_alert, "sen_nstr_open on snip_cond_init failed !");
258       return sen_memory_exhausted;
259     }
260   } else {
261     if (!(sc->keyword = sen_fakenstr_open(keyword, keyword_len,
262                                           enc, SEN_STR_REMOVEBLANK))) {
263       SEN_LOG(sen_log_alert, "sen_fakenstr_open on snip_cond_init failed !");
264       return sen_memory_exhausted;
265     }
266   }
267   norm_blen = sc->keyword->norm_blen; /* byte length, not cond->keyword->length */
268   if (!norm_blen) {
269     sen_snip_cond_close(sc);
270     return sen_invalid_argument;
271   }
272   if (norm_blen != 1) {
273     sen_bm_preBmBc((unsigned char *)sc->keyword->norm, norm_blen, sc->bmBc);
274     sc->shift = sc->bmBc[(unsigned char)sc->keyword->norm[norm_blen - 1]];
275     sc->bmBc[(unsigned char)sc->keyword->norm[norm_blen - 1]] = 0;
276   }
277   return sen_success;
278 }
279 
280 void
sen_snip_cond_reinit(snip_cond * cond)281 sen_snip_cond_reinit(snip_cond *cond)
282 {
283   cond->found = 0;
284   cond->last_found = 0;
285   cond->last_offset = 0;
286   cond->start_offset = 0;
287   cond->end_offset = 0;
288 
289   cond->count = 0;
290   cond->stopflag = SNIPCOND_NONSTOP;
291 }
292 
293 sen_rc
sen_snip_add_cond(sen_snip * snip,const char * keyword,unsigned int keyword_len,const char * opentag,unsigned int opentag_len,const char * closetag,unsigned int closetag_len)294 sen_snip_add_cond(sen_snip *snip,
295                   const char *keyword, unsigned int keyword_len,
296                   const char *opentag, unsigned int opentag_len,
297                   const char *closetag, unsigned int closetag_len)
298 {
299   sen_rc rc;
300   snip_cond *cond;
301   sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
302 
303   if (!snip || !keyword || !keyword_len || snip->cond_len >= MAX_SNIP_COND_COUNT) {
304     return sen_invalid_argument;
305   }
306   cond = snip->cond + snip->cond_len;
307   if ((rc = sen_snip_cond_init(cond, keyword, keyword_len,
308                                snip->encoding, snip->flags))) {
309     return rc;
310   }
311   if (cond->keyword->norm_blen > snip->width) {
312     sen_snip_cond_close(cond);
313     return sen_invalid_argument;
314   }
315   if (opentag) {
316     if (snip->flags & SEN_SNIP_COPY_TAG) {
317       char *t = SEN_MALLOC(opentag_len + 1);
318       if (!t) {
319         sen_snip_cond_close(cond);
320         return sen_memory_exhausted;
321       }
322       memcpy(t, opentag, opentag_len);
323       t[opentag_len]= '\0'; /* not required, but for ql use */
324       cond->opentag = t;
325     } else {
326       cond->opentag = opentag;
327     }
328     cond->opentag_len = opentag_len;
329   } else {
330     cond->opentag = snip->defaultopentag;
331     cond->opentag_len = snip->defaultopentag_len;
332   }
333   if (closetag) {
334     if (snip->flags & SEN_SNIP_COPY_TAG) {
335       char *t = SEN_MALLOC(closetag_len + 1);
336       if (!t) {
337         if (opentag) { SEN_FREE((void *)cond->opentag); }
338         return sen_memory_exhausted;
339       }
340       memcpy(t, closetag, closetag_len);
341       t[closetag_len]= '\0'; /* not required, but for ql use */
342       cond->closetag = t;
343     } else {
344       cond->closetag = closetag;
345     }
346     cond->closetag_len = closetag_len;
347   } else {
348     cond->closetag = snip->defaultclosetag;
349     cond->closetag_len = snip->defaultclosetag_len;
350   }
351   snip->cond_len++;
352   return sen_success;
353 }
354 
355 static size_t
sen_snip_find_firstbyte(const char * string,sen_encoding encoding,size_t offset,size_t doffset)356 sen_snip_find_firstbyte(const char *string, sen_encoding encoding, size_t offset,
357                         size_t doffset)
358 {
359   switch (encoding) {
360   case sen_enc_euc_jp:
361     while (!(sen_bm_check_euc((unsigned char *) string, offset)))
362       offset += doffset;
363     break;
364   case sen_enc_sjis:
365     if (!(sen_bm_check_sjis((unsigned char *) string, offset)))
366       offset += doffset;
367     break;
368   case sen_enc_utf8:
369     while (string[offset] <= (char)0xc0)
370       offset += doffset;
371     break;
372   default:
373     break;
374   }
375   return offset;
376 }
377 
378 sen_snip *
sen_snip_open(sen_encoding encoding,int flags,unsigned int width,unsigned int max_results,const char * defaultopentag,unsigned int defaultopentag_len,const char * defaultclosetag,unsigned int defaultclosetag_len,sen_snip_mapping * mapping)379 sen_snip_open(sen_encoding encoding, int flags, unsigned int width,
380               unsigned int max_results,
381               const char *defaultopentag, unsigned int defaultopentag_len,
382               const char *defaultclosetag, unsigned int defaultclosetag_len,
383               sen_snip_mapping *mapping)
384 {
385   sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
386   sen_snip *ret = NULL;
387   if (!(ret = SEN_MALLOC(sizeof(sen_snip)))) {
388     SEN_LOG(sen_log_alert, "sen_snip allocation failed on sen_snip_open");
389     return NULL;
390   }
391   if (max_results > MAX_SNIP_RESULT_COUNT || max_results == 0) {
392     SEN_LOG(sen_log_warning, "max_results is invalid on sen_snip_open");
393     return NULL;
394   }
395   ret->encoding = encoding;
396   ret->flags = flags;
397   ret->width = width;
398   ret->max_results = max_results;
399   if (flags & SEN_SNIP_COPY_TAG) {
400     char *t;
401     t = SEN_MALLOC(defaultopentag_len + 1);
402     if (!t) {
403       SEN_FREE(ret);
404       return NULL;
405     }
406     memcpy(t, defaultopentag, defaultopentag_len);
407     t[defaultopentag_len]= '\0'; /* not required, but for ql use */
408     ret->defaultopentag = t;
409 
410     t = SEN_MALLOC(defaultclosetag_len + 1);
411     if (!t) {
412       SEN_FREE((void *)ret->defaultopentag);
413       SEN_FREE(ret);
414       return NULL;
415     }
416     memcpy(t, defaultclosetag, defaultclosetag_len);
417     t[defaultclosetag_len]= '\0'; /* not required, but for ql use */
418     ret->defaultclosetag = t;
419   } else {
420     ret->defaultopentag = defaultopentag;
421     ret->defaultclosetag = defaultclosetag;
422   }
423   ret->defaultopentag_len = defaultopentag_len;
424   ret->defaultclosetag_len = defaultclosetag_len;
425   ret->cond_len = 0;
426   ret->mapping = mapping;
427   ret->nstr = NULL;
428   ret->tag_count = 0;
429   ret->snip_count = 0;
430 
431   return ret;
432 }
433 
434 static sen_rc
exec_clean(sen_snip * snip)435 exec_clean(sen_snip *snip)
436 {
437   snip_cond *cond, *cond_end;
438   if (snip->nstr) {
439     sen_nstr_close(snip->nstr);
440     snip->nstr = NULL;
441   }
442   snip->tag_count = 0;
443   snip->snip_count = 0;
444   for (cond = snip->cond, cond_end = cond + snip->cond_len;
445        cond < cond_end; cond++) {
446     sen_snip_cond_reinit(cond);
447   }
448   return sen_success;
449 }
450 
451 sen_rc
sen_snip_close(sen_snip * snip)452 sen_snip_close(sen_snip *snip)
453 {
454   sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
455   snip_cond *cond, *cond_end;
456   if (!snip) { return sen_invalid_argument; }
457   if (snip->flags & SEN_SNIP_COPY_TAG) {
458     int i;
459     snip_cond *sc;
460     const char *dot = snip->defaultopentag, *dct = snip->defaultclosetag;
461     for (i = snip->cond_len, sc = snip->cond; i; i--, sc++) {
462       if (sc->opentag != dot) { SEN_FREE((void *)sc->opentag); }
463       if (sc->closetag != dct) { SEN_FREE((void *)sc->closetag); }
464     }
465     if (dot) { SEN_FREE((void *)dot); }
466     if (dct) { SEN_FREE((void *)dct); }
467   }
468   if (snip->nstr) {
469     sen_nstr_close(snip->nstr);
470   }
471   for (cond = snip->cond, cond_end = cond + snip->cond_len;
472        cond < cond_end; cond++) {
473     sen_snip_cond_close(cond);
474   }
475   SEN_FREE(snip);
476   return sen_success;
477 }
478 
/*
 * Search `string` for every registered keyword and plan the snippets.
 *
 * snip:           snippet object holding the conditions; its previous results
 *                 are discarded first.
 * string:         text to search. Only the pointer is stored in snip->string
 *                 (no copy is made) and sen_snip_get_result() reads from it,
 *                 so it must stay valid until the results have been fetched.
 * string_len:     length of string in bytes.
 * nresults:       out: number of snippets found. Written unconditionally.
 * max_tagged_len: out: the largest tagged_len computed for any snippet
 *                 (content bytes, or their entity-mapped length, plus tag
 *                 lengths, plus 1). Written unconditionally once the string
 *                 has been opened.
 *
 * Returns sen_invalid_argument when snip or string is NULL,
 * sen_memory_exhausted when the string cannot be opened, sen_success
 * otherwise (including when nothing was found).
 */
sen_rc
sen_snip_exec(sen_snip *snip, const char *string, unsigned int string_len,
              unsigned int *nresults, unsigned int *max_tagged_len)
{
  size_t i;
  if (!snip || !string) {
    return sen_invalid_argument;
  }
  exec_clean(snip);
  *nresults = 0;
  if (snip->flags & SEN_SNIP_NORMALIZE) {
    snip->nstr =
      sen_nstr_open(string, string_len, snip->encoding,
                    SEN_STR_WITH_CHECKS | SEN_STR_REMOVEBLANK);
  } else {
    snip->nstr =
      sen_fakenstr_open(string, string_len, snip->encoding,
                        SEN_STR_WITH_CHECKS | SEN_STR_REMOVEBLANK);
  }
  if (!snip->nstr) {
    exec_clean(snip);
    SEN_LOG(sen_log_alert, "sen_nstr_open on sen_snip_exec failed !");
    return sen_memory_exhausted;
  }
  /* Prime every condition with its first match (or mark it stopped). */
  for (i = 0; i < snip->cond_len; i++) {
    sen_bm_tunedbm(snip->cond + i, snip->nstr, snip->flags);
  }

  {
    _snip_tag_result *tag_result = snip->tag_result;
    _snip_result *snip_result = snip->snip_result;
    size_t last_end_offset = 0, last_last_end_offset = 0;
    unsigned int unfound_cond_count = snip->cond_len;

    *max_tagged_len = 0;
    /* Outer loop: one iteration per snippet. */
    while (1) {
      size_t tagged_len = 0, last_tag_end = 0;
      int_least8_t all_stop = 1, found_cond = 0;
      snip_result->tag_count = 0;

      /* Inner loop: collect the matches that fit into the current snippet. */
      while (1) {
        size_t min_start_offset = (size_t) -1;
        size_t max_end_offset = 0;
        snip_cond *cond = NULL;

        /* get condition which have minimum offset and is not stopped */
        /* (ties on the start offset go to the match that ends later) */
        for (i = 0; i < snip->cond_len; i++) {
          if (snip->cond[i].stopflag == SNIPCOND_NONSTOP &&
              (min_start_offset > snip->cond[i].start_offset ||
               (min_start_offset == snip->cond[i].start_offset &&
                max_end_offset < snip->cond[i].end_offset))) {
            min_start_offset = snip->cond[i].start_offset;
            max_end_offset = snip->cond[i].end_offset;
            cond = &snip->cond[i];
          }
        }
        if (!cond) {
          break;
        }
        /* check whether the condition is the first condition in the snippet */
        if (snip_result->tag_count == 0) {
          /* skip condition if the number of rest snippet field is smaller than */
          /* the number of unfound keywords. */
          if (snip->max_results - *nresults <= unfound_cond_count && cond->count > 0) {
            int_least8_t exclude_other_cond = 1;
            /* Keep this match anyway if a not-yet-seen keyword would fall
               inside the snippet it starts. */
            for (i = 0; i < snip->cond_len; i++) {
              if ((snip->cond + i) != cond
                  && snip->cond[i].end_offset <= cond->start_offset + snip->width
                  && snip->cond[i].count == 0) {
                exclude_other_cond = 0;
              }
            }
            if (exclude_other_cond) {
              sen_bm_tunedbm(cond, snip->nstr, snip->flags);
              continue;
            }
          }
          snip_result->start_offset = cond->start_offset;
          snip_result->first_tag_result_idx = snip->tag_count;
        } else {
          /* The match starts beyond the snippet window: finish this snippet. */
          if (cond->start_offset >= snip_result->start_offset + snip->width) {
            break;
          }
          /* check nesting to make valid HTML */
          /* ToDo: allow <test><te>te</te><st>st</st></test> */
          if (cond->start_offset < last_tag_end) {
            sen_bm_tunedbm(cond, snip->nstr, snip->flags);
            continue;
          }
        }
        if (cond->end_offset > snip_result->start_offset + snip->width) {
          /* If a keyword gets across a snippet, */
          /* it was skipped and never to be tagged. */
          cond->stopflag = SNIPCOND_ACROSS;
          sen_bm_tunedbm(cond, snip->nstr, snip->flags);
        } else {
          found_cond = 1;
          if (cond->count == 0) {
            unfound_cond_count--;
          }
          cond->count++;
          last_end_offset = cond->end_offset;

          /* Record the tag to insert around this match. */
          tag_result->cond = cond;
          tag_result->start_offset = cond->start_offset;
          tag_result->end_offset = last_tag_end = cond->end_offset;

          snip_result->tag_count++;
          tag_result++;
          tagged_len += cond->opentag_len + cond->closetag_len;
          if (++snip->tag_count >= MAX_SNIP_TAG_COUNT) {
            break;
          }
          sen_bm_tunedbm(cond, snip->nstr, snip->flags);
        }
      }
      if (!found_cond) {
        break;
      }
      /* Choose the window start from the first and last match, but never
         before the end of the previous snippet. */
      if (snip_result->start_offset + last_end_offset < snip->width) {
        snip_result->start_offset = 0;
      } else {
        /* NOTE(review): string_len - snip->width is unsigned and wraps when
           string_len < snip->width; MIN() then picks the first operand. */
        snip_result->start_offset =
          MAX(MIN
              ((snip_result->start_offset + last_end_offset - snip->width) / 2,
               string_len - snip->width), last_last_end_offset);
      }
      /* Move the start forward so it does not split a multi-byte character. */
      snip_result->start_offset =
        sen_snip_find_firstbyte(string, snip->encoding, snip_result->start_offset, 1);

      snip_result->end_offset = snip_result->start_offset + snip->width;
      if (snip_result->end_offset < string_len) {
        /* Move the end backward (step of -1 converted to size_t). */
        snip_result->end_offset =
          sen_snip_find_firstbyte(string, snip->encoding, snip_result->end_offset, -1);
      } else {
        snip_result->end_offset = string_len;
      }
      last_last_end_offset = snip_result->end_offset;

      /* A mapping of (sen_snip_mapping *) -1 selects the built-in HTML entity
         escaping; the + 1 leaves room for the terminating NUL. */
      if (snip->mapping == (sen_snip_mapping *) -1) {
        tagged_len +=
          count_mapped_chars(&string[snip_result->start_offset],
                             &string[snip_result->end_offset]) + 1;
      } else {
        tagged_len += snip_result->end_offset - snip_result->start_offset + 1;
      }

      *max_tagged_len = MAX(*max_tagged_len, tagged_len);

      snip_result->last_tag_result_idx = snip->tag_count - 1;
      (*nresults)++;
      snip_result++;

      if (*nresults == snip->max_results || snip->tag_count == MAX_SNIP_TAG_COUNT) {
        break;
      }
      /* Re-arm conditions that were only parked (SNIPCOND_ACROSS) for the
         next snippet; stop when every condition has run out of matches. */
      for (i = 0; i < snip->cond_len; i++) {
        if (snip->cond[i].stopflag != SNIPCOND_STOP) {
          all_stop = 0;
          snip->cond[i].stopflag = SNIPCOND_NONSTOP;
        }
      }
      if (all_stop) {
        break;
      }
    }
  }
  snip->snip_count = *nresults;
  snip->string = string;

  snip->max_tagged_len = *max_tagged_len;

  return sen_success;
}
653 
654 sen_rc
sen_snip_get_result(sen_snip * snip,const unsigned int index,char * result,unsigned int * result_len)655 sen_snip_get_result(sen_snip *snip, const unsigned int index, char *result, unsigned int *result_len)
656 {
657   char *p;
658   size_t i, j, k;
659   _snip_result *sres;
660 
661   if (snip->snip_count <= index || !snip->nstr) {
662     return sen_invalid_argument;
663   }
664 
665   SEN_ASSERT(snip->snip_count != 0 && snip->tag_count != 0);
666 
667   sres = &snip->snip_result[index];
668   j = sres->first_tag_result_idx;
669   for (p = result, i = sres->start_offset; i < sres->end_offset; i++) {
670     for (; j <= sres->last_tag_result_idx && snip->tag_result[j].start_offset == i; j++) {
671       if (snip->tag_result[j].end_offset > sres->end_offset) {
672         continue;
673       }
674       memcpy(p, snip->tag_result[j].cond->opentag, snip->tag_result[j].cond->opentag_len);
675       p += snip->tag_result[j].cond->opentag_len;
676     }
677 
678     if (snip->mapping == (sen_snip_mapping *) -1) {
679       switch (snip->string[i]) {
680       case '<':
681         *p++ = '&';
682         *p++ = 'l';
683         *p++ = 't';
684         *p++ = ';';
685         break;
686       case '>':
687         *p++ = '&';
688         *p++ = 'g';
689         *p++ = 't';
690         *p++ = ';';
691         break;
692       case '&':
693         *p++ = '&';
694         *p++ = 'a';
695         *p++ = 'm';
696         *p++ = 'p';
697         *p++ = ';';
698         break;
699       case '"':
700         *p++ = '&';
701         *p++ = 'q';
702         *p++ = 'u';
703         *p++ = 'o';
704         *p++ = 't';
705         *p++ = ';';
706         break;
707       default:
708         *p++ = snip->string[i];
709         break;
710       }
711     } else {
712       *p++ = snip->string[i];
713     }
714 
715     for (k = sres->last_tag_result_idx;
716          snip->tag_result[k].end_offset <= sres->end_offset; k--) {
717       /* TODO: avoid all loop */
718       if (snip->tag_result[k].end_offset == i + 1) {
719         memcpy(p, snip->tag_result[k].cond->closetag,
720                snip->tag_result[k].cond->closetag_len);
721         p += snip->tag_result[k].cond->closetag_len;
722       }
723       if (k <= sres->first_tag_result_idx) {
724         break;
725       }
726     };
727   }
728   *p = '\0';
729 
730   if(result_len) { *result_len = (unsigned int)(p - result); }
731   SEN_ASSERT((unsigned int)(p - result) <= snip->max_tagged_len);
732 
733   return sen_success;
734 }
735