1 /* Copyright(C) 2004-2005 Brazil
2
3 This library is free software; you can redistribute it and/or
4 modify it under the terms of the GNU Lesser General Public
5 License as published by the Free Software Foundation; either
6 version 2.1 of the License, or (at your option) any later version.
7
8 This library is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Lesser General Public License for more details.
12
13 You should have received a copy of the GNU Lesser General Public
14 License along with this library; if not, write to the Free Software
15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17 #include "senna_in.h"
18 #include <string.h>
19 #include <stddef.h>
20 #include "snip.h"
21 #include "ctx.h"
22
/* Larger of a and b.  Each argument may be evaluated twice, so avoid
   passing expressions with side effects. */
#ifndef MAX
# define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif /* MAX */
26
/* Smaller of a and b.  Each argument may be evaluated twice, so avoid
   passing expressions with side effects. */
#ifndef MIN
# define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif /* MIN */
30
/*
 * Parity check on the run of high-bit bytes (>= 0x80) that ends the first
 * y bytes of x.
 *
 * x : buffer to inspect
 * y : number of bytes of x to consider
 *
 * Returns 1 when the trailing run of bytes >= 0x80 has even length
 * (including a run of length zero, and y == 0), and 0 when it has odd length.
 *
 * The run is counted with an index instead of a pointer that walks
 * backwards: decrementing a pointer below x is undefined behaviour, which
 * the pointer form hit for y == 0 and for buffers made only of high bytes.
 */
static int
sen_bm_check_euc(const unsigned char *x, const size_t y)
{
  size_t run = 0;
  while (run < y && x[y - 1 - run] >= 0x80U) {
    run++;
  }
  /* run + 1 is the distance from the byte that stopped the scan to x + y */
  return (int) ((run + 1) & 1);
}
38
/*
 * Parity check on the run of bytes in 0x81-0x9f or 0xe0-0xfc that ends the
 * first y bytes of x (these are the ranges Shift_JIS uses for the first
 * byte of a double-byte character).
 *
 * x : buffer to inspect
 * y : number of bytes of x to consider
 *
 * Returns 1 when that trailing run has even length (including zero, and
 * y == 0), and 0 when it has odd length.
 *
 * The run is counted with an index instead of a pointer that walks
 * backwards: decrementing a pointer below x is undefined behaviour, which
 * the pointer form hit for y == 0 and for buffers made only of such bytes.
 */
static int
sen_bm_check_sjis(const unsigned char *x, const size_t y)
{
  size_t run = 0;
  while (run < y) {
    const unsigned char c = x[y - 1 - run];
    const int in_range = (c >= 0x81U && c <= 0x9fU) || (c >= 0xe0U && c <= 0xfcU);
    if (!in_range) {
      break;
    }
    run++;
  }
  /* run + 1 is the distance from the byte that stopped the scan to x + y */
  return (int) ((run + 1) & 1);
}
48
49 /*
50 static void
51 sen_bm_suffixes(const unsigned char *x, size_t m, size_t *suff)
52 {
53 size_t f, g;
54 intptr_t i;
55 f = 0;
56 suff[m - 1] = m;
57 g = m - 1;
58 for (i = m - 2; i >= 0; --i) {
59 if (i > (intptr_t) g && suff[i + m - 1 - f] < i - g)
60 suff[i] = suff[i + m - 1 - f];
61 else {
62 if (i < (intptr_t) g)
63 g = i;
64 f = i;
65 while (g > 0 && x[g] == x[g + m - 1 - f])
66 --g;
67 suff[i] = f - g;
68 }
69 }
70 }
71 */
72
73 static void
sen_bm_preBmBc(const unsigned char * x,size_t m,size_t * bmBc)74 sen_bm_preBmBc(const unsigned char *x, size_t m, size_t *bmBc)
75 {
76 size_t i;
77 for (i = 0; i < ASIZE; ++i) {
78 bmBc[i] = m;
79 }
80 for (i = 0; i < m - 1; ++i) {
81 bmBc[(unsigned int) x[i]] = m - (i + 1);
82 }
83 }
84
/*
 * Expanded inside sen_bm_tunedbm after the keyword bytes were matched at
 * index `found` of object->norm.  It uses these names from the expansion
 * site: cond, object, flags, found, m, shift and the scratch variable i.
 *
 * When object->checks[found] is non-zero the match is accepted: the macro
 * converts the position into offsets in object->orig by summing the
 * positive checks[] entries, stores the result in cond (start_offset,
 * end_offset, last_offset, last_found, found, found_alpha_head) and
 * RETURNS from the enclosing function.  When checks[found] is zero nothing
 * happens and control falls through.
 */
#define SEN_BM_COMPARE \
  if (object->checks[found]) { \
    size_t offset = cond->last_offset; \
    size_t found_alpha_head = cond->found_alpha_head; \
    /* accumulate the original-text offset from the previous match up to found */ \
    i = cond->last_found; \
    while (i < found) { \
      if (object->checks[i] > 0) { \
        found_alpha_head = i; \
        offset += object->checks[i]; \
      } \
      i++; \
    } \
    /* a negative entry at found: step back to the last positive entry seen */ \
    if (object->checks[found] >= 0) { \
      cond->last_found = found; \
    } else { \
      offset -= object->checks[found_alpha_head]; \
      cond->last_found = found_alpha_head; \
    } \
    cond->last_offset = offset; \
    cond->start_offset = cond->last_offset; \
    if (flags & SEN_SNIP_SKIP_LEADING_SPACES) { \
      while (cond->start_offset < object->orig_blen && \
             (i = sen_isspace(object->orig + cond->start_offset, \
                              object->encoding))) { \
        cond->start_offset += i; \
      } \
    } \
    /* extend offset over the matched bytes to get the end of the match */ \
    i = cond->last_found; \
    while (i < found + m) { \
      if (object->checks[i] > 0) { \
        offset += object->checks[i]; \
      } \
      i++; \
    } \
    cond->end_offset = offset; \
    cond->found = found + shift; \
    cond->found_alpha_head = found_alpha_head; \
    return; \
  }
119
/*
 * Expanded inside sen_bm_tunedbm with p pointing one past a candidate match
 * end in the text and cp one past the end of the keyword.  Uses p, cp, ck,
 * m, y, found and the scratch variable i from the expansion site.
 *
 * Compares p[-2] with ck (the caller loads ck from cp[-2]) and then walks
 * further back over indices 3..m.  p[-1] is not compared here.  When all
 * of those bytes agree, found is set to the match start index and
 * SEN_BM_COMPARE is expanded, which may return from the enclosing function.
 */
#define SEN_BM_BM_COMPARE \
  { \
    if (ck == p[-2]) { \
      i = 3; \
      while (i <= m && p[-(intptr_t)i] == cp[-(intptr_t)i]) { \
        ++i; \
      } \
      if (i > m) { \
        found = p - y - m; \
        SEN_BM_COMPARE; \
      } \
    } \
  }
131
132 void
sen_bm_tunedbm(snip_cond * cond,sen_nstr * object,int flags)133 sen_bm_tunedbm(snip_cond *cond, sen_nstr *object, int flags)
134 {
135 register unsigned char *limit, ck;
136 register const unsigned char *p, *cp;
137 register size_t *bmBc, delta1, i;
138
139 const unsigned char *x;
140 unsigned char *y;
141 size_t shift, found;
142
143 const size_t n = object->norm_blen, m = cond->keyword->norm_blen;
144
145 y = (unsigned char *) object->norm;
146 if (m == 1) {
147 if (n > cond->found) {
148 shift = 1;
149 p = memchr(y + cond->found, cond->keyword->norm[0], n - cond->found);
150 if (p != NULL) {
151 found = p - y;
152 SEN_BM_COMPARE;
153 }
154 }
155 cond->stopflag = SNIPCOND_STOP;
156 return;
157 }
158
159 x = (unsigned char *) cond->keyword->norm;
160 bmBc = cond->bmBc;
161 shift = cond->shift;
162
163 /* Restart */
164 p = y + m + cond->found;
165 cp = x + m;
166 ck = cp[-2];
167
168 /* 12 means 1(initial offset) + 10 (in loop) + 1 (shift) */
169 if (n - cond->found > 12 * m) {
170 limit = y + n - 11 * m;
171 while (p <= limit) {
172 p += bmBc[p[-1]];
173 if(!(delta1 = bmBc[p[-1]])) {
174 goto check;
175 }
176 p += delta1;
177 p += bmBc[p[-1]];
178 p += bmBc[p[-1]];
179 if(!(delta1 = bmBc[p[-1]])) {
180 goto check;
181 }
182 p += delta1;
183 p += bmBc[p[-1]];
184 p += bmBc[p[-1]];
185 if(!(delta1 = bmBc[p[-1]])) {
186 goto check;
187 }
188 p += delta1;
189 p += bmBc[p[-1]];
190 p += bmBc[p[-1]];
191 continue;
192 check:
193 SEN_BM_BM_COMPARE;
194 p += shift;
195 }
196 }
197 /* limit check + search */
198 limit = y + n;
199 while(p <= limit) {
200 if (!(delta1 = bmBc[p[-1]])) {
201 SEN_BM_BM_COMPARE;
202 p += shift;
203 }
204 p += delta1;
205 }
206 cond->stopflag = SNIPCOND_STOP;
207 }
208
/*
 * Count how many bytes the range [str, end) occupies once the four
 * characters <, >, & and " are replaced by their HTML entities.
 *
 * str : first byte of the range
 * end : one past the last byte of the range
 *
 * Returns the mapped length in bytes (no terminator included).
 */
static size_t
count_mapped_chars(const char *str, const char *end)
{
  size_t total = 0;
  const char *cur = str;

  while (cur != end) {
    const char c = *cur++;
    if (c == '<' || c == '>') {
      total += 4;               /* &lt; or &gt; */
    } else if (c == '&') {
      total += 5;               /* &amp; */
    } else if (c == '"') {
      total += 6;               /* &quot; */
    } else {
      total += 1;
    }
  }
  return total;
}
235
236 sen_rc
sen_snip_cond_close(snip_cond * cond)237 sen_snip_cond_close(snip_cond *cond)
238 {
239 if (!cond) {
240 return sen_invalid_argument;
241 }
242 if (cond->keyword) {
243 sen_nstr_close(cond->keyword);
244 }
245 return sen_success;
246 }
247
248 sen_rc
sen_snip_cond_init(snip_cond * sc,const char * keyword,unsigned int keyword_len,sen_encoding enc,int flags)249 sen_snip_cond_init(snip_cond *sc, const char *keyword, unsigned int keyword_len,
250 sen_encoding enc, int flags)
251 {
252 size_t norm_blen;
253 memset(sc, 0, sizeof(snip_cond));
254 if (flags & SEN_SNIP_NORMALIZE) {
255 if (!(sc->keyword = sen_nstr_open(keyword, keyword_len,
256 enc, SEN_STR_REMOVEBLANK))) {
257 SEN_LOG(sen_log_alert, "sen_nstr_open on snip_cond_init failed !");
258 return sen_memory_exhausted;
259 }
260 } else {
261 if (!(sc->keyword = sen_fakenstr_open(keyword, keyword_len,
262 enc, SEN_STR_REMOVEBLANK))) {
263 SEN_LOG(sen_log_alert, "sen_fakenstr_open on snip_cond_init failed !");
264 return sen_memory_exhausted;
265 }
266 }
267 norm_blen = sc->keyword->norm_blen; /* byte length, not cond->keyword->length */
268 if (!norm_blen) {
269 sen_snip_cond_close(sc);
270 return sen_invalid_argument;
271 }
272 if (norm_blen != 1) {
273 sen_bm_preBmBc((unsigned char *)sc->keyword->norm, norm_blen, sc->bmBc);
274 sc->shift = sc->bmBc[(unsigned char)sc->keyword->norm[norm_blen - 1]];
275 sc->bmBc[(unsigned char)sc->keyword->norm[norm_blen - 1]] = 0;
276 }
277 return sen_success;
278 }
279
280 void
sen_snip_cond_reinit(snip_cond * cond)281 sen_snip_cond_reinit(snip_cond *cond)
282 {
283 cond->found = 0;
284 cond->last_found = 0;
285 cond->last_offset = 0;
286 cond->start_offset = 0;
287 cond->end_offset = 0;
288
289 cond->count = 0;
290 cond->stopflag = SNIPCOND_NONSTOP;
291 }
292
293 sen_rc
sen_snip_add_cond(sen_snip * snip,const char * keyword,unsigned int keyword_len,const char * opentag,unsigned int opentag_len,const char * closetag,unsigned int closetag_len)294 sen_snip_add_cond(sen_snip *snip,
295 const char *keyword, unsigned int keyword_len,
296 const char *opentag, unsigned int opentag_len,
297 const char *closetag, unsigned int closetag_len)
298 {
299 sen_rc rc;
300 snip_cond *cond;
301 sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
302
303 if (!snip || !keyword || !keyword_len || snip->cond_len >= MAX_SNIP_COND_COUNT) {
304 return sen_invalid_argument;
305 }
306 cond = snip->cond + snip->cond_len;
307 if ((rc = sen_snip_cond_init(cond, keyword, keyword_len,
308 snip->encoding, snip->flags))) {
309 return rc;
310 }
311 if (cond->keyword->norm_blen > snip->width) {
312 sen_snip_cond_close(cond);
313 return sen_invalid_argument;
314 }
315 if (opentag) {
316 if (snip->flags & SEN_SNIP_COPY_TAG) {
317 char *t = SEN_MALLOC(opentag_len + 1);
318 if (!t) {
319 sen_snip_cond_close(cond);
320 return sen_memory_exhausted;
321 }
322 memcpy(t, opentag, opentag_len);
323 t[opentag_len]= '\0'; /* not required, but for ql use */
324 cond->opentag = t;
325 } else {
326 cond->opentag = opentag;
327 }
328 cond->opentag_len = opentag_len;
329 } else {
330 cond->opentag = snip->defaultopentag;
331 cond->opentag_len = snip->defaultopentag_len;
332 }
333 if (closetag) {
334 if (snip->flags & SEN_SNIP_COPY_TAG) {
335 char *t = SEN_MALLOC(closetag_len + 1);
336 if (!t) {
337 if (opentag) { SEN_FREE((void *)cond->opentag); }
338 return sen_memory_exhausted;
339 }
340 memcpy(t, closetag, closetag_len);
341 t[closetag_len]= '\0'; /* not required, but for ql use */
342 cond->closetag = t;
343 } else {
344 cond->closetag = closetag;
345 }
346 cond->closetag_len = closetag_len;
347 } else {
348 cond->closetag = snip->defaultclosetag;
349 cond->closetag_len = snip->defaultclosetag_len;
350 }
351 snip->cond_len++;
352 return sen_success;
353 }
354
355 static size_t
sen_snip_find_firstbyte(const char * string,sen_encoding encoding,size_t offset,size_t doffset)356 sen_snip_find_firstbyte(const char *string, sen_encoding encoding, size_t offset,
357 size_t doffset)
358 {
359 switch (encoding) {
360 case sen_enc_euc_jp:
361 while (!(sen_bm_check_euc((unsigned char *) string, offset)))
362 offset += doffset;
363 break;
364 case sen_enc_sjis:
365 if (!(sen_bm_check_sjis((unsigned char *) string, offset)))
366 offset += doffset;
367 break;
368 case sen_enc_utf8:
369 while (string[offset] <= (char)0xc0)
370 offset += doffset;
371 break;
372 default:
373 break;
374 }
375 return offset;
376 }
377
378 sen_snip *
sen_snip_open(sen_encoding encoding,int flags,unsigned int width,unsigned int max_results,const char * defaultopentag,unsigned int defaultopentag_len,const char * defaultclosetag,unsigned int defaultclosetag_len,sen_snip_mapping * mapping)379 sen_snip_open(sen_encoding encoding, int flags, unsigned int width,
380 unsigned int max_results,
381 const char *defaultopentag, unsigned int defaultopentag_len,
382 const char *defaultclosetag, unsigned int defaultclosetag_len,
383 sen_snip_mapping *mapping)
384 {
385 sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
386 sen_snip *ret = NULL;
387 if (!(ret = SEN_MALLOC(sizeof(sen_snip)))) {
388 SEN_LOG(sen_log_alert, "sen_snip allocation failed on sen_snip_open");
389 return NULL;
390 }
391 if (max_results > MAX_SNIP_RESULT_COUNT || max_results == 0) {
392 SEN_LOG(sen_log_warning, "max_results is invalid on sen_snip_open");
393 return NULL;
394 }
395 ret->encoding = encoding;
396 ret->flags = flags;
397 ret->width = width;
398 ret->max_results = max_results;
399 if (flags & SEN_SNIP_COPY_TAG) {
400 char *t;
401 t = SEN_MALLOC(defaultopentag_len + 1);
402 if (!t) {
403 SEN_FREE(ret);
404 return NULL;
405 }
406 memcpy(t, defaultopentag, defaultopentag_len);
407 t[defaultopentag_len]= '\0'; /* not required, but for ql use */
408 ret->defaultopentag = t;
409
410 t = SEN_MALLOC(defaultclosetag_len + 1);
411 if (!t) {
412 SEN_FREE((void *)ret->defaultopentag);
413 SEN_FREE(ret);
414 return NULL;
415 }
416 memcpy(t, defaultclosetag, defaultclosetag_len);
417 t[defaultclosetag_len]= '\0'; /* not required, but for ql use */
418 ret->defaultclosetag = t;
419 } else {
420 ret->defaultopentag = defaultopentag;
421 ret->defaultclosetag = defaultclosetag;
422 }
423 ret->defaultopentag_len = defaultopentag_len;
424 ret->defaultclosetag_len = defaultclosetag_len;
425 ret->cond_len = 0;
426 ret->mapping = mapping;
427 ret->nstr = NULL;
428 ret->tag_count = 0;
429 ret->snip_count = 0;
430
431 return ret;
432 }
433
434 static sen_rc
exec_clean(sen_snip * snip)435 exec_clean(sen_snip *snip)
436 {
437 snip_cond *cond, *cond_end;
438 if (snip->nstr) {
439 sen_nstr_close(snip->nstr);
440 snip->nstr = NULL;
441 }
442 snip->tag_count = 0;
443 snip->snip_count = 0;
444 for (cond = snip->cond, cond_end = cond + snip->cond_len;
445 cond < cond_end; cond++) {
446 sen_snip_cond_reinit(cond);
447 }
448 return sen_success;
449 }
450
451 sen_rc
sen_snip_close(sen_snip * snip)452 sen_snip_close(sen_snip *snip)
453 {
454 sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
455 snip_cond *cond, *cond_end;
456 if (!snip) { return sen_invalid_argument; }
457 if (snip->flags & SEN_SNIP_COPY_TAG) {
458 int i;
459 snip_cond *sc;
460 const char *dot = snip->defaultopentag, *dct = snip->defaultclosetag;
461 for (i = snip->cond_len, sc = snip->cond; i; i--, sc++) {
462 if (sc->opentag != dot) { SEN_FREE((void *)sc->opentag); }
463 if (sc->closetag != dct) { SEN_FREE((void *)sc->closetag); }
464 }
465 if (dot) { SEN_FREE((void *)dot); }
466 if (dct) { SEN_FREE((void *)dct); }
467 }
468 if (snip->nstr) {
469 sen_nstr_close(snip->nstr);
470 }
471 for (cond = snip->cond, cond_end = cond + snip->cond_len;
472 cond < cond_end; cond++) {
473 sen_snip_cond_close(cond);
474 }
475 SEN_FREE(snip);
476 return sen_success;
477 }
478
479 sen_rc
sen_snip_exec(sen_snip * snip,const char * string,unsigned int string_len,unsigned int * nresults,unsigned int * max_tagged_len)480 sen_snip_exec(sen_snip *snip, const char *string, unsigned int string_len,
481 unsigned int *nresults, unsigned int *max_tagged_len)
482 {
483 size_t i;
484 if (!snip || !string) {
485 return sen_invalid_argument;
486 }
487 exec_clean(snip);
488 *nresults = 0;
489 if (snip->flags & SEN_SNIP_NORMALIZE) {
490 snip->nstr =
491 sen_nstr_open(string, string_len, snip->encoding,
492 SEN_STR_WITH_CHECKS | SEN_STR_REMOVEBLANK);
493 } else {
494 snip->nstr =
495 sen_fakenstr_open(string, string_len, snip->encoding,
496 SEN_STR_WITH_CHECKS | SEN_STR_REMOVEBLANK);
497 }
498 if (!snip->nstr) {
499 exec_clean(snip);
500 SEN_LOG(sen_log_alert, "sen_nstr_open on sen_snip_exec failed !");
501 return sen_memory_exhausted;
502 }
503 for (i = 0; i < snip->cond_len; i++) {
504 sen_bm_tunedbm(snip->cond + i, snip->nstr, snip->flags);
505 }
506
507 {
508 _snip_tag_result *tag_result = snip->tag_result;
509 _snip_result *snip_result = snip->snip_result;
510 size_t last_end_offset = 0, last_last_end_offset = 0;
511 unsigned int unfound_cond_count = snip->cond_len;
512
513 *max_tagged_len = 0;
514 while (1) {
515 size_t tagged_len = 0, last_tag_end = 0;
516 int_least8_t all_stop = 1, found_cond = 0;
517 snip_result->tag_count = 0;
518
519 while (1) {
520 size_t min_start_offset = (size_t) -1;
521 size_t max_end_offset = 0;
522 snip_cond *cond = NULL;
523
524 /* get condition which have minimum offset and is not stopped */
525 for (i = 0; i < snip->cond_len; i++) {
526 if (snip->cond[i].stopflag == SNIPCOND_NONSTOP &&
527 (min_start_offset > snip->cond[i].start_offset ||
528 (min_start_offset == snip->cond[i].start_offset &&
529 max_end_offset < snip->cond[i].end_offset))) {
530 min_start_offset = snip->cond[i].start_offset;
531 max_end_offset = snip->cond[i].end_offset;
532 cond = &snip->cond[i];
533 }
534 }
535 if (!cond) {
536 break;
537 }
538 /* check whether condtion is the first condition in snippet */
539 if (snip_result->tag_count == 0) {
540 /* skip condition if the number of rest snippet field is smaller than */
541 /* the number of unfound keywords. */
542 if (snip->max_results - *nresults <= unfound_cond_count && cond->count > 0) {
543 int_least8_t exclude_other_cond = 1;
544 for (i = 0; i < snip->cond_len; i++) {
545 if ((snip->cond + i) != cond
546 && snip->cond[i].end_offset <= cond->start_offset + snip->width
547 && snip->cond[i].count == 0) {
548 exclude_other_cond = 0;
549 }
550 }
551 if (exclude_other_cond) {
552 sen_bm_tunedbm(cond, snip->nstr, snip->flags);
553 continue;
554 }
555 }
556 snip_result->start_offset = cond->start_offset;
557 snip_result->first_tag_result_idx = snip->tag_count;
558 } else {
559 if (cond->start_offset >= snip_result->start_offset + snip->width) {
560 break;
561 }
562 /* check nesting to make valid HTML */
563 /* ToDo: allow <test><te>te</te><st>st</st></test> */
564 if (cond->start_offset < last_tag_end) {
565 sen_bm_tunedbm(cond, snip->nstr, snip->flags);
566 continue;
567 }
568 }
569 if (cond->end_offset > snip_result->start_offset + snip->width) {
570 /* If a keyword gets across a snippet, */
571 /* it was skipped and never to be tagged. */
572 cond->stopflag = SNIPCOND_ACROSS;
573 sen_bm_tunedbm(cond, snip->nstr, snip->flags);
574 } else {
575 found_cond = 1;
576 if (cond->count == 0) {
577 unfound_cond_count--;
578 }
579 cond->count++;
580 last_end_offset = cond->end_offset;
581
582 tag_result->cond = cond;
583 tag_result->start_offset = cond->start_offset;
584 tag_result->end_offset = last_tag_end = cond->end_offset;
585
586 snip_result->tag_count++;
587 tag_result++;
588 tagged_len += cond->opentag_len + cond->closetag_len;
589 if (++snip->tag_count >= MAX_SNIP_TAG_COUNT) {
590 break;
591 }
592 sen_bm_tunedbm(cond, snip->nstr, snip->flags);
593 }
594 }
595 if (!found_cond) {
596 break;
597 }
598 if (snip_result->start_offset + last_end_offset < snip->width) {
599 snip_result->start_offset = 0;
600 } else {
601 snip_result->start_offset =
602 MAX(MIN
603 ((snip_result->start_offset + last_end_offset - snip->width) / 2,
604 string_len - snip->width), last_last_end_offset);
605 }
606 snip_result->start_offset =
607 sen_snip_find_firstbyte(string, snip->encoding, snip_result->start_offset, 1);
608
609 snip_result->end_offset = snip_result->start_offset + snip->width;
610 if (snip_result->end_offset < string_len) {
611 snip_result->end_offset =
612 sen_snip_find_firstbyte(string, snip->encoding, snip_result->end_offset, -1);
613 } else {
614 snip_result->end_offset = string_len;
615 }
616 last_last_end_offset = snip_result->end_offset;
617
618 if (snip->mapping == (sen_snip_mapping *) -1) {
619 tagged_len +=
620 count_mapped_chars(&string[snip_result->start_offset],
621 &string[snip_result->end_offset]) + 1;
622 } else {
623 tagged_len += snip_result->end_offset - snip_result->start_offset + 1;
624 }
625
626 *max_tagged_len = MAX(*max_tagged_len, tagged_len);
627
628 snip_result->last_tag_result_idx = snip->tag_count - 1;
629 (*nresults)++;
630 snip_result++;
631
632 if (*nresults == snip->max_results || snip->tag_count == MAX_SNIP_TAG_COUNT) {
633 break;
634 }
635 for (i = 0; i < snip->cond_len; i++) {
636 if (snip->cond[i].stopflag != SNIPCOND_STOP) {
637 all_stop = 0;
638 snip->cond[i].stopflag = SNIPCOND_NONSTOP;
639 }
640 }
641 if (all_stop) {
642 break;
643 }
644 }
645 }
646 snip->snip_count = *nresults;
647 snip->string = string;
648
649 snip->max_tagged_len = *max_tagged_len;
650
651 return sen_success;
652 }
653
654 sen_rc
sen_snip_get_result(sen_snip * snip,const unsigned int index,char * result,unsigned int * result_len)655 sen_snip_get_result(sen_snip *snip, const unsigned int index, char *result, unsigned int *result_len)
656 {
657 char *p;
658 size_t i, j, k;
659 _snip_result *sres;
660
661 if (snip->snip_count <= index || !snip->nstr) {
662 return sen_invalid_argument;
663 }
664
665 SEN_ASSERT(snip->snip_count != 0 && snip->tag_count != 0);
666
667 sres = &snip->snip_result[index];
668 j = sres->first_tag_result_idx;
669 for (p = result, i = sres->start_offset; i < sres->end_offset; i++) {
670 for (; j <= sres->last_tag_result_idx && snip->tag_result[j].start_offset == i; j++) {
671 if (snip->tag_result[j].end_offset > sres->end_offset) {
672 continue;
673 }
674 memcpy(p, snip->tag_result[j].cond->opentag, snip->tag_result[j].cond->opentag_len);
675 p += snip->tag_result[j].cond->opentag_len;
676 }
677
678 if (snip->mapping == (sen_snip_mapping *) -1) {
679 switch (snip->string[i]) {
680 case '<':
681 *p++ = '&';
682 *p++ = 'l';
683 *p++ = 't';
684 *p++ = ';';
685 break;
686 case '>':
687 *p++ = '&';
688 *p++ = 'g';
689 *p++ = 't';
690 *p++ = ';';
691 break;
692 case '&':
693 *p++ = '&';
694 *p++ = 'a';
695 *p++ = 'm';
696 *p++ = 'p';
697 *p++ = ';';
698 break;
699 case '"':
700 *p++ = '&';
701 *p++ = 'q';
702 *p++ = 'u';
703 *p++ = 'o';
704 *p++ = 't';
705 *p++ = ';';
706 break;
707 default:
708 *p++ = snip->string[i];
709 break;
710 }
711 } else {
712 *p++ = snip->string[i];
713 }
714
715 for (k = sres->last_tag_result_idx;
716 snip->tag_result[k].end_offset <= sres->end_offset; k--) {
717 /* TODO: avoid all loop */
718 if (snip->tag_result[k].end_offset == i + 1) {
719 memcpy(p, snip->tag_result[k].cond->closetag,
720 snip->tag_result[k].cond->closetag_len);
721 p += snip->tag_result[k].cond->closetag_len;
722 }
723 if (k <= sres->first_tag_result_idx) {
724 break;
725 }
726 };
727 }
728 *p = '\0';
729
730 if(result_len) { *result_len = (unsigned int)(p - result); }
731 SEN_ASSERT((unsigned int)(p - result) <= snip->max_tagged_len);
732
733 return sen_success;
734 }
735