1 /* This file is part of the Zebra server.
2 Copyright (C) 2004-2013 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
42
43 #define TERMSET_DISABLE 1
44
rpn_char_map_handler(void * vp,const char ** from,int len)45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50 if (out && *out)
51 {
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
54 while (*outp)
55 {
56 yaz_log(YLOG_LOG, "%02X", *outp);
57 outp++;
58 }
59 }
60 #endif
61 return out;
62 }
63
/* Record zm in map_info and install the dictionary grep character map.
   ICU maps get no callback (both arguments 0); all other maps use
   rpn_char_map_handler with map_info as its client data. */
void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
                          struct rpn_char_map_info *map_info)
{
    map_info->zm = zm;
    if (!zebra_maps_is_icu(zm))
    {
        dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
        return;
    }
    dict_grep_cmap(reg->dict, 0, 0);
}
73
74 #define TERM_COUNT
75
76 struct grep_info {
77 #ifdef TERM_COUNT
78 int *term_no;
79 #endif
80 ISAM_P *isam_p_buf;
81 int isam_p_size;
82 int isam_p_indx;
83 int trunc_max;
84 ZebraHandle zh;
85 const char *index_type;
86 ZebraSet termset;
87 };
88
add_isam_p(const char * name,const char * info,struct grep_info * p)89 static int add_isam_p(const char *name, const char *info,
90 struct grep_info *p)
91 {
92 if (!log_level_set)
93 {
94 log_level_rpn = yaz_log_module_level("rpn");
95 log_level_set = 1;
96 }
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
100 return 1;
101
102 if (p->isam_p_indx == p->isam_p_size)
103 {
104 ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT
106 int *new_term_no;
107 #endif
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110 p->isam_p_size);
111 if (p->isam_p_buf)
112 {
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
116 }
117 p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121 if (p->term_no)
122 {
123 memcpy(new_term_no, p->isam_p_buf,
124 p->isam_p_indx * sizeof(*p->term_no));
125 xfree(p->term_no);
126 }
127 p->term_no = new_term_no;
128 #endif
129 }
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133 if (p->termset)
134 {
135 const char *db;
136 char term_tmp[IT_MAX_WORD];
137 int ord = 0;
138 const char *index_name;
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
140
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
146
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
149 }
150 (p->isam_p_indx)++;
151 return 0;
152 }
153
/* dict_lookup_grep callback: p is the struct grep_info accumulator;
   the hit is forwarded to add_isam_p and its result returned. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *grep_info = (struct grep_info *) p;

    return add_isam_p(name, info, grep_info);
}
158
/* Advance *src past leading input that the map translates to space.
 * Scanning stops at end of string, at the first character listed in
 * ct1 (when ct1 is non-null) or at the first input not mapped to space.
 * Returns the character at the new *src (0 when nothing is left).
 */
static int term_pre(zebra_map_t zm, const char **src,
                    const char *ct1, int first)
{
    const char *cur = *src;

    while (*cur != '\0' && !(ct1 && strchr(ct1, *cur)))
    {
        const char *next = cur;
        const char **map = zebra_maps_input(zm, &next, strlen(next), first);

        if (**map != *CHR_SPACE)
            break;
        cur = next;
    }
    *src = cur;
    return *cur;
}
179
180
/* Render in_size bytes of in_buf into out_buf as "HH:c " groups, where
 * HH is the byte in upper-case hex and c is the byte itself when it is
 * printable ASCII (32..126) or '?' otherwise.
 *
 * out_size must exceed 20.  As soon as the text grows beyond
 * out_size - 20 characters, ".." is appended and the rest of the input
 * is skipped.  The result is always NUL-terminated.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0; /* characters written so far; avoids repeated strlen */
    int k;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    *out_buf = '\0';
    for (k = 0; k < in_size; k++)
    {
        int c = in_buf[k] & 0xff;
        int pc = (c < 32 || c > 126) ? '?' : c;
        int n = snprintf(out_buf + used, out_size - used,
                         "%02X:%c ", c, pc);

        if (n < 0)
            break;
        used += (size_t) n;
        if (used > out_size - 20)
        {
            /* at least 15 bytes remain, so ".." and the NUL always fit */
            memcpy(out_buf + used, "..", 3);
            break;
        }
    }
}
206
207 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
208
/* Append one non-space input unit [start, end) to the term buffers.
 *
 * display_term always receives the raw input bytes.  term_dict receives
 * map[0] verbatim when q_map_match is set; otherwise it receives the raw
 * bytes with every character found in REGEX_CHARS preceded by a
 * backslash.
 */
static void add_non_space(const char *start, const char *end,
                          WRBUF term_dict,
                          WRBUF display_term,
                          const char **map, int q_map_match)
{
    const char *cp;

    wrbuf_write(display_term, start, (size_t) (end - start));
    if (q_map_match)
    {
        wrbuf_puts(term_dict, map[0]);
        return;
    }
    for (cp = start; cp < end; cp++)
    {
        if (strchr(REGEX_CHARS, *cp))
            wrbuf_putc(term_dict, '\\');
        wrbuf_putc(term_dict, *cp);
    }
}
235
236
237 /* ICU sort keys seem to be of the form
238 basechars \x01 accents \x01 length
239 For now we'll just right truncate from basechars . This
240 may give false hits due to accents not being used.
241 */
/* Given a buffer of i bytes, walk backwards over two '\x01' separators
   (first the one before the length part, then the one before the
   accents part) and return the index of the second one, i.e. the
   number of leading base characters.  Returns 0 when fewer than two
   separators are present. */
static size_t icu_basechars(const char *buf, size_t i)
{
    int section;

    for (section = 0; section < 2; section++)
    {
        while (i > 0)
        {
            i--;
            if (buf[i] == '\x01')
                break;
        }
    }
    return i;
}
250
/* Regexp-1 term handling for ICU maps.
 *
 * Literal stretches of the input (anything not in REGEX_CHARS or '-')
 * are tokenized through zm; the base characters of the first token are
 * appended to term_dict (escaped) and its display form to display_term.
 * Regex operator characters are copied through to both buffers as-is.
 * With space_split set, scanning stops at the first space.
 *
 * On return *src points at the stop position.  Returns the number of
 * literal stretches that produced a token; when non-zero, term_dict is
 * finished with "\x01\x01.*".
 */
static int term_102_icu(zebra_map_t zm,
                        const char **src, WRBUF term_dict, int space_split,
                        WRBUF display_term)
{
    int no_terms = 0;
    const char *seg = *src; /* start of the pending literal stretch */
    const char *scan;

    while (*seg == ' ')
        seg++;
    scan = seg;
    for (;;)
    {
        if (space_split && *scan == ' ')
            break;
        if (*scan != '\0' && !strchr(REGEX_CHARS "-", *scan))
        {
            scan++;
            continue;
        }
        /* end of input or regex reserved char: flush pending literal */
        if (scan != seg)
        {
            const char *res_buf = 0;
            size_t res_len = 0;
            const char *display_buf;
            size_t display_len;

            zebra_map_tokenize_start(zm, seg, scan - seg);

            if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
                                        &display_buf, &display_len))
            {
                size_t k;
                size_t keep = icu_basechars(res_buf, res_len);

                for (k = 0; k < keep; k++)
                {
                    char ch = res_buf[k];

                    if (strchr(REGEX_CHARS "\\", ch))
                        wrbuf_putc(term_dict, '\\');
                    if (ch < 32)
                        wrbuf_putc(term_dict, '\x01');
                    wrbuf_putc(term_dict, ch);
                }
                wrbuf_write(display_term, display_buf, display_len);
                no_terms++;
            }
        }
        if (*scan == '\0')
            break;

        /* pass the operator character through unchanged */
        wrbuf_putc(term_dict, *scan);
        wrbuf_putc(display_term, *scan);

        scan++;
        seg = scan;
    }
    if (no_terms)
        wrbuf_puts(term_dict, "\x01\x01.*");
    *src = scan;
    return no_terms;
}
312
/* Plain / truncated term handling for ICU maps.
 *
 * Skips leading spaces, takes the input up to the next space
 * (space_split) or to the end of the string, tokenizes it through zm
 * and appends the first token to term_dict (escaped) and its display
 * form to display_term.  *src is advanced past the consumed input.
 *
 * mode: 0 uses the full token.  Any non-zero mode keeps only the base
 *       characters (icu_basechars).  Bit 1 (value 2) prepends ".*";
 *       bit 0 (value 1) appends ".*"; a non-zero mode without bit 0
 *       appends "\x01\x01.*" instead.
 *
 * Returns 1 when a token was produced, 0 when the input was empty or
 * tokenization yielded nothing.
 */
static int term_100_icu(zebra_map_t zm,
                        const char **src, WRBUF term_dict, int space_split,
                        WRBUF display_term,
                        int mode)
{
    const char *res_buf = 0;
    size_t res_len = 0;
    const char *display_buf;
    size_t display_len;
    const char *begin = *src;
    const char *stop;
    size_t k;

    while (*begin == ' ')
        begin++;
    if (!*begin)
        return 0;

    stop = begin;
    if (space_split)
    {
        while (*stop && *stop != ' ')
            stop++;
    }
    else
        stop += strlen(begin);

    *src = stop;

    zebra_map_tokenize_start(zm, begin, stop - begin);
    if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
                                 &display_buf, &display_len))
        return 0;

    wrbuf_write(display_term, display_buf, display_len);
    if (mode)
        res_len = icu_basechars(res_buf, res_len);
    if (mode & 2)
        wrbuf_puts(term_dict, ".*");
    for (k = 0; k < res_len; k++)
    {
        char ch = res_buf[k];

        if (strchr(REGEX_CHARS "\\", ch))
            wrbuf_putc(term_dict, '\\');
        if (ch < 32)
            wrbuf_putc(term_dict, '\x01');
        wrbuf_putc(term_dict, ch);
    }
    if (mode & 1)
        wrbuf_puts(term_dict, ".*");
    else if (mode != 0)
        wrbuf_puts(term_dict, "\x01\x01.*");
    return 1;
}
371
372 /* term_100: handle term, where trunc = none(no operators at all) */
/* Handle a term without truncation operators.
 *
 * Input units are mapped through zm and appended via add_non_space.
 * With space_split set, the first unit that maps to space ends the
 * term.  Otherwise (complete subfield) a space unit is held back and
 * only emitted, regex-escaped, when a non-space unit follows it, so
 * trailing space is dropped.
 *
 * *src is advanced to where scanning stopped.  Returns the number of
 * non-space units added (0 if the term is empty).
 */
static int term_100(zebra_map_t zm,
                    const char **src, WRBUF term_dict, int space_split,
                    WRBUF display_term)
{
    const char *cur;
    const char *held_from = 0; /* most recent space unit, not yet emitted */
    const char *held_to = 0;
    int count = 0;

    if (!term_pre(zm, src, 0, !space_split))
        return 0;
    for (cur = *src; *cur; )
    {
        const char *tok = cur;
        int q_map_match = 0;
        const char **map = zebra_maps_search(zm, &cur, strlen(cur),
                                             &q_map_match);

        if (**map == *CHR_SPACE)
        {
            if (space_split)
                break;
            /* save space mapping for later .. */
            held_from = tok;
            held_to = cur;
            continue;
        }
        if (held_from)
        {
            /* reload last space */
            for (; held_from < held_to; held_from++)
            {
                if (strchr(REGEX_CHARS, *held_from))
                    wrbuf_putc(term_dict, '\\');
                wrbuf_putc(display_term, *held_from);
                wrbuf_putc(term_dict, *held_from);
            }
            held_from = held_to = 0;
        }
        count++;
        add_non_space(tok, cur, term_dict, display_term, map, q_map_match);
    }
    *src = cur;
    return count;
}
427
428 /* term_101: handle term, where trunc = Process # */
/* Handle a term where '#' is a truncation operator: each '#' becomes
 * ".*" in term_dict (and '#' in display_term); everything else is
 * mapped through zm and appended via add_non_space.  With space_split
 * set, a unit mapping to space ends the term.
 *
 * *src is advanced to where scanning stopped.  Returns the number of
 * units and operators processed.
 */
static int term_101(zebra_map_t zm,
                    const char **src, WRBUF term_dict, int space_split,
                    WRBUF display_term)
{
    const char *cur;
    int count = 0;

    if (!term_pre(zm, src, "#", !space_split))
        return 0;
    for (cur = *src; *cur; )
    {
        const char *tok;
        const char **map;
        int q_map_match = 0;

        if (*cur == '#')
        {
            count++;
            wrbuf_puts(term_dict, ".*");
            wrbuf_putc(display_term, *cur);
            cur++;
            continue;
        }
        tok = cur;
        map = zebra_maps_search(zm, &cur, strlen(cur), &q_map_match);
        if (space_split && **map == *CHR_SPACE)
            break;

        count++;
        add_non_space(tok, cur, term_dict, display_term, map, q_map_match);
    }
    *src = cur;
    return count;
}
464
465 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* Handle a regular-expression term.
 *
 * When errors is non-null and the term starts with "+d+" (d a digit,
 * with more input following), *errors is set to d capped at 3 and the
 * prefix is consumed.  After that, regex operator characters are copied
 * verbatim to both buffers and all other input is mapped through zm and
 * appended via add_non_space.  With space_split set, a unit mapping to
 * space ends the term.
 *
 * *src is advanced to where scanning stopped.  Returns the number of
 * units and operators processed.
 */
static int term_103(zebra_map_t zm, const char **src,
                    WRBUF term_dict, int *errors, int space_split,
                    WRBUF display_term)
{
    const char *cur;
    int count = 0;

    if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
        return 0;
    cur = *src;
    if (errors && cur[0] == '+' && cur[1] && cur[2] == '+' && cur[3] &&
        isdigit((unsigned char) cur[1]))
    {
        int n = cur[1] - '0';

        *errors = n > 3 ? 3 : n;
        cur += 3;
    }
    while (*cur)
    {
        const char *tok;
        const char **map;
        int q_map_match = 0;

        if (strchr("^\\()[].*+?|-", *cur))
        {
            wrbuf_putc(display_term, *cur);
            wrbuf_putc(term_dict, *cur);
            cur++;
            count++;
            continue;
        }
        tok = cur;
        map = zebra_maps_search(zm, &cur, strlen(cur), &q_map_match);
        if (space_split && **map == *CHR_SPACE)
            break;

        count++;
        add_non_space(tok, cur, term_dict, display_term, map, q_map_match);
    }
    *src = cur;

    return count;
}
510
511 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Regexp-1: same processing as term_103 but without an error-count
   prefix (errors is passed as NULL). */
static int term_102(zebra_map_t zm, const char **src,
                    WRBUF term_dict, int space_split, WRBUF display_term)
{
    int *no_errors = NULL;

    return term_103(zm, src, term_dict, no_errors, space_split, display_term);
}
517
518
519 /* term_104: handle term, process ?n * # */
/* Handle a term with the operators ?n, ?, * and #.
 *
 *   ?n  (n decimal)  -> ".?" repeated n times, n capped at 20
 *   ?   (no digits)  -> ".*"
 *   *                -> ".*"
 *   #                -> "."
 *
 * Operators (and the digits of ?n) are copied to display_term as
 * typed.  All other input is mapped through zm and appended via
 * add_non_space.  With space_split set, a unit mapping to space ends
 * the term.
 *
 * *src is advanced to where scanning stopped.  Returns the number of
 * units and operators processed.
 */
static int term_104(zebra_map_t zm, const char **src,
                    WRBUF term_dict, int space_split, WRBUF display_term)
{
    const char *s0;
    const char **map;
    int i = 0;

    if (!term_pre(zm, src, "?*#", !space_split))
        return 0;
    s0 = *src;
    while (*s0)
    {
        if (*s0 == '?')
        {
            i++;
            wrbuf_putc(display_term, *s0);
            s0++;
            if (*s0 >= '0' && *s0 <= '9')
            {
                int limit = 0;
                while (*s0 >= '0' && *s0 <= '9')
                {
                    /* Saturate: once the value exceeds the cap of 20
                       further digits cannot change the outcome, and
                       accumulating them would overflow int on a long
                       digit run. */
                    if (limit <= 20)
                        limit = limit * 10 + (*s0 - '0');
                    wrbuf_putc(display_term, *s0);
                    s0++;
                }
                if (limit > 20)
                    limit = 20;
                while (--limit >= 0)
                {
                    wrbuf_puts(term_dict, ".?");
                }
            }
            else
            {
                wrbuf_puts(term_dict, ".*");
            }
        }
        else if (*s0 == '*')
        {
            i++;
            wrbuf_puts(term_dict, ".*");
            wrbuf_putc(display_term, *s0);
            s0++;
        }
        else if (*s0 == '#')
        {
            i++;
            wrbuf_puts(term_dict, ".");
            wrbuf_putc(display_term, *s0);
            s0++;
        }
        else
        {
            const char *s1 = s0;
            int q_map_match = 0;
            map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
            if (space_split && **map == *CHR_SPACE)
                break;

            i++;
            add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
        }
    }
    *src = s0;
    return i;
}
587
588 /* term_105/106: handle term, process * ! and possibly right_truncate */
/* Handle a term with the operators * and ! (truncation 105/106).
 *
 *   *  -> ".*"
 *   !  -> "."
 *   \  -> an escaped backslash
 *
 * Operators are copied to display_term as typed.  All other input is
 * mapped through zm and appended via add_non_space.  With space_split
 * set, a unit mapping to space ends the term.  When right_truncate is
 * set, ".*" is appended to term_dict after the term.
 *
 * *src is advanced to where scanning stopped.  Returns the number of
 * units and operators processed.
 */
static int term_105(zebra_map_t zm, const char **src,
                    WRBUF term_dict, int space_split,
                    WRBUF display_term, int right_truncate)
{
    const char *cur;
    int count = 0;

    if (!term_pre(zm, src, "\\*!", !space_split))
        return 0;
    for (cur = *src; *cur; )
    {
        const char *tok = cur;
        const char **map;
        int q_map_match = 0;

        switch (*cur)
        {
        case '*':
            count++;
            wrbuf_puts(term_dict, ".*");
            wrbuf_putc(display_term, *cur);
            cur++;
            continue;
        case '!':
            count++;
            wrbuf_putc(term_dict, '.');
            wrbuf_putc(display_term, *cur);
            cur++;
            continue;
        case '\\':
            count++;
            wrbuf_puts(term_dict, "\\\\");
            wrbuf_putc(display_term, *cur);
            cur++;
            continue;
        default:
            break;
        }
        map = zebra_maps_search(zm, &cur, strlen(cur), &q_map_match);
        if (space_split && **map == *CHR_SPACE)
            break;

        count++;
        add_non_space(tok, cur, term_dict, display_term, map, q_map_match);
    }
    if (right_truncate)
        wrbuf_puts(term_dict, ".*");
    *src = cur;
    return count;
}
640
641
642 /* gen_regular_rel - generate regular expression from relation
643 * val: border value (inclusive)
644 * islt: 1 if <=; 0 if >=.
645 */
/* Write the five-character class "[lo-hi]" at dst[at..]; returns the
   index just past it. */
static int gen_regular_put_class(char *dst, int at, int lo, int hi)
{
    dst[at++] = '[';
    dst[at++] = lo;
    dst[at++] = '-';
    dst[at++] = hi;
    dst[at++] = ']';
    return at;
}

/* Append to term_dict a regular expression matching the decimal
 * integers on one side of val.
 *
 * val:  border value (inclusive)
 * islt: 1 for "<= val", 0 for ">= val"
 *
 * For negative val the sign is handled in the regex prefix and the
 * comparison direction is flipped for the magnitude.  The magnitude is
 * computed in unsigned arithmetic so that INT_MIN does not overflow.
 */
static void gen_regular_rel(WRBUF term_dict, int val, int islt)
{
    char dst_buf[20*5*20]; /* assuming enough for expansion */
    char *dst = dst_buf;
    int dst_p;
    int w, d, i;
    int pos = 0;           /* digits handled so far, from the right */
    char numstr[20];
    unsigned int magnitude;

    yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
    if (val >= 0)
    {
        strcpy(dst, islt ? "(-[0-9]+|(" : "((");
        magnitude = (unsigned int) val;
    }
    else
    {
        if (!islt)
        {
            strcpy(dst, "([0-9]+|-(");
            islt = 1;
        }
        else
        {
            strcpy(dst, "(-(");
            islt = 0;
        }
        magnitude = 0U - (unsigned int) val;
    }
    dst_p = strlen(dst);
    snprintf(numstr, sizeof(numstr), "%u", magnitude);
    for (w = strlen(numstr); --w >= 0; pos++)
    {
        int lo, hi;

        d = numstr[w];
        if (pos > 0)
        {
            /* beyond the last digit the bound is strict: step the
               digit by one, or skip it when no such digit exists */
            if (islt)
            {
                if (d == '0')
                    continue;
                d--;
            }
            else
            {
                if (d == '9')
                    continue;
                d++;
            }
        }

        /* start a new alternative with the full number, then overwrite
           from digit w onwards */
        strcpy(dst + dst_p, numstr);
        dst_p = (int) strlen(dst) - pos - 1;

        lo = islt ? '0' : d;
        hi = islt ? d : '9';
        if (lo != hi)
            dst_p = gen_regular_put_class(dst, dst_p, lo, hi);
        else
            dst[dst_p++] = d;

        for (i = 0; i < pos; i++)
            dst_p = gen_regular_put_class(dst, dst_p, '0', '9');
        dst[dst_p++] = '|';
    }
    dst[dst_p] = '\0';
    if (islt)
    {
        /* match everything less than 10^(pos-1) */
        strcat(dst, "0*");
        for (i = 1; i < pos; i++)
            strcat(dst, "[0-9]?");
    }
    else
    {
        /* match everything greater than 10^pos */
        for (i = 0; i <= pos; i++)
            strcat(dst, "[0-9]");
        strcat(dst, "[0-9]*");
    }
    strcat(dst, "))");
    wrbuf_puts(term_dict, dst);
}
755
/* Copy one logical character from wsrc at position *indx to term_p and
   advance *indx past it.  A backslash is copied together with the
   character that follows it, so escape pairs stay intact. */
void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
{
    const char *src = wrbuf_cstr(wsrc);
    int at = *indx;

    if (src[at] == '\\')
    {
        wrbuf_putc(term_p, src[at]);
        at++;
    }
    wrbuf_putc(term_p, src[at]);
    at++;
    *indx = at;
}
767
768 /*
769 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
770 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
771 * >= abc ([b-].*|a[c-].*|ab[c-].*)
772 * ([^-a].*|a[^-b].*|ab[c-].*)
773 * < abc ([-0].*|a[-a].*|ab[-b].*)
774 * ([^a-].*|a[^b-].*|ab[^c-].*)
775 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
776 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
777 */
/* Copy the first `upto` positions of src to dst, one logical character
   at a time (see string_rel_add_char). */
static void string_rel_add_prefix(WRBUF dst, WRBUF src, int upto)
{
    int j = 0;

    while (j < upto)
        string_rel_add_char(dst, src, &j);
}

/* Build the dictionary regex for a string term under the relation
 * attribute (type 2) of zapt and append it to term_dict.
 *
 * Relation values handled: 1 (<), 2 (<=), 3 / 102 / not given (=),
 * 4 (>=), 5 (>) and 103 (always matches).  Any other value sets
 * *error_code to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.
 *
 * The term itself is parsed with term_100, which advances *term_sub
 * and fills display_term.
 *
 * Returns 1 on success (including an empty term for the "=" relations)
 * and 0 when no term was found or the relation is unsupported;
 * *error_code is 0 unless the relation is unsupported.
 */
static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                           const char **term_sub, WRBUF term_dict,
                           const Odr_oid *attributeSet,
                           zebra_map_t zm, int space_split,
                           WRBUF display_term,
                           int *error_code)
{
    AttrType relation;
    int relation_value;
    int i;
    int found = 1;
    WRBUF term_component = wrbuf_alloc();

    attr_init_APT(&relation, zapt, 2);
    relation_value = attr_find(&relation, NULL);

    *error_code = 0;
    yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
    switch (relation_value)
    {
    case 1:
        if (!term_100(zm, term_sub, term_component, space_split, display_term))
        {
            found = 0;
            break;
        }
        yaz_log(log_level_rpn, "Relation <");

        wrbuf_putc(term_dict, '(');
        for (i = 0; i < wrbuf_len(term_component); )
        {
            if (i)
                wrbuf_putc(term_dict, '|');
            string_rel_add_prefix(term_dict, term_component, i);

            wrbuf_putc(term_dict, '[');
            wrbuf_putc(term_dict, '^');
            wrbuf_putc(term_dict, 1);
            wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
            string_rel_add_char(term_dict, term_component, &i);
            wrbuf_putc(term_dict, '-');
            wrbuf_putc(term_dict, ']');
            wrbuf_putc(term_dict, '.');
            wrbuf_putc(term_dict, '*');
        }
        wrbuf_putc(term_dict, ')');
        break;
    case 2:
        if (!term_100(zm, term_sub, term_component, space_split, display_term))
        {
            found = 0;
            break;
        }
        yaz_log(log_level_rpn, "Relation <=");

        wrbuf_putc(term_dict, '(');
        for (i = 0; i < wrbuf_len(term_component); )
        {
            string_rel_add_prefix(term_dict, term_component, i);

            wrbuf_putc(term_dict, '[');
            wrbuf_putc(term_dict, '^');
            wrbuf_putc(term_dict, 1);
            wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
            string_rel_add_char(term_dict, term_component, &i);
            wrbuf_putc(term_dict, '-');
            wrbuf_putc(term_dict, ']');
            wrbuf_putc(term_dict, '.');
            wrbuf_putc(term_dict, '*');

            wrbuf_putc(term_dict, '|');
        }
        /* final alternative: the term itself */
        for (i = 0; i < wrbuf_len(term_component); )
            string_rel_add_char(term_dict, term_component, &i);
        wrbuf_putc(term_dict, ')');
        break;
    case 5:
        if (!term_100(zm, term_sub, term_component, space_split, display_term))
        {
            found = 0;
            break;
        }
        yaz_log(log_level_rpn, "Relation >");

        wrbuf_putc(term_dict, '(');
        for (i = 0; i < wrbuf_len(term_component); )
        {
            string_rel_add_prefix(term_dict, term_component, i);

            wrbuf_putc(term_dict, '[');
            wrbuf_putc(term_dict, '^');
            wrbuf_putc(term_dict, '-');
            string_rel_add_char(term_dict, term_component, &i);
            wrbuf_putc(term_dict, ']');
            wrbuf_putc(term_dict, '.');
            wrbuf_putc(term_dict, '*');

            wrbuf_putc(term_dict, '|');
        }
        /* final alternative: the term followed by at least one char */
        for (i = 0; i < wrbuf_len(term_component); )
            string_rel_add_char(term_dict, term_component, &i);
        wrbuf_putc(term_dict, '.');
        wrbuf_putc(term_dict, '+');
        wrbuf_putc(term_dict, ')');
        break;
    case 4:
        if (!term_100(zm, term_sub, term_component, space_split, display_term))
        {
            found = 0;
            break;
        }
        yaz_log(log_level_rpn, "Relation >=");

        wrbuf_putc(term_dict, '(');
        for (i = 0; i < wrbuf_len(term_component); )
        {
            if (i)
                wrbuf_putc(term_dict, '|');
            string_rel_add_prefix(term_dict, term_component, i);
            wrbuf_putc(term_dict, '[');

            if (i < wrbuf_len(term_component)-1)
            {
                wrbuf_putc(term_dict, '^');
                wrbuf_putc(term_dict, '-');
                string_rel_add_char(term_dict, term_component, &i);
            }
            else
            {
                /* last position: inclusive lower bound */
                string_rel_add_char(term_dict, term_component, &i);
                wrbuf_putc(term_dict, '-');
            }
            wrbuf_putc(term_dict, ']');
            wrbuf_putc(term_dict, '.');
            wrbuf_putc(term_dict, '*');
        }
        wrbuf_putc(term_dict, ')');
        break;
    case 3:
    case 102:
    case -1:
        if (!**term_sub)
            break; /* empty term: success, nothing to add */
        yaz_log(log_level_rpn, "Relation =");
        if (!term_100(zm, term_sub, term_component, space_split, display_term))
        {
            found = 0;
            break;
        }
        wrbuf_puts(term_dict, "(");
        wrbuf_puts(term_dict, wrbuf_cstr(term_component));
        wrbuf_puts(term_dict, ")");
        break;
    case 103:
        yaz_log(log_level_rpn, "Relation always matches");
        /* skip to end of term (we don't care what it is) */
        while (**term_sub != '\0')
            (*term_sub)++;
        break;
    default:
        *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
        found = 0;
        break;
    }
    /* single exit: term_component is released on every path */
    wrbuf_destroy(term_component);
    return found;
}
964
965 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
966 const char **term_sub,
967 WRBUF term_dict,
968 const Odr_oid *attributeSet, NMEM stream,
969 struct grep_info *grep_info,
970 const char *index_type, int complete_flag,
971 WRBUF display_term,
972 const char *xpath_use,
973 struct ord_list **ol,
974 zebra_map_t zm);
975
zebra_term_limits_APT(ZebraHandle zh,Z_AttributesPlusTerm * zapt,zint * hits_limit_value,const char ** term_ref_id_str,NMEM nmem)976 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
977 Z_AttributesPlusTerm *zapt,
978 zint *hits_limit_value,
979 const char **term_ref_id_str,
980 NMEM nmem)
981 {
982 AttrType term_ref_id_attr;
983 AttrType hits_limit_attr;
984 int term_ref_id_int;
985 zint hits_limit_from_attr;
986
987 attr_init_APT(&hits_limit_attr, zapt, 11);
988 hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
989
990 attr_init_APT(&term_ref_id_attr, zapt, 10);
991 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
992 if (term_ref_id_int >= 0)
993 {
994 char *res = nmem_malloc(nmem, 20);
995 sprintf(res, "%d", term_ref_id_int);
996 *term_ref_id_str = res;
997 }
998 if (hits_limit_from_attr != -1)
999 *hits_limit_value = hits_limit_from_attr;
1000
1001 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
1002 *term_ref_id_str ? *term_ref_id_str : "none",
1003 *hits_limit_value);
1004 return ZEBRA_OK;
1005 }
1006
1007 /** \brief search for term (which may be truncated)
1008 */
search_term(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char ** term_sub,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,struct grep_info * grep_info,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc,zebra_map_t zm)1009 static ZEBRA_RES search_term(ZebraHandle zh,
1010 Z_AttributesPlusTerm *zapt,
1011 const char **term_sub,
1012 const Odr_oid *attributeSet,
1013 zint hits_limit, NMEM stream,
1014 struct grep_info *grep_info,
1015 const char *index_type, int complete_flag,
1016 const char *rank_type,
1017 const char *xpath_use,
1018 NMEM rset_nmem,
1019 RSET *rset,
1020 struct rset_key_control *kc,
1021 zebra_map_t zm)
1022 {
1023 ZEBRA_RES res;
1024 struct ord_list *ol;
1025 zint hits_limit_value = hits_limit;
1026 const char *term_ref_id_str = 0;
1027 WRBUF term_dict = wrbuf_alloc();
1028 WRBUF display_term = wrbuf_alloc();
1029 *rset = 0;
1030 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1031 stream);
1032 grep_info->isam_p_indx = 0;
1033 res = string_term(zh, zapt, term_sub, term_dict,
1034 attributeSet, stream, grep_info,
1035 index_type, complete_flag,
1036 display_term, xpath_use, &ol, zm);
1037 wrbuf_destroy(term_dict);
1038 if (res == ZEBRA_OK && *term_sub)
1039 {
1040 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1041 *rset = rset_trunc(zh, grep_info->isam_p_buf,
1042 grep_info->isam_p_indx, wrbuf_buf(display_term),
1043 wrbuf_len(display_term), rank_type,
1044 1 /* preserve pos */,
1045 zapt->term->which, rset_nmem,
1046 kc, kc->scope, ol, index_type, hits_limit_value,
1047 term_ref_id_str);
1048 if (!*rset)
1049 res = ZEBRA_FAIL;
1050 }
1051 wrbuf_destroy(display_term);
1052 return res;
1053 }
1054
string_term(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char ** term_sub,WRBUF term_dict,const Odr_oid * attributeSet,NMEM stream,struct grep_info * grep_info,const char * index_type,int complete_flag,WRBUF display_term,const char * xpath_use,struct ord_list ** ol,zebra_map_t zm)1055 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1056 const char **term_sub,
1057 WRBUF term_dict,
1058 const Odr_oid *attributeSet, NMEM stream,
1059 struct grep_info *grep_info,
1060 const char *index_type, int complete_flag,
1061 WRBUF display_term,
1062 const char *xpath_use,
1063 struct ord_list **ol,
1064 zebra_map_t zm)
1065 {
1066 int r;
1067 AttrType truncation;
1068 int truncation_value;
1069 const char *termp;
1070 struct rpn_char_map_info rcmi;
1071
1072 int space_split = complete_flag ? 0 : 1;
1073 int ord = -1;
1074 int regex_range = 0;
1075 int max_pos, prefix_len = 0;
1076 int relation_error;
1077 char ord_buf[32];
1078 int ord_len, i;
1079
1080 *ol = ord_list_create(stream);
1081
1082 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1083 attr_init_APT(&truncation, zapt, 5);
1084 truncation_value = attr_find(&truncation, NULL);
1085 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1086
1087 termp = *term_sub; /* start of term for each database */
1088
1089 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1090 attributeSet, &ord) != ZEBRA_OK)
1091 {
1092 *term_sub = 0;
1093 return ZEBRA_FAIL;
1094 }
1095
1096 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1097
1098 *ol = ord_list_append(stream, *ol, ord);
1099 ord_len = key_SU_encode(ord, ord_buf);
1100
1101 wrbuf_putc(term_dict, '(');
1102
1103 for (i = 0; i < ord_len; i++)
1104 {
1105 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1106 wrbuf_putc(term_dict, ord_buf[i]);
1107 }
1108 wrbuf_putc(term_dict, ')');
1109
1110 prefix_len = wrbuf_len(term_dict);
1111
1112 if (zebra_maps_is_icu(zm))
1113 {
1114 int relation_value;
1115 AttrType relation;
1116
1117 attr_init_APT(&relation, zapt, 2);
1118 relation_value = attr_find(&relation, NULL);
1119 if (relation_value == 103) /* always matches */
1120 termp += strlen(termp); /* move to end of term */
1121 else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1122 {
1123 /* ICU case */
1124 switch (truncation_value)
1125 {
1126 case -1: /* not specified */
1127 case 100: /* do not truncate */
1128 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1129 {
1130 *term_sub = 0;
1131 return ZEBRA_OK;
1132 }
1133 break;
1134 case 102:
1135 if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
1136 {
1137 *term_sub = 0;
1138 return ZEBRA_OK;
1139 }
1140 break;
1141 case 1: /* right truncation */
1142 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1143 {
1144 *term_sub = 0;
1145 return ZEBRA_OK;
1146 }
1147 break;
1148 case 2:
1149 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 2))
1150 {
1151 *term_sub = 0;
1152 return ZEBRA_OK;
1153 }
1154 break;
1155 case 3:
1156 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 3))
1157 {
1158 *term_sub = 0;
1159 return ZEBRA_OK;
1160 }
1161 break;
1162 default:
1163 zebra_setError_zint(zh,
1164 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1165 truncation_value);
1166 return ZEBRA_FAIL;
1167 }
1168 }
1169 else
1170 {
1171 zebra_setError_zint(zh,
1172 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1173 relation_value);
1174 return ZEBRA_FAIL;
1175 }
1176 }
1177 else
1178 {
1179 /* non-ICU case. using string.chr and friends */
1180 switch (truncation_value)
1181 {
1182 case -1: /* not specified */
1183 case 100: /* do not truncate */
1184 if (!string_relation(zh, zapt, &termp, term_dict,
1185 attributeSet,
1186 zm, space_split, display_term,
1187 &relation_error))
1188 {
1189 if (relation_error)
1190 {
1191 zebra_setError(zh, relation_error, 0);
1192 return ZEBRA_FAIL;
1193 }
1194 *term_sub = 0;
1195 return ZEBRA_OK;
1196 }
1197 break;
1198 case 1: /* right truncation */
1199 wrbuf_putc(term_dict, '(');
1200 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1201 {
1202 *term_sub = 0;
1203 return ZEBRA_OK;
1204 }
1205 wrbuf_puts(term_dict, ".*)");
1206 break;
1207 case 2: /* left truncation */
1208 wrbuf_puts(term_dict, "(.*");
1209 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1210 {
1211 *term_sub = 0;
1212 return ZEBRA_OK;
1213 }
1214 wrbuf_putc(term_dict, ')');
1215 break;
1216 case 3: /* left&right truncation */
1217 wrbuf_puts(term_dict, "(.*");
1218 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1219 {
1220 *term_sub = 0;
1221 return ZEBRA_OK;
1222 }
1223 wrbuf_puts(term_dict, ".*)");
1224 break;
1225 case 101: /* process # in term */
1226 wrbuf_putc(term_dict, '(');
1227 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1228 {
1229 *term_sub = 0;
1230 return ZEBRA_OK;
1231 }
1232 wrbuf_puts(term_dict, ")");
1233 break;
1234 case 102: /* Regexp-1 */
1235 wrbuf_putc(term_dict, '(');
1236 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1237 {
1238 *term_sub = 0;
1239 return ZEBRA_OK;
1240 }
1241 wrbuf_putc(term_dict, ')');
1242 break;
1243 case 103: /* Regexp-2 */
1244 regex_range = 1;
1245 wrbuf_putc(term_dict, '(');
1246 if (!term_103(zm, &termp, term_dict, ®ex_range,
1247 space_split, display_term))
1248 {
1249 *term_sub = 0;
1250 return ZEBRA_OK;
1251 }
1252 wrbuf_putc(term_dict, ')');
1253 break;
1254 case 104: /* process ?n * # term */
1255 wrbuf_putc(term_dict, '(');
1256 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1257 {
1258 *term_sub = 0;
1259 return ZEBRA_OK;
1260 }
1261 wrbuf_putc(term_dict, ')');
1262 break;
1263 case 105: /* process * ! in term and right truncate */
1264 wrbuf_putc(term_dict, '(');
1265 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1266 {
1267 *term_sub = 0;
1268 return ZEBRA_OK;
1269 }
1270 wrbuf_putc(term_dict, ')');
1271 break;
1272 case 106: /* process * ! in term */
1273 wrbuf_putc(term_dict, '(');
1274 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1275 {
1276 *term_sub = 0;
1277 return ZEBRA_OK;
1278 }
1279 wrbuf_putc(term_dict, ')');
1280 break;
1281 default:
1282 zebra_setError_zint(zh,
1283 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1284 truncation_value);
1285 return ZEBRA_FAIL;
1286 }
1287 }
1288 if (1)
1289 {
1290 char buf[1000];
1291 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1292 esc_str(buf, sizeof(buf), input, strlen(input));
1293 }
1294 {
1295 WRBUF pr_wr = wrbuf_alloc();
1296
1297 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1298 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1299 wrbuf_destroy(pr_wr);
1300 }
1301 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1302 grep_info, &max_pos,
1303 ord_len /* number of "exact" chars */,
1304 grep_handle);
1305 if (r == 1)
1306 zebra_set_partial_result(zh);
1307 else if (r)
1308 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1309 *term_sub = termp;
1310 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1311 return ZEBRA_OK;
1312 }
1313
1314
1315
grep_info_delete(struct grep_info * grep_info)1316 static void grep_info_delete(struct grep_info *grep_info)
1317 {
1318 #ifdef TERM_COUNT
1319 xfree(grep_info->term_no);
1320 #endif
1321 xfree(grep_info->isam_p_buf);
1322 }
1323
grep_info_prepare(ZebraHandle zh,Z_AttributesPlusTerm * zapt,struct grep_info * grep_info,const char * index_type)1324 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1325 Z_AttributesPlusTerm *zapt,
1326 struct grep_info *grep_info,
1327 const char *index_type)
1328 {
1329 #ifdef TERM_COUNT
1330 grep_info->term_no = 0;
1331 #endif
1332 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1333 grep_info->isam_p_size = 0;
1334 grep_info->isam_p_buf = NULL;
1335 grep_info->zh = zh;
1336 grep_info->index_type = index_type;
1337 grep_info->termset = 0;
1338 if (zapt)
1339 {
1340 AttrType truncmax;
1341 int truncmax_value;
1342
1343 attr_init_APT(&truncmax, zapt, 13);
1344 truncmax_value = attr_find(&truncmax, NULL);
1345 if (truncmax_value != -1)
1346 grep_info->trunc_max = truncmax_value;
1347 }
1348 if (zapt)
1349 {
1350 AttrType termset;
1351 int termset_value_numeric;
1352 const char *termset_value_string;
1353
1354 attr_init_APT(&termset, zapt, 8);
1355 termset_value_numeric =
1356 attr_find_ex(&termset, NULL, &termset_value_string);
1357 if (termset_value_numeric != -1)
1358 {
1359 #if TERMSET_DISABLE
1360 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1361 return ZEBRA_FAIL;
1362 #else
1363 char resname[32];
1364 const char *termset_name = 0;
1365 if (termset_value_numeric != -2)
1366 {
1367
1368 sprintf(resname, "%d", termset_value_numeric);
1369 termset_name = resname;
1370 }
1371 else
1372 termset_name = termset_value_string;
1373 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1374 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1375 if (!grep_info->termset)
1376 {
1377 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1378 return ZEBRA_FAIL;
1379 }
1380 #endif
1381 }
1382 }
1383 return ZEBRA_OK;
1384 }
1385
search_terms_chrmap(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET ** result_sets,int * num_result_sets,struct rset_key_control * kc,zebra_map_t zm)1386 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1387 Z_AttributesPlusTerm *zapt,
1388 const char *termz,
1389 const Odr_oid *attributeSet,
1390 zint hits_limit,
1391 NMEM stream,
1392 const char *index_type, int complete_flag,
1393 const char *rank_type,
1394 const char *xpath_use,
1395 NMEM rset_nmem,
1396 RSET **result_sets, int *num_result_sets,
1397 struct rset_key_control *kc,
1398 zebra_map_t zm)
1399 {
1400 struct grep_info grep_info;
1401 const char *termp = termz;
1402 int alloc_sets = 0;
1403
1404 *num_result_sets = 0;
1405 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1406 return ZEBRA_FAIL;
1407 while (1)
1408 {
1409 ZEBRA_RES res;
1410
1411 if (alloc_sets == *num_result_sets)
1412 {
1413 int add = 10;
1414 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1415 sizeof(*rnew));
1416 if (alloc_sets)
1417 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1418 alloc_sets = alloc_sets + add;
1419 *result_sets = rnew;
1420 }
1421 res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1422 stream, &grep_info,
1423 index_type, complete_flag,
1424 rank_type,
1425 xpath_use, rset_nmem,
1426 &(*result_sets)[*num_result_sets],
1427 kc, zm);
1428 if (res != ZEBRA_OK)
1429 {
1430 int i;
1431 for (i = 0; i < *num_result_sets; i++)
1432 rset_delete((*result_sets)[i]);
1433 grep_info_delete(&grep_info);
1434 return res;
1435 }
1436 if ((*result_sets)[*num_result_sets] == 0)
1437 break;
1438 (*num_result_sets)++;
1439
1440 if (!*termp)
1441 break;
1442 }
1443 grep_info_delete(&grep_info);
1444 return ZEBRA_OK;
1445 }
1446
1447 /**
1448 \brief Create result set(s) for list of terms
1449 \param zh Zebra Handle
1450 \param zapt Attributes Plust Term (RPN leaf)
1451 \param termz term as used in query but converted to UTF-8
1452 \param attributeSet default attribute set
1453 \param stream memory for result
1454 \param index_type register type ("w", "p",..)
1455 \param complete_flag whether it's phrases or not
1456 \param rank_type term flags for ranking
1457 \param xpath_use use attribute for X-Path (-1 for no X-path)
1458 \param rset_nmem memory for result sets
1459 \param result_sets output result set for each term in list (output)
1460 \param num_result_sets number of output result sets
1461 \param kc rset key control to be used for created result sets
1462 */
search_terms_list(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET ** result_sets,int * num_result_sets,struct rset_key_control * kc)1463 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1464 Z_AttributesPlusTerm *zapt,
1465 const char *termz,
1466 const Odr_oid *attributeSet,
1467 zint hits_limit,
1468 NMEM stream,
1469 const char *index_type, int complete_flag,
1470 const char *rank_type,
1471 const char *xpath_use,
1472 NMEM rset_nmem,
1473 RSET **result_sets, int *num_result_sets,
1474 struct rset_key_control *kc)
1475 {
1476 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1477 return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1478 stream, index_type, complete_flag,
1479 rank_type, xpath_use,
1480 rset_nmem, result_sets, num_result_sets,
1481 kc, zm);
1482 }
1483
1484
1485 /** \brief limit a search by position - returns result set
1486 */
search_position(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const Odr_oid * attributeSet,const char * index_type,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)1487 static ZEBRA_RES search_position(ZebraHandle zh,
1488 Z_AttributesPlusTerm *zapt,
1489 const Odr_oid *attributeSet,
1490 const char *index_type,
1491 NMEM rset_nmem,
1492 RSET *rset,
1493 struct rset_key_control *kc)
1494 {
1495 int position_value;
1496 AttrType position;
1497 int ord = -1;
1498 char ord_buf[32];
1499 char term_dict[100];
1500 int ord_len;
1501 char *val;
1502 ISAM_P isam_p;
1503 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1504
1505 attr_init_APT(&position, zapt, 3);
1506 position_value = attr_find(&position, NULL);
1507 switch(position_value)
1508 {
1509 case 3:
1510 case -1:
1511 return ZEBRA_OK;
1512 case 1:
1513 case 2:
1514 break;
1515 default:
1516 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1517 position_value);
1518 return ZEBRA_FAIL;
1519 }
1520
1521
1522 if (!zebra_maps_is_first_in_field(zm))
1523 {
1524 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1525 position_value);
1526 return ZEBRA_FAIL;
1527 }
1528
1529 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1530 attributeSet, &ord) != ZEBRA_OK)
1531 {
1532 return ZEBRA_FAIL;
1533 }
1534 ord_len = key_SU_encode(ord, ord_buf);
1535 memcpy(term_dict, ord_buf, ord_len);
1536 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1537 val = dict_lookup(zh->reg->dict, term_dict);
1538 if (val)
1539 {
1540 assert(*val == sizeof(ISAM_P));
1541 memcpy(&isam_p, val+1, sizeof(isam_p));
1542
1543 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1544 isam_p, 0);
1545 }
1546 return ZEBRA_OK;
1547 }
1548
1549 /** \brief returns result set for phrase search
1550 */
rpn_search_APT_phrase(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz_org,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)1551 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1552 Z_AttributesPlusTerm *zapt,
1553 const char *termz_org,
1554 const Odr_oid *attributeSet,
1555 zint hits_limit,
1556 NMEM stream,
1557 const char *index_type,
1558 int complete_flag,
1559 const char *rank_type,
1560 const char *xpath_use,
1561 NMEM rset_nmem,
1562 RSET *rset,
1563 struct rset_key_control *kc)
1564 {
1565 RSET *result_sets = 0;
1566 int num_result_sets = 0;
1567 ZEBRA_RES res =
1568 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1569 stream, index_type, complete_flag,
1570 rank_type, xpath_use,
1571 rset_nmem,
1572 &result_sets, &num_result_sets, kc);
1573
1574 if (res != ZEBRA_OK)
1575 return res;
1576
1577 if (num_result_sets > 0)
1578 {
1579 RSET first_set = 0;
1580 res = search_position(zh, zapt, attributeSet,
1581 index_type,
1582 rset_nmem, &first_set,
1583 kc);
1584 if (res != ZEBRA_OK)
1585 {
1586 int i;
1587 for (i = 0; i < num_result_sets; i++)
1588 rset_delete(result_sets[i]);
1589 return res;
1590 }
1591 if (first_set)
1592 {
1593 RSET *nsets = nmem_malloc(stream,
1594 sizeof(RSET) * (num_result_sets+1));
1595 nsets[0] = first_set;
1596 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1597 result_sets = nsets;
1598 num_result_sets++;
1599 }
1600 }
1601 if (num_result_sets == 0)
1602 *rset = rset_create_null(rset_nmem, kc, 0);
1603 else if (num_result_sets == 1)
1604 *rset = result_sets[0];
1605 else
1606 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1607 num_result_sets, result_sets,
1608 1 /* ordered */, 0 /* exclusion */,
1609 3 /* relation */, 1 /* distance */);
1610 if (!*rset)
1611 return ZEBRA_FAIL;
1612 return ZEBRA_OK;
1613 }
1614
1615 /** \brief returns result set for or-list search
1616 */
rpn_search_APT_or_list(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz_org,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)1617 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1618 Z_AttributesPlusTerm *zapt,
1619 const char *termz_org,
1620 const Odr_oid *attributeSet,
1621 zint hits_limit,
1622 NMEM stream,
1623 const char *index_type,
1624 int complete_flag,
1625 const char *rank_type,
1626 const char *xpath_use,
1627 NMEM rset_nmem,
1628 RSET *rset,
1629 struct rset_key_control *kc)
1630 {
1631 RSET *result_sets = 0;
1632 int num_result_sets = 0;
1633 int i;
1634 ZEBRA_RES res =
1635 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1636 stream, index_type, complete_flag,
1637 rank_type, xpath_use,
1638 rset_nmem,
1639 &result_sets, &num_result_sets, kc);
1640 if (res != ZEBRA_OK)
1641 return res;
1642
1643 for (i = 0; i < num_result_sets; i++)
1644 {
1645 RSET first_set = 0;
1646 res = search_position(zh, zapt, attributeSet,
1647 index_type,
1648 rset_nmem, &first_set,
1649 kc);
1650 if (res != ZEBRA_OK)
1651 {
1652 for (i = 0; i < num_result_sets; i++)
1653 rset_delete(result_sets[i]);
1654 return res;
1655 }
1656
1657 if (first_set)
1658 {
1659 RSET tmp_set[2];
1660
1661 tmp_set[0] = first_set;
1662 tmp_set[1] = result_sets[i];
1663
1664 result_sets[i] = rset_create_prox(
1665 rset_nmem, kc, kc->scope,
1666 2, tmp_set,
1667 1 /* ordered */, 0 /* exclusion */,
1668 3 /* relation */, 1 /* distance */);
1669 }
1670 }
1671 if (num_result_sets == 0)
1672 *rset = rset_create_null(rset_nmem, kc, 0);
1673 else if (num_result_sets == 1)
1674 *rset = result_sets[0];
1675 else
1676 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1677 num_result_sets, result_sets);
1678 if (!*rset)
1679 return ZEBRA_FAIL;
1680 return ZEBRA_OK;
1681 }
1682
1683 /** \brief returns result set for and-list search
1684 */
rpn_search_APT_and_list(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz_org,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)1685 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1686 Z_AttributesPlusTerm *zapt,
1687 const char *termz_org,
1688 const Odr_oid *attributeSet,
1689 zint hits_limit,
1690 NMEM stream,
1691 const char *index_type,
1692 int complete_flag,
1693 const char *rank_type,
1694 const char *xpath_use,
1695 NMEM rset_nmem,
1696 RSET *rset,
1697 struct rset_key_control *kc)
1698 {
1699 RSET *result_sets = 0;
1700 int num_result_sets = 0;
1701 int i;
1702 ZEBRA_RES res =
1703 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1704 stream, index_type, complete_flag,
1705 rank_type, xpath_use,
1706 rset_nmem,
1707 &result_sets, &num_result_sets,
1708 kc);
1709 if (res != ZEBRA_OK)
1710 return res;
1711 for (i = 0; i < num_result_sets; i++)
1712 {
1713 RSET first_set = 0;
1714 res = search_position(zh, zapt, attributeSet,
1715 index_type,
1716 rset_nmem, &first_set,
1717 kc);
1718 if (res != ZEBRA_OK)
1719 {
1720 for (i = 0; i < num_result_sets; i++)
1721 rset_delete(result_sets[i]);
1722 return res;
1723 }
1724
1725 if (first_set)
1726 {
1727 RSET tmp_set[2];
1728
1729 tmp_set[0] = first_set;
1730 tmp_set[1] = result_sets[i];
1731
1732 result_sets[i] = rset_create_prox(
1733 rset_nmem, kc, kc->scope,
1734 2, tmp_set,
1735 1 /* ordered */, 0 /* exclusion */,
1736 3 /* relation */, 1 /* distance */);
1737 }
1738 }
1739
1740
1741 if (num_result_sets == 0)
1742 *rset = rset_create_null(rset_nmem, kc, 0);
1743 else if (num_result_sets == 1)
1744 *rset = result_sets[0];
1745 else
1746 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1747 num_result_sets, result_sets);
1748 if (!*rset)
1749 return ZEBRA_FAIL;
1750 return ZEBRA_OK;
1751 }
1752
/** \brief appends a numeric relation pattern to term_dict and greps for it
    \param zh Zebra handle
    \param zapt RPN leaf; its relation attribute (type 2) selects the pattern
    \param term_sub term cursor; advanced past the consumed term
    \param term_dict dictionary pattern being built (appended to)
    \param attributeSet not used by this function
    \param grep_info collects the lookup hits
    \param max_pos passed to dict_lookup_grep
    \param zm character map used to read the term
    \param display_term receives the display form of the term
    \param error_code set to 0, or to a Bib-1 code for an unsupported relation
    \retval 1 lookup was performed
    \retval 0 no term could be read, or the relation is unsupported
              (check *error_code to tell the two apart)

    Relations 1, 2, 4, 5 (<, <=, >=, >) generate a range pattern via
    gen_regular_rel; 3 and "not given" match the number with optional
    leading zeros; 103 consumes the rest of the term without adding a
    pattern.
*/
static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                            const char **term_sub,
                            WRBUF term_dict,
                            const Odr_oid *attributeSet,
                            struct grep_info *grep_info,
                            int *max_pos,
                            zebra_map_t zm,
                            WRBUF display_term,
                            int *error_code)
{
    enum { NUM_SKIP_TERM, NUM_RANGE, NUM_EXACT } action = NUM_SKIP_TERM;
    AttrType relation;
    int relation_value;
    int adjust = 0;       /* added to the parsed number for strict relations */
    int upper_bound = 0;  /* third argument for gen_regular_rel */
    int r;
    WRBUF term_num = wrbuf_alloc();

    *error_code = 0;
    attr_init_APT(&relation, zapt, 2);
    relation_value = attr_find(&relation, NULL);

    yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);

    /* first decide what to do; the term itself is read afterwards */
    switch (relation_value)
    {
    case 1:
        yaz_log(log_level_rpn, "Relation <");
        action = NUM_RANGE;
        adjust = -1;
        upper_bound = 1;
        break;
    case 2:
        yaz_log(log_level_rpn, "Relation <=");
        action = NUM_RANGE;
        upper_bound = 1;
        break;
    case 4:
        yaz_log(log_level_rpn, "Relation >=");
        action = NUM_RANGE;
        break;
    case 5:
        yaz_log(log_level_rpn, "Relation >");
        action = NUM_RANGE;
        adjust = 1;
        break;
    case -1:
    case 3:
        yaz_log(log_level_rpn, "Relation =");
        action = NUM_EXACT;
        break;
    case 103:
        action = NUM_SKIP_TERM;
        break;
    default:
        *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
        wrbuf_destroy(term_num);
        return 0;
    }

    if (action == NUM_SKIP_TERM)
    {
        /* term_dict untouched: just consume the remaining term text */
        while (**term_sub != '\0')
            (*term_sub)++;
    }
    else
    {
        int term_value;

        if (!term_100(zm, term_sub, term_num, 1, display_term))
        {
            wrbuf_destroy(term_num);
            return 0;
        }
        term_value = atoi(wrbuf_cstr(term_num));
        if (action == NUM_EXACT)
            wrbuf_printf(term_dict, "(0*%d)", term_value);
        else
            gen_regular_rel(term_dict, term_value + adjust, upper_bound);
    }

    r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
                         0, grep_info, max_pos, 0, grep_handle);
    if (r == 1)
        zebra_set_partial_result(zh);
    else if (r)
        yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
    yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
    wrbuf_destroy(term_num);
    return 1;
}
1849
numeric_term(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char ** term_sub,WRBUF term_dict,const Odr_oid * attributeSet,NMEM stream,struct grep_info * grep_info,const char * index_type,int complete_flag,WRBUF display_term,const char * xpath_use,struct ord_list ** ol)1850 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1851 const char **term_sub,
1852 WRBUF term_dict,
1853 const Odr_oid *attributeSet, NMEM stream,
1854 struct grep_info *grep_info,
1855 const char *index_type, int complete_flag,
1856 WRBUF display_term,
1857 const char *xpath_use,
1858 struct ord_list **ol)
1859 {
1860 const char *termp;
1861 struct rpn_char_map_info rcmi;
1862 int max_pos;
1863 int relation_error = 0;
1864 int ord, ord_len, i;
1865 char ord_buf[32];
1866 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1867
1868 *ol = ord_list_create(stream);
1869
1870 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1871
1872 termp = *term_sub;
1873
1874 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1875 attributeSet, &ord) != ZEBRA_OK)
1876 {
1877 return ZEBRA_FAIL;
1878 }
1879
1880 wrbuf_rewind(term_dict);
1881
1882 *ol = ord_list_append(stream, *ol, ord);
1883
1884 ord_len = key_SU_encode(ord, ord_buf);
1885
1886 wrbuf_putc(term_dict, '(');
1887 for (i = 0; i < ord_len; i++)
1888 {
1889 wrbuf_putc(term_dict, 1);
1890 wrbuf_putc(term_dict, ord_buf[i]);
1891 }
1892 wrbuf_putc(term_dict, ')');
1893
1894 if (!numeric_relation(zh, zapt, &termp, term_dict,
1895 attributeSet, grep_info, &max_pos, zm,
1896 display_term, &relation_error))
1897 {
1898 if (relation_error)
1899 {
1900 zebra_setError(zh, relation_error, 0);
1901 return ZEBRA_FAIL;
1902 }
1903 *term_sub = 0;
1904 return ZEBRA_OK;
1905 }
1906 *term_sub = termp;
1907 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1908 return ZEBRA_OK;
1909 }
1910
1911
rpn_search_APT_numeric(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)1912 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1913 Z_AttributesPlusTerm *zapt,
1914 const char *termz,
1915 const Odr_oid *attributeSet,
1916 zint hits_limit,
1917 NMEM stream,
1918 const char *index_type,
1919 int complete_flag,
1920 const char *rank_type,
1921 const char *xpath_use,
1922 NMEM rset_nmem,
1923 RSET *rset,
1924 struct rset_key_control *kc)
1925 {
1926 const char *termp = termz;
1927 RSET *result_sets = 0;
1928 int num_result_sets = 0;
1929 ZEBRA_RES res;
1930 struct grep_info grep_info;
1931 int alloc_sets = 0;
1932 zint hits_limit_value = hits_limit;
1933 const char *term_ref_id_str = 0;
1934
1935 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1936 stream);
1937
1938 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1939 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1940 return ZEBRA_FAIL;
1941 while (1)
1942 {
1943 struct ord_list *ol;
1944 WRBUF term_dict = wrbuf_alloc();
1945 WRBUF display_term = wrbuf_alloc();
1946 if (alloc_sets == num_result_sets)
1947 {
1948 int add = 10;
1949 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1950 sizeof(*rnew));
1951 if (alloc_sets)
1952 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1953 alloc_sets = alloc_sets + add;
1954 result_sets = rnew;
1955 }
1956 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1957 grep_info.isam_p_indx = 0;
1958 res = numeric_term(zh, zapt, &termp, term_dict,
1959 attributeSet, stream, &grep_info,
1960 index_type, complete_flag,
1961 display_term, xpath_use, &ol);
1962 wrbuf_destroy(term_dict);
1963 if (res == ZEBRA_FAIL || termp == 0)
1964 {
1965 wrbuf_destroy(display_term);
1966 break;
1967 }
1968 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1969 result_sets[num_result_sets] =
1970 rset_trunc(zh, grep_info.isam_p_buf,
1971 grep_info.isam_p_indx, wrbuf_buf(display_term),
1972 wrbuf_len(display_term), rank_type,
1973 0 /* preserve position */,
1974 zapt->term->which, rset_nmem,
1975 kc, kc->scope, ol, index_type,
1976 hits_limit_value,
1977 term_ref_id_str);
1978 wrbuf_destroy(display_term);
1979 if (!result_sets[num_result_sets])
1980 break;
1981 num_result_sets++;
1982 if (!*termp)
1983 break;
1984 }
1985 grep_info_delete(&grep_info);
1986
1987 if (res != ZEBRA_OK)
1988 return res;
1989 if (num_result_sets == 0)
1990 *rset = rset_create_null(rset_nmem, kc, 0);
1991 else if (num_result_sets == 1)
1992 *rset = result_sets[0];
1993 else
1994 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1995 num_result_sets, result_sets);
1996 if (!*rset)
1997 return ZEBRA_FAIL;
1998 return ZEBRA_OK;
1999 }
2000
rpn_search_APT_local(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz,const Odr_oid * attributeSet,NMEM stream,const char * rank_type,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)2001 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
2002 Z_AttributesPlusTerm *zapt,
2003 const char *termz,
2004 const Odr_oid *attributeSet,
2005 NMEM stream,
2006 const char *rank_type, NMEM rset_nmem,
2007 RSET *rset,
2008 struct rset_key_control *kc)
2009 {
2010 Record rec;
2011 zint sysno = atozint(termz);
2012
2013 if (sysno <= 0)
2014 sysno = 0;
2015 rec = rec_get(zh->reg->records, sysno);
2016 if (!rec)
2017 sysno = 0;
2018
2019 rec_free(&rec);
2020
2021 if (sysno <= 0)
2022 {
2023 *rset = rset_create_null(rset_nmem, kc, 0);
2024 }
2025 else
2026 {
2027 RSFD rsfd;
2028 struct it_key key;
2029 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2030 res_get(zh->res, "setTmpDir"), 0);
2031 rsfd = rset_open(*rset, RSETF_WRITE);
2032
2033 key.mem[0] = sysno;
2034 key.mem[1] = 1;
2035 key.len = 2;
2036 rset_write(rsfd, &key);
2037 rset_close(rsfd);
2038 }
2039 return ZEBRA_OK;
2040 }
2041
rpn_sort_spec(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const Odr_oid * attributeSet,NMEM stream,Z_SortKeySpecList * sort_sequence,const char * rank_type,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)2042 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2043 const Odr_oid *attributeSet, NMEM stream,
2044 Z_SortKeySpecList *sort_sequence,
2045 const char *rank_type,
2046 NMEM rset_nmem,
2047 RSET *rset,
2048 struct rset_key_control *kc)
2049 {
2050 int i;
2051 int sort_relation_value;
2052 AttrType sort_relation_type;
2053 Z_SortKeySpec *sks;
2054 Z_SortKey *sk;
2055 char termz[20];
2056
2057 attr_init_APT(&sort_relation_type, zapt, 7);
2058 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2059
2060 if (!sort_sequence->specs)
2061 {
2062 sort_sequence->num_specs = 10;
2063 sort_sequence->specs = (Z_SortKeySpec **)
2064 nmem_malloc(stream, sort_sequence->num_specs *
2065 sizeof(*sort_sequence->specs));
2066 for (i = 0; i < sort_sequence->num_specs; i++)
2067 sort_sequence->specs[i] = 0;
2068 }
2069 if (zapt->term->which != Z_Term_general)
2070 i = 0;
2071 else
2072 i = atoi_n((char *) zapt->term->u.general->buf,
2073 zapt->term->u.general->len);
2074 if (i >= sort_sequence->num_specs)
2075 i = 0;
2076 sprintf(termz, "%d", i);
2077
2078 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2079 sks->sortElement = (Z_SortElement *)
2080 nmem_malloc(stream, sizeof(*sks->sortElement));
2081 sks->sortElement->which = Z_SortElement_generic;
2082 sk = sks->sortElement->u.generic = (Z_SortKey *)
2083 nmem_malloc(stream, sizeof(*sk));
2084 sk->which = Z_SortKey_sortAttributes;
2085 sk->u.sortAttributes = (Z_SortAttributes *)
2086 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2087
2088 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2089 sk->u.sortAttributes->list = zapt->attributes;
2090
2091 sks->sortRelation = (Odr_int *)
2092 nmem_malloc(stream, sizeof(*sks->sortRelation));
2093 if (sort_relation_value == 1)
2094 *sks->sortRelation = Z_SortKeySpec_ascending;
2095 else if (sort_relation_value == 2)
2096 *sks->sortRelation = Z_SortKeySpec_descending;
2097 else
2098 *sks->sortRelation = Z_SortKeySpec_ascending;
2099
2100 sks->caseSensitivity = (Odr_int *)
2101 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2102 *sks->caseSensitivity = 0;
2103
2104 sks->which = Z_SortKeySpec_null;
2105 sks->u.null = odr_nullval ();
2106 sort_sequence->specs[i] = sks;
2107 *rset = rset_create_null(rset_nmem, kc, 0);
2108 return ZEBRA_OK;
2109 }
2110
2111
/** \brief parses an X-Path given as string use attribute, if any
    \param zh Zebra handle (not inspected here)
    \param zapt RPN leaf; its use attribute (type 1) is inspected
    \param attributeSet default attribute set
    \param xpath receives the parsed location steps
    \param max capacity of xpath
    \param mem memory for the parsed steps
    \return result of zebra_parse_xpath_str, or -1 when the use attribute
            is not a string starting with '/'
*/
static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                           const Odr_oid *attributeSet,
                           struct xpath_location_step *xpath, int max,
                           NMEM mem)
{
    AttrType use;
    const Odr_oid *cur_set = attributeSet;
    const char *use_str = 0;

    attr_init_APT(&use, zapt, 1);
    attr_find_ex(&use, &cur_set, &use_str);

    if (use_str && *use_str == '/')
        return zebra_parse_xpath_str(use_str, xpath, max, mem);
    return -1;
}
2129
2130
2131
xpath_trunc(ZebraHandle zh,NMEM stream,const char * index_type,const char * term,const char * xpath_use,NMEM rset_nmem,struct rset_key_control * kc)2132 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2133 const char *index_type, const char *term,
2134 const char *xpath_use,
2135 NMEM rset_nmem,
2136 struct rset_key_control *kc)
2137 {
2138 struct grep_info grep_info;
2139 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2140 zinfo_index_category_index,
2141 index_type, xpath_use);
2142 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2143 return rset_create_null(rset_nmem, kc, 0);
2144
2145 if (ord < 0)
2146 return rset_create_null(rset_nmem, kc, 0);
2147 else
2148 {
2149 int i, max_pos;
2150 char ord_buf[32];
2151 RSET rset;
2152 WRBUF term_dict = wrbuf_alloc();
2153 int ord_len = key_SU_encode(ord, ord_buf);
2154 int term_type = Z_Term_characterString;
2155 const char *flags = "void";
2156
2157 wrbuf_putc(term_dict, '(');
2158 for (i = 0; i < ord_len; i++)
2159 {
2160 wrbuf_putc(term_dict, 1);
2161 wrbuf_putc(term_dict, ord_buf[i]);
2162 }
2163 wrbuf_putc(term_dict, ')');
2164 wrbuf_puts(term_dict, term);
2165
2166 grep_info.isam_p_indx = 0;
2167 dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2168 &grep_info, &max_pos, 0, grep_handle);
2169 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2170 grep_info.isam_p_indx);
2171 rset = rset_trunc(zh, grep_info.isam_p_buf,
2172 grep_info.isam_p_indx, term, strlen(term),
2173 flags, 1, term_type, rset_nmem,
2174 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2175 0 /* term_ref_id_str */);
2176 grep_info_delete(&grep_info);
2177 wrbuf_destroy(term_dict);
2178 return rset;
2179 }
2180 }
2181
2182 static
rpn_search_xpath(ZebraHandle zh,NMEM stream,const char * rank_type,RSET rset,int xpath_len,struct xpath_location_step * xpath,NMEM rset_nmem,RSET * rset_out,struct rset_key_control * kc)2183 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2184 NMEM stream, const char *rank_type, RSET rset,
2185 int xpath_len, struct xpath_location_step *xpath,
2186 NMEM rset_nmem,
2187 RSET *rset_out,
2188 struct rset_key_control *kc)
2189 {
2190 int i;
2191 int always_matches = rset ? 0 : 1;
2192
2193 if (xpath_len < 0)
2194 {
2195 *rset_out = rset;
2196 return ZEBRA_OK;
2197 }
2198
2199 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2200 for (i = 0; i < xpath_len; i++)
2201 {
2202 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2203
2204 }
2205
2206 /*
2207 //a -> a/.*
2208 //a/b -> b/a/.*
2209 /a -> a/
2210 /a/b -> b/a/
2211
2212 / -> none
2213
2214 a[@attr = value]/b[@other = othervalue]
2215
2216 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2217 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2218 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2219 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2220 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2221 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2222
2223 */
2224
2225 dict_grep_cmap(zh->reg->dict, 0, 0);
2226
2227 {
2228 int level = xpath_len;
2229 int first_path = 1;
2230
2231 while (--level >= 0)
2232 {
2233 WRBUF xpath_rev = wrbuf_alloc();
2234 int i;
2235 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2236
2237 for (i = level; i >= 1; --i)
2238 {
2239 const char *cp = xpath[i].part;
2240 if (*cp)
2241 {
2242 for (; *cp; cp++)
2243 {
2244 if (*cp == '*')
2245 wrbuf_puts(xpath_rev, "[^/]*");
2246 else if (*cp == ' ')
2247 wrbuf_puts(xpath_rev, "\001 ");
2248 else
2249 wrbuf_putc(xpath_rev, *cp);
2250
2251 /* wrbuf_putc does not null-terminate , but
2252 wrbuf_puts below ensures it does.. so xpath_rev
2253 is OK iff length is > 0 */
2254 }
2255 wrbuf_puts(xpath_rev, "/");
2256 }
2257 else if (i == 1) /* // case */
2258 wrbuf_puts(xpath_rev, ".*");
2259 }
2260 if (xpath[level].predicate &&
2261 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2262 xpath[level].predicate->u.relation.name[0])
2263 {
2264 WRBUF wbuf = wrbuf_alloc();
2265 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2266 if (xpath[level].predicate->u.relation.value)
2267 {
2268 const char *cp = xpath[level].predicate->u.relation.value;
2269 wrbuf_putc(wbuf, '=');
2270
2271 while (*cp)
2272 {
2273 if (strchr(REGEX_CHARS, *cp))
2274 wrbuf_putc(wbuf, '\\');
2275 wrbuf_putc(wbuf, *cp);
2276 cp++;
2277 }
2278 }
2279 rset_attr = xpath_trunc(
2280 zh, stream, "0", wrbuf_cstr(wbuf),
2281 ZEBRA_XPATH_ATTR_NAME,
2282 rset_nmem, kc);
2283 wrbuf_destroy(wbuf);
2284 }
2285 else
2286 {
2287 if (!first_path)
2288 {
2289 wrbuf_destroy(xpath_rev);
2290 continue;
2291 }
2292 }
2293 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2294 wrbuf_cstr(xpath_rev));
2295 if (wrbuf_len(xpath_rev))
2296 {
2297 rset_start_tag = xpath_trunc(zh, stream, "0",
2298 wrbuf_cstr(xpath_rev),
2299 ZEBRA_XPATH_ELM_BEGIN,
2300 rset_nmem, kc);
2301 if (always_matches)
2302 rset = rset_start_tag;
2303 else
2304 {
2305 rset_end_tag = xpath_trunc(zh, stream, "0",
2306 wrbuf_cstr(xpath_rev),
2307 ZEBRA_XPATH_ELM_END,
2308 rset_nmem, kc);
2309
2310 rset = rset_create_between(rset_nmem, kc, kc->scope,
2311 rset_start_tag, rset,
2312 rset_end_tag, rset_attr);
2313 }
2314 }
2315 wrbuf_destroy(xpath_rev);
2316 first_path = 0;
2317 }
2318 }
2319 *rset_out = rset;
2320 return ZEBRA_OK;
2321 }
2322
2323 #define MAX_XPATH_STEPS 10
2324
2325 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2326 Z_AttributesPlusTerm *zapt,
2327 const Odr_oid *attributeSet,
2328 zint hits_limit, NMEM stream,
2329 Z_SortKeySpecList *sort_sequence,
2330 NMEM rset_nmem,
2331 RSET *rset,
2332 struct rset_key_control *kc);
2333
rpn_search_APT(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,Z_SortKeySpecList * sort_sequence,int num_bases,const char ** basenames,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)2334 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2335 const Odr_oid *attributeSet,
2336 zint hits_limit, NMEM stream,
2337 Z_SortKeySpecList *sort_sequence,
2338 int num_bases, const char **basenames,
2339 NMEM rset_nmem,
2340 RSET *rset,
2341 struct rset_key_control *kc)
2342 {
2343 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2344 ZEBRA_RES res = ZEBRA_OK;
2345 int i;
2346 for (i = 0; i < num_bases; i++)
2347 {
2348
2349 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2350 {
2351 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2352 basenames[i]);
2353 res = ZEBRA_FAIL;
2354 break;
2355 }
2356 res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2357 sort_sequence,
2358 rset_nmem, rsets+i, kc);
2359 if (res != ZEBRA_OK)
2360 break;
2361 }
2362 if (res != ZEBRA_OK)
2363 { /* must clean up the already created sets */
2364 while (--i >= 0)
2365 rset_delete(rsets[i]);
2366 *rset = 0;
2367 }
2368 else
2369 {
2370 if (num_bases == 1)
2371 *rset = rsets[0];
2372 else if (num_bases == 0)
2373 *rset = rset_create_null(rset_nmem, kc, 0);
2374 else
2375 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2376 num_bases, rsets);
2377 }
2378 return res;
2379 }
2380
rpn_search_database(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,Z_SortKeySpecList * sort_sequence,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)2381 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2382 Z_AttributesPlusTerm *zapt,
2383 const Odr_oid *attributeSet,
2384 zint hits_limit, NMEM stream,
2385 Z_SortKeySpecList *sort_sequence,
2386 NMEM rset_nmem,
2387 RSET *rset,
2388 struct rset_key_control *kc)
2389 {
2390 ZEBRA_RES res = ZEBRA_OK;
2391 const char *index_type;
2392 char *search_type = NULL;
2393 char rank_type[128];
2394 int complete_flag;
2395 int sort_flag;
2396 char termz[IT_MAX_WORD+1];
2397 int xpath_len;
2398 const char *xpath_use = 0;
2399 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2400
2401 if (!log_level_set)
2402 {
2403 log_level_rpn = yaz_log_module_level("rpn");
2404 log_level_set = 1;
2405 }
2406 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2407 rank_type, &complete_flag, &sort_flag);
2408
2409 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2410 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2411 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2412 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2413
2414 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2415 return ZEBRA_FAIL;
2416
2417 if (sort_flag)
2418 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2419 rank_type, rset_nmem, rset, kc);
2420 /* consider if an X-Path query is used */
2421 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2422 xpath, MAX_XPATH_STEPS, stream);
2423 if (xpath_len >= 0)
2424 {
2425 if (xpath[xpath_len-1].part[0] == '@')
2426 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2427 else
2428 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2429
2430 if (1)
2431 {
2432 AttrType relation;
2433 int relation_value;
2434
2435 attr_init_APT(&relation, zapt, 2);
2436 relation_value = attr_find(&relation, NULL);
2437
2438 if (relation_value == 103) /* alwaysmatches */
2439 {
2440 *rset = 0; /* signal no "term" set */
2441 return rpn_search_xpath(zh, stream, rank_type, *rset,
2442 xpath_len, xpath, rset_nmem, rset, kc);
2443 }
2444 }
2445 }
2446
2447 /* search using one of the various search type strategies
2448 termz is our UTF-8 search term
2449 attributeSet is top-level default attribute set
2450 stream is ODR for search
2451 reg_id is the register type
2452 complete_flag is 1 for complete subfield, 0 for incomplete
2453 xpath_use is use-attribute to be used for X-Path search, 0 for none
2454 */
2455 if (!strcmp(search_type, "phrase"))
2456 {
2457 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2458 stream,
2459 index_type, complete_flag, rank_type,
2460 xpath_use,
2461 rset_nmem,
2462 rset, kc);
2463 }
2464 else if (!strcmp(search_type, "and-list"))
2465 {
2466 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2467 stream,
2468 index_type, complete_flag, rank_type,
2469 xpath_use,
2470 rset_nmem,
2471 rset, kc);
2472 }
2473 else if (!strcmp(search_type, "or-list"))
2474 {
2475 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2476 stream,
2477 index_type, complete_flag, rank_type,
2478 xpath_use,
2479 rset_nmem,
2480 rset, kc);
2481 }
2482 else if (!strcmp(search_type, "local"))
2483 {
2484 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2485 rank_type, rset_nmem, rset, kc);
2486 }
2487 else if (!strcmp(search_type, "numeric"))
2488 {
2489 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2490 stream,
2491 index_type, complete_flag, rank_type,
2492 xpath_use,
2493 rset_nmem,
2494 rset, kc);
2495 }
2496 else
2497 {
2498 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2499 res = ZEBRA_FAIL;
2500 }
2501 if (res != ZEBRA_OK)
2502 return res;
2503 if (!*rset)
2504 return ZEBRA_FAIL;
2505 return rpn_search_xpath(zh, stream, rank_type, *rset,
2506 xpath_len, xpath, rset_nmem, rset, kc);
2507 }
2508
2509 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2510 const Odr_oid *attributeSet,
2511 zint hits_limit,
2512 NMEM stream, NMEM rset_nmem,
2513 Z_SortKeySpecList *sort_sequence,
2514 int num_bases, const char **basenames,
2515 RSET **result_sets, int *num_result_sets,
2516 Z_Operator *parent_op,
2517 struct rset_key_control *kc);
2518
rpn_get_top_approx_limit(ZebraHandle zh,Z_RPNStructure * zs,zint * approx_limit)2519 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2520 zint *approx_limit)
2521 {
2522 ZEBRA_RES res = ZEBRA_OK;
2523 if (zs->which == Z_RPNStructure_complex)
2524 {
2525 if (res == ZEBRA_OK)
2526 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2527 approx_limit);
2528 if (res == ZEBRA_OK)
2529 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2530 approx_limit);
2531 }
2532 else if (zs->which == Z_RPNStructure_simple)
2533 {
2534 if (zs->u.simple->which == Z_Operand_APT)
2535 {
2536 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2537 AttrType global_hits_limit_attr;
2538 int l;
2539
2540 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2541
2542 l = attr_find(&global_hits_limit_attr, NULL);
2543 if (l != -1)
2544 *approx_limit = l;
2545 }
2546 }
2547 return res;
2548 }
2549
rpn_search_top(ZebraHandle zh,Z_RPNStructure * zs,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,NMEM rset_nmem,Z_SortKeySpecList * sort_sequence,int num_bases,const char ** basenames,RSET * result_set)2550 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2551 const Odr_oid *attributeSet,
2552 zint hits_limit,
2553 NMEM stream, NMEM rset_nmem,
2554 Z_SortKeySpecList *sort_sequence,
2555 int num_bases, const char **basenames,
2556 RSET *result_set)
2557 {
2558 RSET *result_sets = 0;
2559 int num_result_sets = 0;
2560 ZEBRA_RES res;
2561 struct rset_key_control *kc = zebra_key_control_create(zh);
2562
2563 res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2564 stream, rset_nmem,
2565 sort_sequence,
2566 num_bases, basenames,
2567 &result_sets, &num_result_sets,
2568 0 /* no parent op */,
2569 kc);
2570 if (res != ZEBRA_OK)
2571 {
2572 int i;
2573 for (i = 0; i < num_result_sets; i++)
2574 rset_delete(result_sets[i]);
2575 *result_set = 0;
2576 }
2577 else
2578 {
2579 assert(num_result_sets == 1);
2580 assert(result_sets);
2581 assert(*result_sets);
2582 *result_set = *result_sets;
2583 }
2584 (*kc->dec)(kc);
2585 return res;
2586 }
2587
rpn_search_structure(ZebraHandle zh,Z_RPNStructure * zs,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,NMEM rset_nmem,Z_SortKeySpecList * sort_sequence,int num_bases,const char ** basenames,RSET ** result_sets,int * num_result_sets,Z_Operator * parent_op,struct rset_key_control * kc)2588 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2589 const Odr_oid *attributeSet, zint hits_limit,
2590 NMEM stream, NMEM rset_nmem,
2591 Z_SortKeySpecList *sort_sequence,
2592 int num_bases, const char **basenames,
2593 RSET **result_sets, int *num_result_sets,
2594 Z_Operator *parent_op,
2595 struct rset_key_control *kc)
2596 {
2597 *num_result_sets = 0;
2598 if (zs->which == Z_RPNStructure_complex)
2599 {
2600 ZEBRA_RES res;
2601 Z_Operator *zop = zs->u.complex->roperator;
2602 RSET *result_sets_l = 0;
2603 int num_result_sets_l = 0;
2604 RSET *result_sets_r = 0;
2605 int num_result_sets_r = 0;
2606
2607 res = rpn_search_structure(zh, zs->u.complex->s1,
2608 attributeSet, hits_limit, stream, rset_nmem,
2609 sort_sequence,
2610 num_bases, basenames,
2611 &result_sets_l, &num_result_sets_l,
2612 zop, kc);
2613 if (res != ZEBRA_OK)
2614 {
2615 int i;
2616 for (i = 0; i < num_result_sets_l; i++)
2617 rset_delete(result_sets_l[i]);
2618 return res;
2619 }
2620 res = rpn_search_structure(zh, zs->u.complex->s2,
2621 attributeSet, hits_limit, stream, rset_nmem,
2622 sort_sequence,
2623 num_bases, basenames,
2624 &result_sets_r, &num_result_sets_r,
2625 zop, kc);
2626 if (res != ZEBRA_OK)
2627 {
2628 int i;
2629 for (i = 0; i < num_result_sets_l; i++)
2630 rset_delete(result_sets_l[i]);
2631 for (i = 0; i < num_result_sets_r; i++)
2632 rset_delete(result_sets_r[i]);
2633 return res;
2634 }
2635
2636 /* make a new list of result for all children */
2637 *num_result_sets = num_result_sets_l + num_result_sets_r;
2638 *result_sets = nmem_malloc(stream, *num_result_sets *
2639 sizeof(**result_sets));
2640 memcpy(*result_sets, result_sets_l,
2641 num_result_sets_l * sizeof(**result_sets));
2642 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2643 num_result_sets_r * sizeof(**result_sets));
2644
2645 if (!parent_op || parent_op->which != zop->which
2646 || (zop->which != Z_Operator_and &&
2647 zop->which != Z_Operator_or))
2648 {
2649 /* parent node different from this one (or non-present) */
2650 /* we must combine result sets now */
2651 RSET rset;
2652 switch (zop->which)
2653 {
2654 case Z_Operator_and:
2655 rset = rset_create_and(rset_nmem, kc,
2656 kc->scope,
2657 *num_result_sets, *result_sets);
2658 break;
2659 case Z_Operator_or:
2660 rset = rset_create_or(rset_nmem, kc,
2661 kc->scope, 0, /* termid */
2662 *num_result_sets, *result_sets);
2663 break;
2664 case Z_Operator_and_not:
2665 rset = rset_create_not(rset_nmem, kc,
2666 kc->scope,
2667 (*result_sets)[0],
2668 (*result_sets)[1]);
2669 break;
2670 case Z_Operator_prox:
2671 if (zop->u.prox->which != Z_ProximityOperator_known)
2672 {
2673 zebra_setError(zh,
2674 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2675 0);
2676 return ZEBRA_FAIL;
2677 }
2678 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2679 {
2680 zebra_setError_zint(zh,
2681 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2682 *zop->u.prox->u.known);
2683 return ZEBRA_FAIL;
2684 }
2685 else
2686 {
2687 rset = rset_create_prox(rset_nmem, kc,
2688 kc->scope,
2689 *num_result_sets, *result_sets,
2690 *zop->u.prox->ordered,
2691 (!zop->u.prox->exclusion ?
2692 0 : *zop->u.prox->exclusion),
2693 *zop->u.prox->relationType,
2694 *zop->u.prox->distance );
2695 }
2696 break;
2697 default:
2698 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2699 return ZEBRA_FAIL;
2700 }
2701 *num_result_sets = 1;
2702 *result_sets = nmem_malloc(stream, *num_result_sets *
2703 sizeof(**result_sets));
2704 (*result_sets)[0] = rset;
2705 }
2706 }
2707 else if (zs->which == Z_RPNStructure_simple)
2708 {
2709 RSET rset;
2710 ZEBRA_RES res;
2711
2712 if (zs->u.simple->which == Z_Operand_APT)
2713 {
2714 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2715 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2716 attributeSet, hits_limit,
2717 stream, sort_sequence,
2718 num_bases, basenames, rset_nmem, &rset,
2719 kc);
2720 if (res != ZEBRA_OK)
2721 return res;
2722 }
2723 else if (zs->u.simple->which == Z_Operand_resultSetId)
2724 {
2725 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2726 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2727 if (!rset)
2728 {
2729 zebra_setError(zh,
2730 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2731 zs->u.simple->u.resultSetId);
2732 return ZEBRA_FAIL;
2733 }
2734 rset_dup(rset);
2735 }
2736 else
2737 {
2738 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2739 return ZEBRA_FAIL;
2740 }
2741 *num_result_sets = 1;
2742 *result_sets = nmem_malloc(stream, *num_result_sets *
2743 sizeof(**result_sets));
2744 (*result_sets)[0] = rset;
2745 }
2746 else
2747 {
2748 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2749 return ZEBRA_FAIL;
2750 }
2751 return ZEBRA_OK;
2752 }
2753
2754
2755
2756 /*
2757 * Local variables:
2758 * c-basic-offset: 4
2759 * c-file-style: "Stroustrup"
2760 * indent-tabs-mode: nil
2761 * End:
2762 * vim: shiftwidth=4 tabstop=8 expandtab
2763 */
2764
2765