1 /* This file is part of the Zebra server.
2    Copyright (C) 2004-2013 Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32 
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39 
/* Non-zero once log_level_rpn has been initialised (done lazily in
   add_isam_p). */
40 static int log_level_set = 0;
/* Log level for the "rpn" module, as returned by
   yaz_log_module_level("rpn"); stays 0 until add_isam_p runs. */
41 static int log_level_rpn = 0;
42 
/* NOTE(review): TERMSET_DISABLE is not referenced in this part of the
   file; its consumer (if any) is elsewhere. */
43 #define TERMSET_DISABLE 1
44 
rpn_char_map_handler(void * vp,const char ** from,int len)45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63 
/* Prepare dictionary grep character mapping for the map zm.
 *  reg:       register whose dictionary (reg->dict) is configured
 *  zm:        the zebra map; also stored in map_info->zm
 *  map_info:  caller-owned context passed back to rpn_char_map_handler
 * For ICU maps no handler is installed (both arguments are 0); for all
 * other maps rpn_char_map_handler is installed with map_info as context.
 */
void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
                          struct rpn_char_map_info *map_info)
{
    map_info->zm = zm;

    if (!zebra_maps_is_icu(zm))
    {
        dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
        return;
    }
    dict_grep_cmap(reg->dict, 0, 0);
}
73 
/* When defined, struct grep_info carries a term_no array that add_isam_p
   grows in step with isam_p_buf. */
74 #define TERM_COUNT
75 
/* Accumulator filled by add_isam_p for each dictionary hit. */
76 struct grep_info {
77 #ifdef TERM_COUNT
    /* int array with the same capacity (isam_p_size) as isam_p_buf */
78     int *term_no;
79 #endif
    /* collected ISAM_P values; reallocated by add_isam_p when full */
80     ISAM_P *isam_p_buf;
    /* allocated capacity of isam_p_buf, in elements */
81     int isam_p_size;
    /* number of elements of isam_p_buf currently in use */
82     int isam_p_indx;
    /* add_isam_p refuses new entries once isam_p_indx >= trunc_max - 1 */
83     int trunc_max;
    /* handle used for term untranslation and explain lookups */
84     ZebraHandle zh;
    /* index type passed to zebra_term_untrans() */
85     const char *index_type;
    /* when non-null, add_isam_p records each term in this set */
86     ZebraSet termset;
87 };
88 
add_isam_p(const char * name,const char * info,struct grep_info * p)89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101 
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT
106         int *new_term_no;
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110 					    p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                    p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118 
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                    p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132 
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140 
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146 
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148 			 index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153 
/* Grep callback adapter: p is the struct grep_info accumulator; the hit
   is forwarded to add_isam_p and its result returned unchanged. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *accum = (struct grep_info *) p;

    return add_isam_p(name, info, accum);
}
158 
/* term_pre: skip leading input that the map zm classifies as space.
 *  src:    in/out; advanced past the skipped input
 *  ct1:    optional set of characters at which skipping stops immediately
 *          (may be 0)
 *  first:  passed through to zebra_maps_input()
 * Returns the character at the new position (0 at end of string).
 */
static int term_pre(zebra_map_t zm, const char **src,
                    const char *ct1, int first)
{
    const char *cur = *src;

    while (*cur != '\0' && !(ct1 && strchr(ct1, *cur)))
    {
        const char *next = cur;
        const char **mapped = zebra_maps_input(zm, &next, strlen(next), first);

        if (**mapped != *CHR_SPACE)
            break;
        cur = next;   /* the unit mapped to space: consume it */
    }
    *src = cur;
    return *cur;
}
179 
180 
/* esc_str: render in_buf as a printable "HH:c  " dump in out_buf.
 *  out_buf/out_size:  destination; out_size must exceed 20
 *  in_buf/in_size:    bytes to dump
 * Each byte becomes two hex digits, a colon, the character itself (or '?'
 * when outside 32..126) and two spaces.  Once the output length exceeds
 * out_size-20 the dump is cut short and ".." is appended.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* current length of out_buf */
    int k = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (k < in_size)
    {
        unsigned byte = (unsigned char) in_buf[k++];
        int shown = (byte >= 32 && byte <= 126) ? (int) byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            return;
        }
    }
}
206 
207 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
208 
add_non_space(const char * start,const char * end,WRBUF term_dict,WRBUF display_term,const char ** map,int q_map_match)209 static void add_non_space(const char *start, const char *end,
210                           WRBUF term_dict,
211                           WRBUF display_term,
212                           const char **map, int q_map_match)
213 {
214     size_t sz = end - start;
215 
216     wrbuf_write(display_term, start, sz);
217     if (!q_map_match)
218     {
219         while (start < end)
220         {
221             if (strchr(REGEX_CHARS, *start))
222                 wrbuf_putc(term_dict, '\\');
223             wrbuf_putc(term_dict, *start);
224             start++;
225         }
226     }
227     else
228     {
229         char tmpbuf[80];
230         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231 
232         wrbuf_puts(term_dict, map[0]);
233     }
234 }
235 
236 
/* ICU sort keys seem to be of the form
   basechars \x01 accents \x01 length
   For now we just right-truncate to basechars. This
   may give false hits because accents are ignored.

   buf/i: sort key and its length.  Returns the offset of the second
   \x01 counted from the end (i.e. the length of basechars), or 0 when
   fewer than two \x01 separators are found.
*/
static size_t icu_basechars(const char *buf, size_t i)
{
    int pass;

    /* pass 0 strips the length part, pass 1 strips the accents part */
    for (pass = 0; pass < 2; pass++)
    {
        while (i > 0)
        {
            i--;
            if (buf[i] == '\x01')
                break;
        }
    }
    return i; /* only basechars left */
}
250 
/* term_102_icu: build a dictionary regex from a term that may contain
 * regex operators, for ICU maps.
 *  src:           in/out; on return points at where scanning stopped
 *  term_dict:     receives the regex
 *  space_split:   non-zero: stop at the first space after the leading ones
 *  display_term:  receives the display form
 * Runs of ordinary characters are tokenized through zm, reduced to their
 * base characters (icu_basechars) and escaped; characters in REGEX_CHARS
 * or '-' are copied through unchanged.  If at least one run was emitted,
 * "\x01\x01.*" is appended.  Returns the number of runs emitted.
 *
 * NOTE(review): when the loop stops at a space (space_split set) the run
 * collected so far is not tokenized or emitted - confirm this is intended.
 */
static int term_102_icu(zebra_map_t zm,
                        const char **src, WRBUF term_dict, int space_split,
                        WRBUF display_term)
{
    const char *seg = *src;   /* start of the pending ordinary run */
    const char *cur;          /* scan position */
    int emitted = 0;

    while (*seg == ' ')
        seg++;
    cur = seg;
    while (!(space_split && *cur == ' '))
    {
        if (*cur != '\0' && strchr(REGEX_CHARS "-", *cur) == NULL)
        {
            cur++;            /* ordinary character: extend the run */
            continue;
        }
        /* EOF or regex reserved char */
        if (cur != seg)
        {
            const char *res_buf = 0;
            size_t res_len = 0;
            const char *display_buf;
            size_t display_len;

            zebra_map_tokenize_start(zm, seg, cur - seg);

            if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
                                        &display_buf, &display_len))
            {
                size_t keep = icu_basechars(res_buf, res_len);
                size_t k;

                for (k = 0; k < keep; k++)
                {
                    const char ch = res_buf[k];

                    if (strchr(REGEX_CHARS "\\", ch))
                        wrbuf_putc(term_dict, '\\');
                    if (ch < 32)
                        wrbuf_putc(term_dict, '\x01');

                    wrbuf_putc(term_dict, ch);
                }
                wrbuf_write(display_term, display_buf, display_len);

                emitted++;
            }
        }
        if (*cur == '\0')
            break;

        /* reserved character goes through untouched */
        wrbuf_putc(term_dict, *cur);
        wrbuf_putc(display_term, *cur);

        seg = ++cur;
    }
    if (emitted)
        wrbuf_puts(term_dict, "\x01\x01.*");
    *src = cur;
    return emitted;
}
312 
/* term_100_icu: build a dictionary regex for one plain term, for ICU maps.
 *  src:           in/out; set to the end of the consumed term (left
 *                 untouched when only spaces remain)
 *  term_dict:     receives the escaped term
 *  space_split:   non-zero: the term ends at the next space; zero: the
 *                 term is the rest of the string
 *  display_term:  receives the display form from the tokenizer
 *  mode:          0: whole sort key is used as is; non-zero: the key is
 *                 cut down with icu_basechars(); bit 2 prepends ".*";
 *                 bit 1 appends ".*"; any other non-zero mode appends
 *                 "\x01\x01.*"
 * Returns 1 if a token was produced, 0 otherwise.
 */
static int term_100_icu(zebra_map_t zm,
                        const char **src, WRBUF term_dict, int space_split,
                        WRBUF display_term,
                        int mode)
{
    const char *begin = *src;
    const char *end;
    const char *res_buf = 0;
    size_t res_len = 0;
    const char *display_buf;
    size_t display_len;
    size_t k;

    while (*begin == ' ')
        begin++;

    if (*begin == '\0')
        return 0;

    if (!space_split)
        end = begin + strlen(begin);
    else
    {
        for (end = begin; *end && *end != ' '; end++)
            ;
    }

    *src = end;

    zebra_map_tokenize_start(zm, begin, end - begin);

    if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
                                 &display_buf, &display_len))
        return 0;

    wrbuf_write(display_term, display_buf, display_len);
    if (mode)
        res_len = icu_basechars(res_buf, res_len);
    if (mode & 2)
        wrbuf_puts(term_dict, ".*");
    for (k = 0; k < res_len; k++)
    {
        const char ch = res_buf[k];

        if (strchr(REGEX_CHARS "\\", ch))
            wrbuf_putc(term_dict, '\\');
        if (ch < 32)
            wrbuf_putc(term_dict, '\x01');

        wrbuf_putc(term_dict, ch);
    }
    if (mode & 1)
        wrbuf_puts(term_dict, ".*");
    else if (mode)
        wrbuf_puts(term_dict, "\x01\x01.*");
    return 1;
}
371 
372 /* term_100: handle term, where trunc = none(no operators at all) */
term_100(zebra_map_t zm,const char ** src,WRBUF term_dict,int space_split,WRBUF display_term)373 static int term_100(zebra_map_t zm,
374 		    const char **src, WRBUF term_dict, int space_split,
375 		    WRBUF display_term)
376 {
377     const char *s0;
378     const char **map;
379     int i = 0;
380 
381     const char *space_start = 0;
382     const char *space_end = 0;
383 
384     if (!term_pre(zm, src, 0, !space_split))
385         return 0;
386     s0 = *src;
387     while (*s0)
388     {
389         const char *s1 = s0;
390 	int q_map_match = 0;
391         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
392         if (space_split)
393         {
394             if (**map == *CHR_SPACE)
395                 break;
396         }
397         else  /* complete subfield only. */
398         {
399             if (**map == *CHR_SPACE)
400             {   /* save space mapping for later  .. */
401                 space_start = s1;
402                 space_end = s0;
403                 continue;
404             }
405             else if (space_start)
406             {   /* reload last space */
407                 while (space_start < space_end)
408                 {
409                     if (strchr(REGEX_CHARS, *space_start))
410                         wrbuf_putc(term_dict, '\\');
411                     wrbuf_putc(display_term, *space_start);
412                     wrbuf_putc(term_dict, *space_start);
413                     space_start++;
414 
415                 }
416                 /* and reset */
417                 space_start = space_end = 0;
418             }
419         }
420         i++;
421 
422         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
423     }
424     *src = s0;
425     return i;
426 }
427 
428 /* term_101: handle term, where trunc = Process # */
term_101(zebra_map_t zm,const char ** src,WRBUF term_dict,int space_split,WRBUF display_term)429 static int term_101(zebra_map_t zm,
430 		    const char **src, WRBUF term_dict, int space_split,
431 		    WRBUF display_term)
432 {
433     const char *s0;
434     const char **map;
435     int i = 0;
436 
437     if (!term_pre(zm, src, "#", !space_split))
438         return 0;
439     s0 = *src;
440     while (*s0)
441     {
442         if (*s0 == '#')
443         {
444             i++;
445             wrbuf_puts(term_dict, ".*");
446             wrbuf_putc(display_term, *s0);
447             s0++;
448         }
449         else
450         {
451 	    const char *s1 = s0;
452 	    int q_map_match = 0;
453 	    map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
454             if (space_split && **map == *CHR_SPACE)
455                 break;
456 
457             i++;
458             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
459         }
460     }
461     *src = s0;
462     return i;
463 }
464 
465 /* term_103: handle term, where trunc = re-2 (regular expressions) */
term_103(zebra_map_t zm,const char ** src,WRBUF term_dict,int * errors,int space_split,WRBUF display_term)466 static int term_103(zebra_map_t zm, const char **src,
467 		    WRBUF term_dict, int *errors, int space_split,
468 		    WRBUF display_term)
469 {
470     int i = 0;
471     const char *s0;
472     const char **map;
473 
474     if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
475         return 0;
476     s0 = *src;
477     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
478         isdigit(((const unsigned char *)s0)[1]))
479     {
480         *errors = s0[1] - '0';
481         s0 += 3;
482         if (*errors > 3)
483             *errors = 3;
484     }
485     while (*s0)
486     {
487         if (strchr("^\\()[].*+?|-", *s0))
488         {
489             wrbuf_putc(display_term, *s0);
490             wrbuf_putc(term_dict, *s0);
491             s0++;
492             i++;
493         }
494         else
495         {
496 	    const char *s1 = s0;
497 	    int q_map_match = 0;
498 	    map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
499             if (space_split && **map == *CHR_SPACE)
500                 break;
501 
502             i++;
503             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
504         }
505     }
506     *src = s0;
507 
508     return i;
509 }
510 
511 /* term_103: handle term, where trunc = re-1 (regular expressions) */
term_102(zebra_map_t zm,const char ** src,WRBUF term_dict,int space_split,WRBUF display_term)512 static int term_102(zebra_map_t zm, const char **src,
513 		    WRBUF term_dict, int space_split, WRBUF display_term)
514 {
515     return term_103(zm, src, term_dict, NULL, space_split, display_term);
516 }
517 
518 
519 /* term_104: handle term, process ?n * # */
term_104(zebra_map_t zm,const char ** src,WRBUF term_dict,int space_split,WRBUF display_term)520 static int term_104(zebra_map_t zm, const char **src,
521                     WRBUF term_dict, int space_split, WRBUF display_term)
522 {
523     const char *s0;
524     const char **map;
525     int i = 0;
526 
527     if (!term_pre(zm, src, "?*#", !space_split))
528         return 0;
529     s0 = *src;
530     while (*s0)
531     {
532         if (*s0 == '?')
533         {
534             i++;
535             wrbuf_putc(display_term, *s0);
536             s0++;
537             if (*s0 >= '0' && *s0 <= '9')
538             {
539                 int limit = 0;
540                 while (*s0 >= '0' && *s0 <= '9')
541                 {
542                     limit = limit * 10 + (*s0 - '0');
543                     wrbuf_putc(display_term, *s0);
544                     s0++;
545                 }
546                 if (limit > 20)
547                     limit = 20;
548                 while (--limit >= 0)
549                 {
550                     wrbuf_puts(term_dict, ".?");
551                 }
552             }
553             else
554             {
555                 wrbuf_puts(term_dict, ".*");
556             }
557         }
558         else if (*s0 == '*')
559         {
560             i++;
561             wrbuf_puts(term_dict, ".*");
562             wrbuf_putc(display_term, *s0);
563             s0++;
564         }
565         else if (*s0 == '#')
566         {
567             i++;
568             wrbuf_puts(term_dict, ".");
569             wrbuf_putc(display_term, *s0);
570             s0++;
571         }
572 	else
573         {
574 	    const char *s1 = s0;
575 	    int q_map_match = 0;
576 	    map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
577             if (space_split && **map == *CHR_SPACE)
578                 break;
579 
580             i++;
581             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
582         }
583     }
584     *src = s0;
585     return i;
586 }
587 
588 /* term_105/106: handle term, process * ! and possibly right_truncate */
term_105(zebra_map_t zm,const char ** src,WRBUF term_dict,int space_split,WRBUF display_term,int right_truncate)589 static int term_105(zebra_map_t zm, const char **src,
590                     WRBUF term_dict, int space_split,
591 		    WRBUF display_term, int right_truncate)
592 {
593     const char *s0;
594     const char **map;
595     int i = 0;
596 
597     if (!term_pre(zm, src, "\\*!", !space_split))
598         return 0;
599     s0 = *src;
600     while (*s0)
601     {
602         if (*s0 == '*')
603         {
604             i++;
605             wrbuf_puts(term_dict, ".*");
606             wrbuf_putc(display_term, *s0);
607             s0++;
608         }
609         else if (*s0 == '!')
610         {
611             i++;
612             wrbuf_putc(term_dict, '.');
613             wrbuf_putc(display_term, *s0);
614             s0++;
615         }
616         else if (*s0 == '\\')
617         {
618             i++;
619             wrbuf_puts(term_dict, "\\\\");
620             wrbuf_putc(display_term, *s0);
621             s0++;
622         }
623 	else
624         {
625 	    const char *s1 = s0;
626 	    int q_map_match = 0;
627 	    map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
628             if (space_split && **map == *CHR_SPACE)
629                 break;
630 
631             i++;
632             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
633         }
634     }
635     if (right_truncate)
636         wrbuf_puts(term_dict, ".*");
637     *src = s0;
638     return i;
639 }
640 
641 
642 /* gen_regular_rel - generate regular expression from relation
643  *  val:     border value (inclusive)
644  *  islt:    1 if <=; 0 if >=.
645  */
gen_regular_rel(WRBUF term_dict,int val,int islt)646 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
647 {
648     char dst_buf[20*5*20]; /* assuming enough for expansion */
649     char *dst = dst_buf;
650     int dst_p;
651     int w, d, i;
652     int pos = 0;
653     char numstr[20];
654 
655     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
656     if (val >= 0)
657     {
658         if (islt)
659             strcpy(dst, "(-[0-9]+|(");
660         else
661             strcpy(dst, "((");
662     }
663     else
664     {
665         if (!islt)
666         {
667             strcpy(dst, "([0-9]+|-(");
668             islt = 1;
669         }
670         else
671         {
672             strcpy(dst, "(-(");
673             islt = 0;
674         }
675         val = -val;
676     }
677     dst_p = strlen(dst);
678     sprintf(numstr, "%d", val);
679     for (w = strlen(numstr); --w >= 0; pos++)
680     {
681         d = numstr[w];
682         if (pos > 0)
683         {
684             if (islt)
685             {
686                 if (d == '0')
687                     continue;
688                 d--;
689             }
690             else
691             {
692                 if (d == '9')
693                     continue;
694                 d++;
695             }
696         }
697 
698         strcpy(dst + dst_p, numstr);
699         dst_p = strlen(dst) - pos - 1;
700 
701         if (islt)
702         {
703             if (d != '0')
704             {
705                 dst[dst_p++] = '[';
706                 dst[dst_p++] = '0';
707                 dst[dst_p++] = '-';
708                 dst[dst_p++] = d;
709                 dst[dst_p++] = ']';
710             }
711             else
712                 dst[dst_p++] = d;
713         }
714         else
715         {
716             if (d != '9')
717             {
718                 dst[dst_p++] = '[';
719                 dst[dst_p++] = d;
720                 dst[dst_p++] = '-';
721                 dst[dst_p++] = '9';
722                 dst[dst_p++] = ']';
723             }
724             else
725                 dst[dst_p++] = d;
726         }
727         for (i = 0; i < pos; i++)
728         {
729             dst[dst_p++] = '[';
730             dst[dst_p++] = '0';
731             dst[dst_p++] = '-';
732             dst[dst_p++] = '9';
733             dst[dst_p++] = ']';
734         }
735         dst[dst_p++] = '|';
736     }
737     dst[dst_p] = '\0';
738     if (islt)
739     {
740         /* match everything less than 10^(pos-1) */
741         strcat(dst, "0*");
742         for (i = 1; i < pos; i++)
743             strcat(dst, "[0-9]?");
744     }
745     else
746     {
747         /* match everything greater than 10^pos */
748         for (i = 0; i <= pos; i++)
749             strcat(dst, "[0-9]");
750         strcat(dst, "[0-9]*");
751     }
752     strcat(dst, "))");
753     wrbuf_puts(term_dict, dst);
754 }
755 
/* string_rel_add_char: copy one (possibly backslash-escaped) character
 * from wsrc to term_p.
 *  term_p:  destination buffer
 *  wsrc:    source buffer
 *  indx:    in/out offset into wsrc; advanced by 1, or by 2 when the
 *           character at *indx is a backslash (the backslash and the
 *           byte after it are both copied)
 */
void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
{
    const char *text = wrbuf_cstr(wsrc);
    int at = *indx;

    if (text[at] == '\\')
        wrbuf_putc(term_p, text[at++]);
    wrbuf_putc(term_p, text[at++]);
    *indx = at;
}
767 
768 /*
769  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
770  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
771  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
772  *              ([^-a].*|a[^-b].*|ab[c-].*)
773  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
774  *              ([^a-].*|a[^b-].*|ab[^c-].*)
775  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
776  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
777  */
string_relation(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char ** term_sub,WRBUF term_dict,const Odr_oid * attributeSet,zebra_map_t zm,int space_split,WRBUF display_term,int * error_code)778 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
779 			   const char **term_sub, WRBUF term_dict,
780 			   const Odr_oid *attributeSet,
781 			   zebra_map_t zm, int space_split,
782                            WRBUF display_term,
783 			   int *error_code)
784 {
785     AttrType relation;
786     int relation_value;
787     int i;
788     WRBUF term_component = wrbuf_alloc();
789 
790     attr_init_APT(&relation, zapt, 2);
791     relation_value = attr_find(&relation, NULL);
792 
793     *error_code = 0;
794     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
795     switch (relation_value)
796     {
797     case 1:
798         if (!term_100(zm, term_sub, term_component, space_split, display_term))
799         {
800             wrbuf_destroy(term_component);
801             return 0;
802         }
803         yaz_log(log_level_rpn, "Relation <");
804 
805         wrbuf_putc(term_dict, '(');
806         for (i = 0; i < wrbuf_len(term_component); )
807         {
808             int j = 0;
809 
810             if (i)
811                 wrbuf_putc(term_dict, '|');
812             while (j < i)
813                 string_rel_add_char(term_dict, term_component, &j);
814 
815             wrbuf_putc(term_dict, '[');
816 
817             wrbuf_putc(term_dict, '^');
818 
819             wrbuf_putc(term_dict, 1);
820             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
821 
822             string_rel_add_char(term_dict, term_component, &i);
823             wrbuf_putc(term_dict, '-');
824 
825             wrbuf_putc(term_dict, ']');
826             wrbuf_putc(term_dict, '.');
827             wrbuf_putc(term_dict, '*');
828         }
829         wrbuf_putc(term_dict, ')');
830         break;
831     case 2:
832         if (!term_100(zm, term_sub, term_component, space_split, display_term))
833         {
834             wrbuf_destroy(term_component);
835             return 0;
836         }
837         yaz_log(log_level_rpn, "Relation <=");
838 
839         wrbuf_putc(term_dict, '(');
840         for (i = 0; i < wrbuf_len(term_component); )
841         {
842             int j = 0;
843 
844             while (j < i)
845                 string_rel_add_char(term_dict, term_component, &j);
846             wrbuf_putc(term_dict, '[');
847 
848             wrbuf_putc(term_dict, '^');
849 
850             wrbuf_putc(term_dict, 1);
851             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
852 
853             string_rel_add_char(term_dict, term_component, &i);
854             wrbuf_putc(term_dict, '-');
855 
856             wrbuf_putc(term_dict, ']');
857             wrbuf_putc(term_dict, '.');
858             wrbuf_putc(term_dict, '*');
859 
860             wrbuf_putc(term_dict, '|');
861         }
862         for (i = 0; i < wrbuf_len(term_component); )
863             string_rel_add_char(term_dict, term_component, &i);
864         wrbuf_putc(term_dict, ')');
865         break;
866     case 5:
867         if (!term_100(zm, term_sub, term_component, space_split, display_term))
868         {
869             wrbuf_destroy(term_component);
870             return 0;
871         }
872         yaz_log(log_level_rpn, "Relation >");
873 
874         wrbuf_putc(term_dict, '(');
875         for (i = 0; i < wrbuf_len(term_component); )
876         {
877             int j = 0;
878 
879             while (j < i)
880                 string_rel_add_char(term_dict, term_component, &j);
881             wrbuf_putc(term_dict, '[');
882 
883             wrbuf_putc(term_dict, '^');
884             wrbuf_putc(term_dict, '-');
885             string_rel_add_char(term_dict, term_component, &i);
886 
887             wrbuf_putc(term_dict, ']');
888             wrbuf_putc(term_dict, '.');
889             wrbuf_putc(term_dict, '*');
890 
891             wrbuf_putc(term_dict, '|');
892         }
893         for (i = 0; i < wrbuf_len(term_component); )
894             string_rel_add_char(term_dict, term_component, &i);
895         wrbuf_putc(term_dict, '.');
896         wrbuf_putc(term_dict, '+');
897         wrbuf_putc(term_dict, ')');
898         break;
899     case 4:
900         if (!term_100(zm, term_sub, term_component, space_split, display_term))
901         {
902             wrbuf_destroy(term_component);
903             return 0;
904         }
905         yaz_log(log_level_rpn, "Relation >=");
906 
907         wrbuf_putc(term_dict, '(');
908         for (i = 0; i < wrbuf_len(term_component); )
909         {
910             int j = 0;
911 
912             if (i)
913                 wrbuf_putc(term_dict, '|');
914             while (j < i)
915                 string_rel_add_char(term_dict, term_component, &j);
916             wrbuf_putc(term_dict, '[');
917 
918             if (i < wrbuf_len(term_component)-1)
919             {
920                 wrbuf_putc(term_dict, '^');
921                 wrbuf_putc(term_dict, '-');
922                 string_rel_add_char(term_dict, term_component, &i);
923             }
924             else
925             {
926                 string_rel_add_char(term_dict, term_component, &i);
927                 wrbuf_putc(term_dict, '-');
928             }
929             wrbuf_putc(term_dict, ']');
930             wrbuf_putc(term_dict, '.');
931             wrbuf_putc(term_dict, '*');
932         }
933         wrbuf_putc(term_dict, ')');
934         break;
935     case 3:
936     case 102:
937     case -1:
938         if (!**term_sub)
939             return 1;
940         yaz_log(log_level_rpn, "Relation =");
941         if (!term_100(zm, term_sub, term_component, space_split, display_term))
942         {
943             wrbuf_destroy(term_component);
944             return 0;
945         }
946         wrbuf_puts(term_dict, "(");
947         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
948         wrbuf_puts(term_dict, ")");
949 	break;
950     case 103:
951         yaz_log(log_level_rpn, "Relation always matches");
952         /* skip to end of term (we don't care what it is) */
953         while (**term_sub != '\0')
954             (*term_sub)++;
955         break;
956     default:
957 	*error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
958         wrbuf_destroy(term_component);
959 	return 0;
960     }
961     wrbuf_destroy(term_component);
962     return 1;
963 }
964 
/* Forward declaration: string_term() is called by search_term() below;
   its definition is not in this part of the file. */
965 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
966 			     const char **term_sub,
967                              WRBUF term_dict,
968 			     const Odr_oid *attributeSet, NMEM stream,
969 			     struct grep_info *grep_info,
970 			     const char *index_type, int complete_flag,
971 			     WRBUF display_term,
972                              const char *xpath_use,
973 			     struct ord_list **ol,
974                              zebra_map_t zm);
975 
zebra_term_limits_APT(ZebraHandle zh,Z_AttributesPlusTerm * zapt,zint * hits_limit_value,const char ** term_ref_id_str,NMEM nmem)976 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
977                                 Z_AttributesPlusTerm *zapt,
978                                 zint *hits_limit_value,
979                                 const char **term_ref_id_str,
980                                 NMEM nmem)
981 {
982     AttrType term_ref_id_attr;
983     AttrType hits_limit_attr;
984     int term_ref_id_int;
985     zint hits_limit_from_attr;
986 
987     attr_init_APT(&hits_limit_attr, zapt, 11);
988     hits_limit_from_attr  = attr_find(&hits_limit_attr, NULL);
989 
990     attr_init_APT(&term_ref_id_attr, zapt, 10);
991     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
992     if (term_ref_id_int >= 0)
993     {
994 	char *res = nmem_malloc(nmem, 20);
995 	sprintf(res, "%d", term_ref_id_int);
996 	*term_ref_id_str = res;
997     }
998     if (hits_limit_from_attr != -1)
999         *hits_limit_value = hits_limit_from_attr;
1000 
1001     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
1002 	    *term_ref_id_str ? *term_ref_id_str : "none",
1003 	    *hits_limit_value);
1004     return ZEBRA_OK;
1005 }
1006 
1007 /** \brief search for term (which may be truncated)
1008  */
search_term(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char ** term_sub,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,struct grep_info * grep_info,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc,zebra_map_t zm)1009 static ZEBRA_RES search_term(ZebraHandle zh,
1010                              Z_AttributesPlusTerm *zapt,
1011                              const char **term_sub,
1012                              const Odr_oid *attributeSet,
1013                              zint hits_limit, NMEM stream,
1014                              struct grep_info *grep_info,
1015                              const char *index_type, int complete_flag,
1016                              const char *rank_type,
1017                              const char *xpath_use,
1018                              NMEM rset_nmem,
1019                              RSET *rset,
1020                              struct rset_key_control *kc,
1021                              zebra_map_t zm)
1022 {
1023     ZEBRA_RES res;
1024     struct ord_list *ol;
1025     zint hits_limit_value = hits_limit;
1026     const char *term_ref_id_str = 0;
1027     WRBUF term_dict = wrbuf_alloc();
1028     WRBUF display_term = wrbuf_alloc();
1029     *rset = 0;
1030     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1031                           stream);
1032     grep_info->isam_p_indx = 0;
1033     res = string_term(zh, zapt, term_sub, term_dict,
1034                       attributeSet, stream, grep_info,
1035 		      index_type, complete_flag,
1036 		      display_term, xpath_use, &ol, zm);
1037     wrbuf_destroy(term_dict);
1038     if (res == ZEBRA_OK && *term_sub)
1039     {
1040         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1041         *rset = rset_trunc(zh, grep_info->isam_p_buf,
1042                            grep_info->isam_p_indx, wrbuf_buf(display_term),
1043                            wrbuf_len(display_term), rank_type,
1044                            1 /* preserve pos */,
1045                            zapt->term->which, rset_nmem,
1046                            kc, kc->scope, ol, index_type, hits_limit_value,
1047                            term_ref_id_str);
1048         if (!*rset)
1049             res = ZEBRA_FAIL;
1050     }
1051     wrbuf_destroy(display_term);
1052     return res;
1053 }
1054 
/** \brief Build the dictionary regexp for the next term and look it up
    \param zh Zebra handle
    \param zapt attributes-plus-term node; truncation (type 5) is read here,
           relation (type 2) is read here for ICU maps
    \param term_sub in/out: start of the term text on entry. After the
           dictionary lookup it is set to the position the term helpers
           advanced to. It is set to 0 when zebra_apt_get_ord() fails or
           when a term helper returns 0 without reporting a relation error
    \param term_dict buffer that is rewound and filled with the regexp
    \param attributeSet passed on to zebra_apt_get_ord() and string_relation()
    \param stream memory used for the ord list
    \param grep_info passed to dict_lookup_grep(); its isam_p_indx is logged
    \param index_type passed on to zebra_apt_get_ord()
    \param complete_flag non-zero turns space splitting off
    \param display_term passed on to the term helpers
    \param xpath_use passed on to zebra_apt_get_ord()
    \param ol output: list created here; the resolved ord is appended to it
    \param zm map; selects the ICU or the non-ICU branch
    \returns ZEBRA_FAIL when the ord cannot be resolved, when the truncation
             or (ICU) relation attribute is unsupported, or when
             string_relation() reports an error; ZEBRA_OK otherwise

    Note that the "unsupported attribute" returns leave *term_sub untouched.
*/
static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
			     const char **term_sub,
                             WRBUF term_dict,
			     const Odr_oid *attributeSet, NMEM stream,
			     struct grep_info *grep_info,
			     const char *index_type, int complete_flag,
			     WRBUF display_term,
                             const char *xpath_use,
			     struct ord_list **ol,
                             zebra_map_t zm)
{
    int r;
    AttrType truncation;
    int truncation_value;
    const char *termp;
    struct rpn_char_map_info rcmi;

    int space_split = complete_flag ? 0 : 1;
    int ord = -1;
    int regex_range = 0;
    int max_pos, prefix_len = 0;
    int relation_error;
    char ord_buf[32];
    int ord_len, i;

    *ol = ord_list_create(stream);

    rpn_char_map_prepare(zh->reg, zm, &rcmi);
    attr_init_APT(&truncation, zapt, 5);
    truncation_value = attr_find(&truncation, NULL);
    yaz_log(log_level_rpn, "truncation value %d", truncation_value);

    termp = *term_sub; /* start of term for each database */

    if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
                          attributeSet, &ord) != ZEBRA_OK)
    {
        *term_sub = 0;
        return ZEBRA_FAIL;
    }

    wrbuf_rewind(term_dict); /* new dictionary regexp term */

    *ol = ord_list_append(stream, *ol, ord);
    ord_len = key_SU_encode(ord, ord_buf);

    /* the regexp starts with the encoded ord as a parenthesized group in
       which every byte is preceded by the escape character */
    wrbuf_putc(term_dict, '(');

    for (i = 0; i < ord_len; i++)
    {
        wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
        wrbuf_putc(term_dict, ord_buf[i]);
    }
    wrbuf_putc(term_dict, ')');

    /* length of the ord group; the term part of the regexp follows it */
    prefix_len = wrbuf_len(term_dict);

    if (zebra_maps_is_icu(zm))
    {
        int relation_value;
        AttrType relation;

        attr_init_APT(&relation, zapt, 2);
        relation_value = attr_find(&relation, NULL);
        if (relation_value == 103) /* always matches */
            termp += strlen(termp); /* move to end of term */
        else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
        {
            /* ICU case */
            /* the last argument of term_100_icu() differs per truncation
               value: 0 (none), 1, 2 or 3 */
            switch (truncation_value)
            {
            case -1:         /* not specified */
            case 100:        /* do not truncate */
                if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
                {
                    *term_sub = 0;
                    return ZEBRA_OK;
                }
                break;
            case 102:
                if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
                {
                    *term_sub = 0;
                    return ZEBRA_OK;
                }
                break;
            case 1:          /* right truncation */
                if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
                {
                    *term_sub = 0;
                    return ZEBRA_OK;
                }
                break;
            case 2:
                if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 2))
                {
                    *term_sub = 0;
                    return ZEBRA_OK;
                }
                break;
            case 3:
                if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 3))
                {
                    *term_sub = 0;
                    return ZEBRA_OK;
                }
                break;
            default:
                zebra_setError_zint(zh,
                                    YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
                                    truncation_value);
                return ZEBRA_FAIL;
            }
        }
        else
        {
            zebra_setError_zint(zh,
                                YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
                                relation_value);
            return ZEBRA_FAIL;
        }
    }
    else
    {
        /* non-ICU case. using string.chr and friends */
        /* except for the untruncated case, each branch wraps the helper's
           output in its own parenthesized group */
        switch (truncation_value)
        {
        case -1:         /* not specified */
        case 100:        /* do not truncate */
            if (!string_relation(zh, zapt, &termp, term_dict,
                                 attributeSet,
                                 zm, space_split, display_term,
                                 &relation_error))
            {
                /* a zero return is only an error when relation_error is set */
                if (relation_error)
                {
                    zebra_setError(zh, relation_error, 0);
                    return ZEBRA_FAIL;
                }
                *term_sub = 0;
                return ZEBRA_OK;
            }
            break;
        case 1:          /* right truncation */
            wrbuf_putc(term_dict, '(');
            if (!term_100(zm, &termp, term_dict, space_split, display_term))
            {
                *term_sub = 0;
                return ZEBRA_OK;
            }
            wrbuf_puts(term_dict, ".*)");
            break;
        case 2:          /* left truncation */
            wrbuf_puts(term_dict, "(.*");
            if (!term_100(zm, &termp, term_dict, space_split, display_term))
            {
                *term_sub = 0;
                return ZEBRA_OK;
            }
            wrbuf_putc(term_dict, ')');
            break;
        case 3:          /* left&right truncation */
            wrbuf_puts(term_dict, "(.*");
            if (!term_100(zm, &termp, term_dict, space_split, display_term))
            {
                *term_sub = 0;
                return ZEBRA_OK;
            }
            wrbuf_puts(term_dict, ".*)");
            break;
        case 101:        /* process # in term */
            wrbuf_putc(term_dict, '(');
            if (!term_101(zm, &termp, term_dict, space_split, display_term))
            {
                *term_sub = 0;
                return ZEBRA_OK;
            }
            wrbuf_puts(term_dict, ")");
            break;
        case 102:        /* Regexp-1 */
            wrbuf_putc(term_dict, '(');
            if (!term_102(zm, &termp, term_dict, space_split, display_term))
            {
                *term_sub = 0;
                return ZEBRA_OK;
            }
            wrbuf_putc(term_dict, ')');
            break;
        case 103:       /* Regexp-2 */
            /* regex_range starts at 1 here; term_103() receives its address
               and the value is later handed to dict_lookup_grep() */
            regex_range = 1;
            wrbuf_putc(term_dict, '(');
            if (!term_103(zm, &termp, term_dict, &regex_range,
                          space_split, display_term))
            {
                *term_sub = 0;
                return ZEBRA_OK;
            }
            wrbuf_putc(term_dict, ')');
            break;
        case 104:        /* process ?n * # term */
            wrbuf_putc(term_dict, '(');
            if (!term_104(zm, &termp, term_dict, space_split, display_term))
            {
                *term_sub = 0;
                return ZEBRA_OK;
            }
            wrbuf_putc(term_dict, ')');
            break;
        case 105:        /* process * ! in term and right truncate */
            wrbuf_putc(term_dict, '(');
            if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
            {
                *term_sub = 0;
                return ZEBRA_OK;
            }
            wrbuf_putc(term_dict, ')');
            break;
        case 106:        /* process * ! in term */
            wrbuf_putc(term_dict, '(');
            if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
            {
                *term_sub = 0;
                return ZEBRA_OK;
            }
            wrbuf_putc(term_dict, ')');
            break;
        default:
            zebra_setError_zint(zh,
                                YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
                                truncation_value);
            return ZEBRA_FAIL;
        }
    }
    /* NOTE(review): buf is filled by esc_str() but never read afterwards;
       this looks like leftover debugging. Confirm esc_str() has no other
       effect before removing the block. */
    if (1)
    {
        char buf[1000];
        const char *input = wrbuf_cstr(term_dict) + prefix_len;
        esc_str(buf, sizeof(buf), input, strlen(input));
    }
    /* log the complete regexp in escaped form */
    {
        WRBUF pr_wr = wrbuf_alloc();

        wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
        yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
        wrbuf_destroy(pr_wr);
    }
    r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
                         grep_info, &max_pos,
                         ord_len /* number of "exact" chars */,
                         grep_handle);
    /* r == 1 marks the result as partial; any other non-zero value is only
       logged and the function still returns ZEBRA_OK */
    if (r == 1)
        zebra_set_partial_result(zh);
    else if (r)
        yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
    *term_sub = termp;
    yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
    return ZEBRA_OK;
}
1313 
1314 
1315 
grep_info_delete(struct grep_info * grep_info)1316 static void grep_info_delete(struct grep_info *grep_info)
1317 {
1318 #ifdef TERM_COUNT
1319     xfree(grep_info->term_no);
1320 #endif
1321     xfree(grep_info->isam_p_buf);
1322 }
1323 
grep_info_prepare(ZebraHandle zh,Z_AttributesPlusTerm * zapt,struct grep_info * grep_info,const char * index_type)1324 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1325 				   Z_AttributesPlusTerm *zapt,
1326 				   struct grep_info *grep_info,
1327 				   const char *index_type)
1328 {
1329 #ifdef TERM_COUNT
1330     grep_info->term_no = 0;
1331 #endif
1332     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1333     grep_info->isam_p_size = 0;
1334     grep_info->isam_p_buf = NULL;
1335     grep_info->zh = zh;
1336     grep_info->index_type = index_type;
1337     grep_info->termset = 0;
1338     if (zapt)
1339     {
1340         AttrType truncmax;
1341         int truncmax_value;
1342 
1343         attr_init_APT(&truncmax, zapt, 13);
1344         truncmax_value = attr_find(&truncmax, NULL);
1345         if (truncmax_value != -1)
1346             grep_info->trunc_max = truncmax_value;
1347     }
1348     if (zapt)
1349     {
1350         AttrType termset;
1351         int termset_value_numeric;
1352         const char *termset_value_string;
1353 
1354         attr_init_APT(&termset, zapt, 8);
1355         termset_value_numeric =
1356             attr_find_ex(&termset, NULL, &termset_value_string);
1357         if (termset_value_numeric != -1)
1358         {
1359 #if TERMSET_DISABLE
1360             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1361             return ZEBRA_FAIL;
1362 #else
1363             char resname[32];
1364             const char *termset_name = 0;
1365             if (termset_value_numeric != -2)
1366             {
1367 
1368                 sprintf(resname, "%d", termset_value_numeric);
1369                 termset_name = resname;
1370             }
1371             else
1372                 termset_name = termset_value_string;
1373             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1374             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1375             if (!grep_info->termset)
1376             {
1377                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1378                 return ZEBRA_FAIL;
1379             }
1380 #endif
1381         }
1382     }
1383     return ZEBRA_OK;
1384 }
1385 
search_terms_chrmap(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET ** result_sets,int * num_result_sets,struct rset_key_control * kc,zebra_map_t zm)1386 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1387                                      Z_AttributesPlusTerm *zapt,
1388                                      const char *termz,
1389                                      const Odr_oid *attributeSet,
1390                                      zint hits_limit,
1391                                      NMEM stream,
1392                                      const char *index_type, int complete_flag,
1393                                      const char *rank_type,
1394                                      const char *xpath_use,
1395                                      NMEM rset_nmem,
1396                                      RSET **result_sets, int *num_result_sets,
1397                                      struct rset_key_control *kc,
1398                                      zebra_map_t zm)
1399 {
1400     struct grep_info grep_info;
1401     const char *termp = termz;
1402     int alloc_sets = 0;
1403 
1404     *num_result_sets = 0;
1405     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1406         return ZEBRA_FAIL;
1407     while (1)
1408     {
1409 	ZEBRA_RES res;
1410 
1411 	if (alloc_sets == *num_result_sets)
1412 	{
1413 	    int add = 10;
1414 	    RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1415 					      sizeof(*rnew));
1416 	    if (alloc_sets)
1417 		memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1418 	    alloc_sets = alloc_sets + add;
1419 	    *result_sets = rnew;
1420 	}
1421         res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1422                           stream, &grep_info,
1423                           index_type, complete_flag,
1424                           rank_type,
1425                           xpath_use, rset_nmem,
1426                           &(*result_sets)[*num_result_sets],
1427                           kc, zm);
1428 	if (res != ZEBRA_OK)
1429 	{
1430 	    int i;
1431 	    for (i = 0; i < *num_result_sets; i++)
1432 		rset_delete((*result_sets)[i]);
1433 	    grep_info_delete(&grep_info);
1434 	    return res;
1435 	}
1436 	if ((*result_sets)[*num_result_sets] == 0)
1437 	    break;
1438 	(*num_result_sets)++;
1439 
1440         if (!*termp)
1441             break;
1442     }
1443     grep_info_delete(&grep_info);
1444     return ZEBRA_OK;
1445 }
1446 
1447 /**
1448    \brief Create result set(s) for list of terms
1449    \param zh Zebra Handle
1450    \param zapt Attributes Plust Term (RPN leaf)
1451    \param termz term as used in query but converted to UTF-8
1452    \param attributeSet default attribute set
1453    \param stream memory for result
1454    \param index_type register type ("w", "p",..)
1455    \param complete_flag whether it's phrases or not
1456    \param rank_type term flags for ranking
1457    \param xpath_use use attribute for X-Path (-1 for no X-path)
1458    \param rset_nmem memory for result sets
1459    \param result_sets output result set for each term in list (output)
1460    \param num_result_sets number of output result sets
1461    \param kc rset key control to be used for created result sets
1462 */
search_terms_list(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET ** result_sets,int * num_result_sets,struct rset_key_control * kc)1463 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1464                                    Z_AttributesPlusTerm *zapt,
1465                                    const char *termz,
1466                                    const Odr_oid *attributeSet,
1467                                    zint hits_limit,
1468                                    NMEM stream,
1469                                    const char *index_type, int complete_flag,
1470                                    const char *rank_type,
1471                                    const char *xpath_use,
1472                                    NMEM rset_nmem,
1473                                    RSET **result_sets, int *num_result_sets,
1474                                    struct rset_key_control *kc)
1475 {
1476     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1477     return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1478                                stream, index_type, complete_flag,
1479                                rank_type, xpath_use,
1480                                rset_nmem, result_sets, num_result_sets,
1481                                kc, zm);
1482 }
1483 
1484 
1485 /** \brief limit a search by position - returns result set
1486  */
search_position(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const Odr_oid * attributeSet,const char * index_type,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)1487 static ZEBRA_RES search_position(ZebraHandle zh,
1488                                  Z_AttributesPlusTerm *zapt,
1489                                  const Odr_oid *attributeSet,
1490                                  const char *index_type,
1491                                  NMEM rset_nmem,
1492                                  RSET *rset,
1493                                  struct rset_key_control *kc)
1494 {
1495     int position_value;
1496     AttrType position;
1497     int ord = -1;
1498     char ord_buf[32];
1499     char term_dict[100];
1500     int ord_len;
1501     char *val;
1502     ISAM_P isam_p;
1503     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1504 
1505     attr_init_APT(&position, zapt, 3);
1506     position_value = attr_find(&position, NULL);
1507     switch(position_value)
1508     {
1509     case 3:
1510     case -1:
1511         return ZEBRA_OK;
1512     case 1:
1513     case 2:
1514         break;
1515     default:
1516         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1517                             position_value);
1518         return ZEBRA_FAIL;
1519     }
1520 
1521 
1522     if (!zebra_maps_is_first_in_field(zm))
1523     {
1524         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1525                             position_value);
1526         return ZEBRA_FAIL;
1527     }
1528 
1529     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1530                           attributeSet, &ord) != ZEBRA_OK)
1531     {
1532         return ZEBRA_FAIL;
1533     }
1534     ord_len = key_SU_encode(ord, ord_buf);
1535     memcpy(term_dict, ord_buf, ord_len);
1536     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1537     val = dict_lookup(zh->reg->dict, term_dict);
1538     if (val)
1539     {
1540         assert(*val == sizeof(ISAM_P));
1541         memcpy(&isam_p, val+1, sizeof(isam_p));
1542 
1543         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1544                                        isam_p, 0);
1545     }
1546     return ZEBRA_OK;
1547 }
1548 
1549 /** \brief returns result set for phrase search
1550  */
rpn_search_APT_phrase(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz_org,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)1551 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1552 				       Z_AttributesPlusTerm *zapt,
1553 				       const char *termz_org,
1554 				       const Odr_oid *attributeSet,
1555                                        zint hits_limit,
1556 				       NMEM stream,
1557 				       const char *index_type,
1558                                        int complete_flag,
1559 				       const char *rank_type,
1560                                        const char *xpath_use,
1561 				       NMEM rset_nmem,
1562 				       RSET *rset,
1563 				       struct rset_key_control *kc)
1564 {
1565     RSET *result_sets = 0;
1566     int num_result_sets = 0;
1567     ZEBRA_RES res =
1568 	search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1569                           stream, index_type, complete_flag,
1570                           rank_type, xpath_use,
1571                           rset_nmem,
1572                           &result_sets, &num_result_sets, kc);
1573 
1574     if (res != ZEBRA_OK)
1575 	return res;
1576 
1577     if (num_result_sets > 0)
1578     {
1579         RSET first_set = 0;
1580         res = search_position(zh, zapt, attributeSet,
1581                               index_type,
1582                               rset_nmem, &first_set,
1583                               kc);
1584         if (res != ZEBRA_OK)
1585         {
1586             int i;
1587             for (i = 0; i < num_result_sets; i++)
1588                 rset_delete(result_sets[i]);
1589             return res;
1590         }
1591         if (first_set)
1592         {
1593             RSET *nsets = nmem_malloc(stream,
1594                                       sizeof(RSET) * (num_result_sets+1));
1595             nsets[0] = first_set;
1596             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1597             result_sets = nsets;
1598             num_result_sets++;
1599         }
1600     }
1601     if (num_result_sets == 0)
1602 	*rset = rset_create_null(rset_nmem, kc, 0);
1603     else if (num_result_sets == 1)
1604 	*rset = result_sets[0];
1605     else
1606 	*rset = rset_create_prox(rset_nmem, kc, kc->scope,
1607                                  num_result_sets, result_sets,
1608                                  1 /* ordered */, 0 /* exclusion */,
1609                                  3 /* relation */, 1 /* distance */);
1610     if (!*rset)
1611 	return ZEBRA_FAIL;
1612     return ZEBRA_OK;
1613 }
1614 
1615 /** \brief returns result set for or-list search
1616  */
rpn_search_APT_or_list(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz_org,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)1617 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1618 					Z_AttributesPlusTerm *zapt,
1619 					const char *termz_org,
1620 					const Odr_oid *attributeSet,
1621                                         zint hits_limit,
1622 					NMEM stream,
1623 					const char *index_type,
1624                                         int complete_flag,
1625 					const char *rank_type,
1626                                         const char *xpath_use,
1627 					NMEM rset_nmem,
1628 					RSET *rset,
1629 					struct rset_key_control *kc)
1630 {
1631     RSET *result_sets = 0;
1632     int num_result_sets = 0;
1633     int i;
1634     ZEBRA_RES res =
1635 	search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1636                           stream, index_type, complete_flag,
1637                           rank_type, xpath_use,
1638                           rset_nmem,
1639                           &result_sets, &num_result_sets, kc);
1640     if (res != ZEBRA_OK)
1641 	return res;
1642 
1643     for (i = 0; i < num_result_sets; i++)
1644     {
1645         RSET first_set = 0;
1646         res = search_position(zh, zapt, attributeSet,
1647                               index_type,
1648                               rset_nmem, &first_set,
1649                               kc);
1650         if (res != ZEBRA_OK)
1651         {
1652             for (i = 0; i < num_result_sets; i++)
1653                 rset_delete(result_sets[i]);
1654             return res;
1655         }
1656 
1657         if (first_set)
1658         {
1659             RSET tmp_set[2];
1660 
1661             tmp_set[0] = first_set;
1662             tmp_set[1] = result_sets[i];
1663 
1664             result_sets[i] = rset_create_prox(
1665                 rset_nmem, kc, kc->scope,
1666                 2, tmp_set,
1667                 1 /* ordered */, 0 /* exclusion */,
1668                 3 /* relation */, 1 /* distance */);
1669         }
1670     }
1671     if (num_result_sets == 0)
1672 	*rset = rset_create_null(rset_nmem, kc, 0);
1673     else if (num_result_sets == 1)
1674 	*rset = result_sets[0];
1675     else
1676 	*rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1677                                num_result_sets, result_sets);
1678     if (!*rset)
1679 	return ZEBRA_FAIL;
1680     return ZEBRA_OK;
1681 }
1682 
1683 /** \brief returns result set for and-list search
1684  */
rpn_search_APT_and_list(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz_org,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)1685 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1686 					 Z_AttributesPlusTerm *zapt,
1687 					 const char *termz_org,
1688 					 const Odr_oid *attributeSet,
1689                                          zint hits_limit,
1690 					 NMEM stream,
1691 					 const char *index_type,
1692                                          int complete_flag,
1693 					 const char *rank_type,
1694                                          const char *xpath_use,
1695 					 NMEM rset_nmem,
1696 					 RSET *rset,
1697 					 struct rset_key_control *kc)
1698 {
1699     RSET *result_sets = 0;
1700     int num_result_sets = 0;
1701     int i;
1702     ZEBRA_RES res =
1703 	search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1704                           stream, index_type, complete_flag,
1705                           rank_type, xpath_use,
1706                           rset_nmem,
1707                           &result_sets, &num_result_sets,
1708                           kc);
1709     if (res != ZEBRA_OK)
1710 	return res;
1711     for (i = 0; i < num_result_sets; i++)
1712     {
1713         RSET first_set = 0;
1714         res = search_position(zh, zapt, attributeSet,
1715                               index_type,
1716                               rset_nmem, &first_set,
1717                               kc);
1718         if (res != ZEBRA_OK)
1719         {
1720             for (i = 0; i < num_result_sets; i++)
1721                 rset_delete(result_sets[i]);
1722             return res;
1723         }
1724 
1725         if (first_set)
1726         {
1727             RSET tmp_set[2];
1728 
1729             tmp_set[0] = first_set;
1730             tmp_set[1] = result_sets[i];
1731 
1732             result_sets[i] = rset_create_prox(
1733                 rset_nmem, kc, kc->scope,
1734                 2, tmp_set,
1735                 1 /* ordered */, 0 /* exclusion */,
1736                 3 /* relation */, 1 /* distance */);
1737         }
1738     }
1739 
1740 
1741     if (num_result_sets == 0)
1742 	*rset = rset_create_null(rset_nmem, kc, 0);
1743     else if (num_result_sets == 1)
1744 	*rset = result_sets[0];
1745     else
1746 	*rset = rset_create_and(rset_nmem, kc, kc->scope,
1747                                 num_result_sets, result_sets);
1748     if (!*rset)
1749 	return ZEBRA_FAIL;
1750     return ZEBRA_OK;
1751 }
1752 
/*
 * Append the dictionary pattern for one numeric term to term_dict and run
 * the dictionary lookup for it.
 *
 * The relation attribute (type 2) of zapt decides the pattern:
 *   1 (<)           gen_regular_rel(term_dict, value - 1, 1)
 *   2 (<=)          gen_regular_rel(term_dict, value,     1)
 *   4 (>=)          gen_regular_rel(term_dict, value,     0)
 *   5 (>)           gen_regular_rel(term_dict, value + 1, 0)
 *   3 (=) / absent  "(0*N)", i.e. the value with optional leading zeros
 *   103             no pattern is added; *term_sub is moved to the end of
 *                   the term string
 * Any other relation sets *error_code to
 * YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.
 *
 * The numeric value is obtained by passing the term read by term_100()
 * through atoi(), so non-numeric input yields whatever atoi() returns.
 *
 * Returns 1 when the lookup was performed (matches are collected in
 * grep_info by grep_handle), 0 when term_100() found no term or the
 * relation is unsupported.  *error_code is 0 unless the relation was
 * rejected.
 */
static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                            const char **term_sub,
                            WRBUF term_dict,
                            const Odr_oid *attributeSet,
                            struct grep_info *grep_info,
                            int *max_pos,
                            zebra_map_t zm,
                            WRBUF display_term,
                            int *error_code)
{
    AttrType relation;
    int relation_value;
    int r;

    *error_code = 0;
    attr_init_APT(&relation, zapt, 2);
    relation_value = attr_find(&relation, NULL);

    yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);

    /* first pass: validate the relation and handle the term-less case */
    switch (relation_value)
    {
    case 1:
        yaz_log(log_level_rpn, "Relation <");
        break;
    case 2:
        yaz_log(log_level_rpn, "Relation <=");
        break;
    case 4:
        yaz_log(log_level_rpn, "Relation >=");
        break;
    case 5:
        yaz_log(log_level_rpn, "Relation >");
        break;
    case -1:
    case 3:
        yaz_log(log_level_rpn, "Relation =");
        break;
    case 103:
        /* no term pattern is generated; just consume the rest of the term */
        while (**term_sub != '\0')
            (*term_sub)++;
        break;
    default:
        *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
        return 0;
    }

    /* second pass: every relation except 103 needs the numeric term */
    if (relation_value != 103)
    {
        WRBUF term_num = wrbuf_alloc();
        int term_value;

        if (!term_100(zm, term_sub, term_num, 1, display_term))
        {
            wrbuf_destroy(term_num);
            return 0;
        }
        term_value = atoi(wrbuf_cstr(term_num));
        wrbuf_destroy(term_num);

        switch (relation_value)
        {
        case 1:
            gen_regular_rel(term_dict, term_value - 1, 1);
            break;
        case 2:
            gen_regular_rel(term_dict, term_value, 1);
            break;
        case 4:
            gen_regular_rel(term_dict, term_value, 0);
            break;
        case 5:
            gen_regular_rel(term_dict, term_value + 1, 0);
            break;
        default: /* -1 and 3: equality */
            wrbuf_printf(term_dict, "(0*%d)", term_value);
            break;
        }
    }

    r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
                         0, grep_info, max_pos, 0, grep_handle);

    if (r == 1)
        zebra_set_partial_result(zh);
    else if (r)
        /* report the relation actually in use, not a fixed "gt" */
        yaz_log(YLOG_WARN, "dict_lookup_grep fail, numeric relation %d: %d",
                relation_value, r);
    yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
    return 1;
}
1849 
numeric_term(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char ** term_sub,WRBUF term_dict,const Odr_oid * attributeSet,NMEM stream,struct grep_info * grep_info,const char * index_type,int complete_flag,WRBUF display_term,const char * xpath_use,struct ord_list ** ol)1850 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1851 			      const char **term_sub,
1852                               WRBUF term_dict,
1853 			      const Odr_oid *attributeSet, NMEM stream,
1854 			      struct grep_info *grep_info,
1855 			      const char *index_type, int complete_flag,
1856 			      WRBUF display_term,
1857                               const char *xpath_use,
1858                               struct ord_list **ol)
1859 {
1860     const char *termp;
1861     struct rpn_char_map_info rcmi;
1862     int max_pos;
1863     int relation_error = 0;
1864     int ord, ord_len, i;
1865     char ord_buf[32];
1866     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1867 
1868     *ol = ord_list_create(stream);
1869 
1870     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1871 
1872     termp = *term_sub;
1873 
1874     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1875                           attributeSet, &ord) != ZEBRA_OK)
1876     {
1877         return ZEBRA_FAIL;
1878     }
1879 
1880     wrbuf_rewind(term_dict);
1881 
1882     *ol = ord_list_append(stream, *ol, ord);
1883 
1884     ord_len = key_SU_encode(ord, ord_buf);
1885 
1886     wrbuf_putc(term_dict, '(');
1887     for (i = 0; i < ord_len; i++)
1888     {
1889         wrbuf_putc(term_dict, 1);
1890         wrbuf_putc(term_dict, ord_buf[i]);
1891     }
1892     wrbuf_putc(term_dict, ')');
1893 
1894     if (!numeric_relation(zh, zapt, &termp, term_dict,
1895                           attributeSet, grep_info, &max_pos, zm,
1896                           display_term, &relation_error))
1897     {
1898         if (relation_error)
1899         {
1900             zebra_setError(zh, relation_error, 0);
1901             return ZEBRA_FAIL;
1902         }
1903         *term_sub = 0;
1904         return ZEBRA_OK;
1905     }
1906     *term_sub = termp;
1907     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1908     return ZEBRA_OK;
1909 }
1910 
1911 
rpn_search_APT_numeric(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,const char * index_type,int complete_flag,const char * rank_type,const char * xpath_use,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)1912 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1913 					Z_AttributesPlusTerm *zapt,
1914 					const char *termz,
1915 					const Odr_oid *attributeSet,
1916                                         zint hits_limit,
1917 					NMEM stream,
1918 					const char *index_type,
1919                                         int complete_flag,
1920 					const char *rank_type,
1921                                         const char *xpath_use,
1922 					NMEM rset_nmem,
1923 					RSET *rset,
1924 					struct rset_key_control *kc)
1925 {
1926     const char *termp = termz;
1927     RSET *result_sets = 0;
1928     int num_result_sets = 0;
1929     ZEBRA_RES res;
1930     struct grep_info grep_info;
1931     int alloc_sets = 0;
1932     zint hits_limit_value = hits_limit;
1933     const char *term_ref_id_str = 0;
1934 
1935     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1936                           stream);
1937 
1938     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1939     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1940         return ZEBRA_FAIL;
1941     while (1)
1942     {
1943         struct ord_list *ol;
1944         WRBUF term_dict = wrbuf_alloc();
1945         WRBUF display_term = wrbuf_alloc();
1946 	if (alloc_sets == num_result_sets)
1947 	{
1948 	    int add = 10;
1949 	    RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1950 					      sizeof(*rnew));
1951 	    if (alloc_sets)
1952 		memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1953 	    alloc_sets = alloc_sets + add;
1954 	    result_sets = rnew;
1955 	}
1956         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1957         grep_info.isam_p_indx = 0;
1958         res = numeric_term(zh, zapt, &termp, term_dict,
1959                            attributeSet, stream, &grep_info,
1960 			   index_type, complete_flag,
1961 			   display_term, xpath_use, &ol);
1962         wrbuf_destroy(term_dict);
1963 	if (res == ZEBRA_FAIL || termp == 0)
1964         {
1965             wrbuf_destroy(display_term);
1966 	    break;
1967         }
1968         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1969         result_sets[num_result_sets] =
1970 	    rset_trunc(zh, grep_info.isam_p_buf,
1971 		       grep_info.isam_p_indx, wrbuf_buf(display_term),
1972 		       wrbuf_len(display_term), rank_type,
1973 		       0 /* preserve position */,
1974 		       zapt->term->which, rset_nmem,
1975 		       kc, kc->scope, ol, index_type,
1976 		       hits_limit_value,
1977 		       term_ref_id_str);
1978         wrbuf_destroy(display_term);
1979 	if (!result_sets[num_result_sets])
1980 	    break;
1981 	num_result_sets++;
1982         if (!*termp)
1983             break;
1984     }
1985     grep_info_delete(&grep_info);
1986 
1987     if (res != ZEBRA_OK)
1988         return res;
1989     if (num_result_sets == 0)
1990         *rset = rset_create_null(rset_nmem, kc, 0);
1991     else if (num_result_sets == 1)
1992         *rset = result_sets[0];
1993     else
1994         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1995                                 num_result_sets, result_sets);
1996     if (!*rset)
1997         return ZEBRA_FAIL;
1998     return ZEBRA_OK;
1999 }
2000 
rpn_search_APT_local(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const char * termz,const Odr_oid * attributeSet,NMEM stream,const char * rank_type,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)2001 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
2002 				      Z_AttributesPlusTerm *zapt,
2003 				      const char *termz,
2004 				      const Odr_oid *attributeSet,
2005 				      NMEM stream,
2006 				      const char *rank_type, NMEM rset_nmem,
2007 				      RSET *rset,
2008 				      struct rset_key_control *kc)
2009 {
2010     Record rec;
2011     zint sysno = atozint(termz);
2012 
2013     if (sysno <= 0)
2014         sysno = 0;
2015     rec = rec_get(zh->reg->records, sysno);
2016     if (!rec)
2017         sysno = 0;
2018 
2019     rec_free(&rec);
2020 
2021     if (sysno <= 0)
2022     {
2023         *rset = rset_create_null(rset_nmem, kc, 0);
2024     }
2025     else
2026     {
2027         RSFD rsfd;
2028         struct it_key key;
2029         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2030                                  res_get(zh->res, "setTmpDir"), 0);
2031         rsfd = rset_open(*rset, RSETF_WRITE);
2032 
2033         key.mem[0] = sysno;
2034         key.mem[1] = 1;
2035         key.len = 2;
2036         rset_write(rsfd, &key);
2037         rset_close(rsfd);
2038     }
2039     return ZEBRA_OK;
2040 }
2041 
rpn_sort_spec(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const Odr_oid * attributeSet,NMEM stream,Z_SortKeySpecList * sort_sequence,const char * rank_type,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)2042 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2043 			       const Odr_oid *attributeSet, NMEM stream,
2044 			       Z_SortKeySpecList *sort_sequence,
2045 			       const char *rank_type,
2046 			       NMEM rset_nmem,
2047 			       RSET *rset,
2048 			       struct rset_key_control *kc)
2049 {
2050     int i;
2051     int sort_relation_value;
2052     AttrType sort_relation_type;
2053     Z_SortKeySpec *sks;
2054     Z_SortKey *sk;
2055     char termz[20];
2056 
2057     attr_init_APT(&sort_relation_type, zapt, 7);
2058     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2059 
2060     if (!sort_sequence->specs)
2061     {
2062         sort_sequence->num_specs = 10;
2063         sort_sequence->specs = (Z_SortKeySpec **)
2064             nmem_malloc(stream, sort_sequence->num_specs *
2065                         sizeof(*sort_sequence->specs));
2066         for (i = 0; i < sort_sequence->num_specs; i++)
2067             sort_sequence->specs[i] = 0;
2068     }
2069     if (zapt->term->which != Z_Term_general)
2070         i = 0;
2071     else
2072         i = atoi_n((char *) zapt->term->u.general->buf,
2073                    zapt->term->u.general->len);
2074     if (i >= sort_sequence->num_specs)
2075         i = 0;
2076     sprintf(termz, "%d", i);
2077 
2078     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2079     sks->sortElement = (Z_SortElement *)
2080         nmem_malloc(stream, sizeof(*sks->sortElement));
2081     sks->sortElement->which = Z_SortElement_generic;
2082     sk = sks->sortElement->u.generic = (Z_SortKey *)
2083         nmem_malloc(stream, sizeof(*sk));
2084     sk->which = Z_SortKey_sortAttributes;
2085     sk->u.sortAttributes = (Z_SortAttributes *)
2086         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2087 
2088     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2089     sk->u.sortAttributes->list = zapt->attributes;
2090 
2091     sks->sortRelation = (Odr_int *)
2092         nmem_malloc(stream, sizeof(*sks->sortRelation));
2093     if (sort_relation_value == 1)
2094         *sks->sortRelation = Z_SortKeySpec_ascending;
2095     else if (sort_relation_value == 2)
2096         *sks->sortRelation = Z_SortKeySpec_descending;
2097     else
2098         *sks->sortRelation = Z_SortKeySpec_ascending;
2099 
2100     sks->caseSensitivity = (Odr_int *)
2101         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2102     *sks->caseSensitivity = 0;
2103 
2104     sks->which = Z_SortKeySpec_null;
2105     sks->u.null = odr_nullval ();
2106     sort_sequence->specs[i] = sks;
2107     *rset = rset_create_null(rset_nmem, kc, 0);
2108     return ZEBRA_OK;
2109 }
2110 
2111 
/*
 * Decide whether the use attribute (type 1) of zapt is an XPath
 * expression and, if so, parse it.
 *
 * Only a string-valued use attribute that begins with '/' is treated as
 * XPath.  In that case the result of zebra_parse_xpath_str() is returned
 * (steps are stored in xpath, at most max of them, allocated from mem).
 * Returns -1 when the operand carries no such attribute.
 */
static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                           const Odr_oid *attributeSet,
                           struct xpath_location_step *xpath, int max,
                           NMEM mem)
{
    const Odr_oid *set_in_effect = attributeSet;
    const char *use_str = 0;
    AttrType use_attr;

    attr_init_APT(&use_attr, zapt, 1);
    attr_find_ex(&use_attr, &set_in_effect, &use_str);

    if (use_str && *use_str == '/')
        return zebra_parse_xpath_str(use_str, xpath, max, mem);
    return -1;
}
2129 
2130 
2131 
xpath_trunc(ZebraHandle zh,NMEM stream,const char * index_type,const char * term,const char * xpath_use,NMEM rset_nmem,struct rset_key_control * kc)2132 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2133                         const char *index_type, const char *term,
2134                         const char *xpath_use,
2135                         NMEM rset_nmem,
2136 			struct rset_key_control *kc)
2137 {
2138     struct grep_info grep_info;
2139     int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2140                                            zinfo_index_category_index,
2141                                            index_type, xpath_use);
2142     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2143         return rset_create_null(rset_nmem, kc, 0);
2144 
2145     if (ord < 0)
2146         return rset_create_null(rset_nmem, kc, 0);
2147     else
2148     {
2149         int i, max_pos;
2150         char ord_buf[32];
2151         RSET rset;
2152         WRBUF term_dict = wrbuf_alloc();
2153         int ord_len = key_SU_encode(ord, ord_buf);
2154         int term_type = Z_Term_characterString;
2155         const char *flags = "void";
2156 
2157         wrbuf_putc(term_dict, '(');
2158         for (i = 0; i < ord_len; i++)
2159         {
2160             wrbuf_putc(term_dict, 1);
2161             wrbuf_putc(term_dict, ord_buf[i]);
2162         }
2163         wrbuf_putc(term_dict, ')');
2164         wrbuf_puts(term_dict, term);
2165 
2166         grep_info.isam_p_indx = 0;
2167         dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2168                          &grep_info, &max_pos, 0, grep_handle);
2169         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2170                 grep_info.isam_p_indx);
2171         rset = rset_trunc(zh, grep_info.isam_p_buf,
2172                           grep_info.isam_p_indx, term, strlen(term),
2173                           flags, 1, term_type, rset_nmem,
2174                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2175                           0 /* term_ref_id_str */);
2176         grep_info_delete(&grep_info);
2177         wrbuf_destroy(term_dict);
2178         return rset;
2179     }
2180 }
2181 
2182 static
rpn_search_xpath(ZebraHandle zh,NMEM stream,const char * rank_type,RSET rset,int xpath_len,struct xpath_location_step * xpath,NMEM rset_nmem,RSET * rset_out,struct rset_key_control * kc)2183 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2184 			   NMEM stream, const char *rank_type, RSET rset,
2185 			   int xpath_len, struct xpath_location_step *xpath,
2186 			   NMEM rset_nmem,
2187 			   RSET *rset_out,
2188 			   struct rset_key_control *kc)
2189 {
2190     int i;
2191     int always_matches = rset ? 0 : 1;
2192 
2193     if (xpath_len < 0)
2194     {
2195 	*rset_out = rset;
2196 	return ZEBRA_OK;
2197     }
2198 
2199     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2200     for (i = 0; i < xpath_len; i++)
2201     {
2202         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2203 
2204     }
2205 
2206     /*
2207     //a    ->    a/.*
2208     //a/b  ->    b/a/.*
2209     /a     ->    a/
2210     /a/b   ->    b/a/
2211 
2212     /      ->    none
2213 
2214     a[@attr = value]/b[@other = othervalue]
2215 
2216     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2217     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2218     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2219     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2220     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2221     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2222 
2223     */
2224 
2225     dict_grep_cmap(zh->reg->dict, 0, 0);
2226 
2227     {
2228         int level = xpath_len;
2229         int first_path = 1;
2230 
2231         while (--level >= 0)
2232         {
2233             WRBUF xpath_rev = wrbuf_alloc();
2234             int i;
2235             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2236 
2237             for (i = level; i >= 1; --i)
2238             {
2239                 const char *cp = xpath[i].part;
2240                 if (*cp)
2241                 {
2242                     for (; *cp; cp++)
2243                     {
2244                         if (*cp == '*')
2245                             wrbuf_puts(xpath_rev, "[^/]*");
2246                         else if (*cp == ' ')
2247                             wrbuf_puts(xpath_rev, "\001 ");
2248                         else
2249                             wrbuf_putc(xpath_rev, *cp);
2250 
2251                         /* wrbuf_putc does not null-terminate , but
2252                            wrbuf_puts below ensures it does.. so xpath_rev
2253                            is OK iff length is > 0 */
2254                     }
2255                     wrbuf_puts(xpath_rev, "/");
2256                 }
2257                 else if (i == 1)  /* // case */
2258                     wrbuf_puts(xpath_rev, ".*");
2259             }
2260             if (xpath[level].predicate &&
2261                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2262                 xpath[level].predicate->u.relation.name[0])
2263             {
2264                 WRBUF wbuf = wrbuf_alloc();
2265                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2266                 if (xpath[level].predicate->u.relation.value)
2267                 {
2268                     const char *cp = xpath[level].predicate->u.relation.value;
2269                     wrbuf_putc(wbuf, '=');
2270 
2271                     while (*cp)
2272                     {
2273                         if (strchr(REGEX_CHARS, *cp))
2274                             wrbuf_putc(wbuf, '\\');
2275                         wrbuf_putc(wbuf, *cp);
2276                         cp++;
2277                     }
2278                 }
2279                 rset_attr = xpath_trunc(
2280                     zh, stream, "0", wrbuf_cstr(wbuf),
2281                     ZEBRA_XPATH_ATTR_NAME,
2282                     rset_nmem, kc);
2283                 wrbuf_destroy(wbuf);
2284             }
2285             else
2286             {
2287                 if (!first_path)
2288                 {
2289                     wrbuf_destroy(xpath_rev);
2290                     continue;
2291                 }
2292             }
2293             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2294                     wrbuf_cstr(xpath_rev));
2295             if (wrbuf_len(xpath_rev))
2296             {
2297                 rset_start_tag = xpath_trunc(zh, stream, "0",
2298                                              wrbuf_cstr(xpath_rev),
2299                                              ZEBRA_XPATH_ELM_BEGIN,
2300                                              rset_nmem, kc);
2301                 if (always_matches)
2302                     rset = rset_start_tag;
2303                 else
2304                 {
2305                     rset_end_tag = xpath_trunc(zh, stream, "0",
2306                                                wrbuf_cstr(xpath_rev),
2307                                                ZEBRA_XPATH_ELM_END,
2308                                                rset_nmem, kc);
2309 
2310                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2311                                                rset_start_tag, rset,
2312                                                rset_end_tag, rset_attr);
2313                 }
2314             }
2315             wrbuf_destroy(xpath_rev);
2316             first_path = 0;
2317         }
2318     }
2319     *rset_out = rset;
2320     return ZEBRA_OK;
2321 }
2322 
2323 #define MAX_XPATH_STEPS 10
2324 
2325 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2326                                      Z_AttributesPlusTerm *zapt,
2327                                      const Odr_oid *attributeSet,
2328                                      zint hits_limit, NMEM stream,
2329                                      Z_SortKeySpecList *sort_sequence,
2330                                      NMEM rset_nmem,
2331                                      RSET *rset,
2332                                      struct rset_key_control *kc);
2333 
rpn_search_APT(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,Z_SortKeySpecList * sort_sequence,int num_bases,const char ** basenames,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)2334 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2335 				const Odr_oid *attributeSet,
2336                                 zint hits_limit, NMEM stream,
2337 				Z_SortKeySpecList *sort_sequence,
2338 				int num_bases, const char **basenames,
2339 				NMEM rset_nmem,
2340 				RSET *rset,
2341 				struct rset_key_control *kc)
2342 {
2343     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2344     ZEBRA_RES res = ZEBRA_OK;
2345     int i;
2346     for (i = 0; i < num_bases; i++)
2347     {
2348 
2349         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2350         {
2351 	    zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2352 			   basenames[i]);
2353             res = ZEBRA_FAIL;
2354             break;
2355         }
2356         res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2357                                   sort_sequence,
2358                                   rset_nmem, rsets+i, kc);
2359         if (res != ZEBRA_OK)
2360             break;
2361     }
2362     if (res != ZEBRA_OK)
2363     {   /* must clean up the already created sets */
2364         while (--i >= 0)
2365             rset_delete(rsets[i]);
2366         *rset = 0;
2367     }
2368     else
2369     {
2370         if (num_bases == 1)
2371             *rset = rsets[0];
2372         else if (num_bases == 0)
2373             *rset = rset_create_null(rset_nmem, kc, 0);
2374         else
2375             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2376                                    num_bases, rsets);
2377     }
2378     return res;
2379 }
2380 
rpn_search_database(ZebraHandle zh,Z_AttributesPlusTerm * zapt,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,Z_SortKeySpecList * sort_sequence,NMEM rset_nmem,RSET * rset,struct rset_key_control * kc)2381 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2382                                      Z_AttributesPlusTerm *zapt,
2383                                      const Odr_oid *attributeSet,
2384                                      zint hits_limit, NMEM stream,
2385                                      Z_SortKeySpecList *sort_sequence,
2386                                      NMEM rset_nmem,
2387                                      RSET *rset,
2388                                      struct rset_key_control *kc)
2389 {
2390     ZEBRA_RES res = ZEBRA_OK;
2391     const char *index_type;
2392     char *search_type = NULL;
2393     char rank_type[128];
2394     int complete_flag;
2395     int sort_flag;
2396     char termz[IT_MAX_WORD+1];
2397     int xpath_len;
2398     const char *xpath_use = 0;
2399     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2400 
2401     if (!log_level_set)
2402     {
2403         log_level_rpn = yaz_log_module_level("rpn");
2404         log_level_set = 1;
2405     }
2406     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2407 		    rank_type, &complete_flag, &sort_flag);
2408 
2409     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2410     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2411     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2412     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2413 
2414     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2415 	return ZEBRA_FAIL;
2416 
2417     if (sort_flag)
2418         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2419 			     rank_type, rset_nmem, rset, kc);
2420     /* consider if an X-Path query is used */
2421     xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2422                                 xpath, MAX_XPATH_STEPS, stream);
2423     if (xpath_len >= 0)
2424     {
2425         if (xpath[xpath_len-1].part[0] == '@')
2426             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2427         else
2428             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */
2429 
2430         if (1)
2431         {
2432             AttrType relation;
2433             int relation_value;
2434 
2435             attr_init_APT(&relation, zapt, 2);
2436             relation_value = attr_find(&relation, NULL);
2437 
2438             if (relation_value == 103) /* alwaysmatches */
2439             {
2440                 *rset = 0; /* signal no "term" set */
2441                 return rpn_search_xpath(zh, stream, rank_type, *rset,
2442                                         xpath_len, xpath, rset_nmem, rset, kc);
2443             }
2444         }
2445     }
2446 
2447     /* search using one of the various search type strategies
2448        termz is our UTF-8 search term
2449        attributeSet is top-level default attribute set
2450        stream is ODR for search
2451        reg_id is the register type
2452        complete_flag is 1 for complete subfield, 0 for incomplete
2453        xpath_use is use-attribute to be used for X-Path search, 0 for none
2454     */
2455     if (!strcmp(search_type, "phrase"))
2456     {
2457         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2458                                     stream,
2459 				    index_type, complete_flag, rank_type,
2460 				    xpath_use,
2461 				    rset_nmem,
2462 				    rset, kc);
2463     }
2464     else if (!strcmp(search_type, "and-list"))
2465     {
2466         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2467                                       stream,
2468 				      index_type, complete_flag, rank_type,
2469 				      xpath_use,
2470 				      rset_nmem,
2471 				      rset, kc);
2472     }
2473     else if (!strcmp(search_type, "or-list"))
2474     {
2475         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2476                                      stream,
2477 				     index_type, complete_flag, rank_type,
2478 				     xpath_use,
2479                                      rset_nmem,
2480 				     rset, kc);
2481     }
2482     else if (!strcmp(search_type, "local"))
2483     {
2484         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2485 				   rank_type, rset_nmem, rset, kc);
2486     }
2487     else if (!strcmp(search_type, "numeric"))
2488     {
2489         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2490                                      stream,
2491 				     index_type, complete_flag, rank_type,
2492 				     xpath_use,
2493 				     rset_nmem,
2494 				     rset, kc);
2495     }
2496     else
2497     {
2498 	zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2499 	res = ZEBRA_FAIL;
2500     }
2501     if (res != ZEBRA_OK)
2502 	return res;
2503     if (!*rset)
2504 	return ZEBRA_FAIL;
2505     return rpn_search_xpath(zh, stream, rank_type, *rset,
2506 			    xpath_len, xpath, rset_nmem, rset, kc);
2507 }
2508 
2509 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2510 				      const Odr_oid *attributeSet,
2511                                       zint hits_limit,
2512 				      NMEM stream, NMEM rset_nmem,
2513 				      Z_SortKeySpecList *sort_sequence,
2514 				      int num_bases, const char **basenames,
2515 				      RSET **result_sets, int *num_result_sets,
2516 				      Z_Operator *parent_op,
2517 				      struct rset_key_control *kc);
2518 
rpn_get_top_approx_limit(ZebraHandle zh,Z_RPNStructure * zs,zint * approx_limit)2519 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2520                                    zint *approx_limit)
2521 {
2522     ZEBRA_RES res = ZEBRA_OK;
2523     if (zs->which == Z_RPNStructure_complex)
2524     {
2525         if (res == ZEBRA_OK)
2526             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2527                                            approx_limit);
2528         if (res == ZEBRA_OK)
2529             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2530                                            approx_limit);
2531     }
2532     else if (zs->which == Z_RPNStructure_simple)
2533     {
2534         if (zs->u.simple->which == Z_Operand_APT)
2535         {
2536             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2537             AttrType global_hits_limit_attr;
2538             int l;
2539 
2540             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2541 
2542             l = attr_find(&global_hits_limit_attr, NULL);
2543             if (l != -1)
2544                 *approx_limit = l;
2545         }
2546     }
2547     return res;
2548 }
2549 
rpn_search_top(ZebraHandle zh,Z_RPNStructure * zs,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,NMEM rset_nmem,Z_SortKeySpecList * sort_sequence,int num_bases,const char ** basenames,RSET * result_set)2550 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2551 			 const Odr_oid *attributeSet,
2552                          zint hits_limit,
2553 			 NMEM stream, NMEM rset_nmem,
2554 			 Z_SortKeySpecList *sort_sequence,
2555 			 int num_bases, const char **basenames,
2556 			 RSET *result_set)
2557 {
2558     RSET *result_sets = 0;
2559     int num_result_sets = 0;
2560     ZEBRA_RES res;
2561     struct rset_key_control *kc = zebra_key_control_create(zh);
2562 
2563     res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2564 			       stream, rset_nmem,
2565 			       sort_sequence,
2566 			       num_bases, basenames,
2567 			       &result_sets, &num_result_sets,
2568 			       0 /* no parent op */,
2569 			       kc);
2570     if (res != ZEBRA_OK)
2571     {
2572 	int i;
2573 	for (i = 0; i < num_result_sets; i++)
2574 	    rset_delete(result_sets[i]);
2575 	*result_set = 0;
2576     }
2577     else
2578     {
2579 	assert(num_result_sets == 1);
2580 	assert(result_sets);
2581 	assert(*result_sets);
2582 	*result_set = *result_sets;
2583     }
2584     (*kc->dec)(kc);
2585     return res;
2586 }
2587 
rpn_search_structure(ZebraHandle zh,Z_RPNStructure * zs,const Odr_oid * attributeSet,zint hits_limit,NMEM stream,NMEM rset_nmem,Z_SortKeySpecList * sort_sequence,int num_bases,const char ** basenames,RSET ** result_sets,int * num_result_sets,Z_Operator * parent_op,struct rset_key_control * kc)2588 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2589 			       const Odr_oid *attributeSet, zint hits_limit,
2590 			       NMEM stream, NMEM rset_nmem,
2591 			       Z_SortKeySpecList *sort_sequence,
2592 			       int num_bases, const char **basenames,
2593 			       RSET **result_sets, int *num_result_sets,
2594 			       Z_Operator *parent_op,
2595 			       struct rset_key_control *kc)
2596 {
2597     *num_result_sets = 0;
2598     if (zs->which == Z_RPNStructure_complex)
2599     {
2600 	ZEBRA_RES res;
2601         Z_Operator *zop = zs->u.complex->roperator;
2602 	RSET *result_sets_l = 0;
2603 	int num_result_sets_l = 0;
2604 	RSET *result_sets_r = 0;
2605 	int num_result_sets_r = 0;
2606 
2607         res = rpn_search_structure(zh, zs->u.complex->s1,
2608 				   attributeSet, hits_limit, stream, rset_nmem,
2609 				   sort_sequence,
2610 				   num_bases, basenames,
2611 				   &result_sets_l, &num_result_sets_l,
2612 				   zop, kc);
2613 	if (res != ZEBRA_OK)
2614 	{
2615 	    int i;
2616 	    for (i = 0; i < num_result_sets_l; i++)
2617 		rset_delete(result_sets_l[i]);
2618 	    return res;
2619 	}
2620         res = rpn_search_structure(zh, zs->u.complex->s2,
2621 				   attributeSet, hits_limit, stream, rset_nmem,
2622 				   sort_sequence,
2623 				   num_bases, basenames,
2624 				   &result_sets_r, &num_result_sets_r,
2625 				   zop, kc);
2626 	if (res != ZEBRA_OK)
2627 	{
2628 	    int i;
2629 	    for (i = 0; i < num_result_sets_l; i++)
2630 		rset_delete(result_sets_l[i]);
2631 	    for (i = 0; i < num_result_sets_r; i++)
2632 		rset_delete(result_sets_r[i]);
2633 	    return res;
2634 	}
2635 
2636 	/* make a new list of result for all children */
2637 	*num_result_sets = num_result_sets_l + num_result_sets_r;
2638 	*result_sets = nmem_malloc(stream, *num_result_sets *
2639 				   sizeof(**result_sets));
2640 	memcpy(*result_sets, result_sets_l,
2641 	       num_result_sets_l * sizeof(**result_sets));
2642 	memcpy(*result_sets + num_result_sets_l, result_sets_r,
2643 	       num_result_sets_r * sizeof(**result_sets));
2644 
2645 	if (!parent_op || parent_op->which != zop->which
2646 	    || (zop->which != Z_Operator_and &&
2647 		zop->which != Z_Operator_or))
2648 	{
2649 	    /* parent node different from this one (or non-present) */
2650 	    /* we must combine result sets now */
2651 	    RSET rset;
2652 	    switch (zop->which)
2653 	    {
2654 	    case Z_Operator_and:
2655 		rset = rset_create_and(rset_nmem, kc,
2656                                        kc->scope,
2657                                        *num_result_sets, *result_sets);
2658 		break;
2659 	    case Z_Operator_or:
2660 		rset = rset_create_or(rset_nmem, kc,
2661                                       kc->scope, 0, /* termid */
2662                                       *num_result_sets, *result_sets);
2663 		break;
2664 	    case Z_Operator_and_not:
2665 		rset = rset_create_not(rset_nmem, kc,
2666                                        kc->scope,
2667                                        (*result_sets)[0],
2668                                        (*result_sets)[1]);
2669 		break;
2670 	    case Z_Operator_prox:
2671 		if (zop->u.prox->which != Z_ProximityOperator_known)
2672 		{
2673 		    zebra_setError(zh,
2674 				   YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2675 				   0);
2676 		    return ZEBRA_FAIL;
2677 		}
2678 		if (*zop->u.prox->u.known != Z_ProxUnit_word)
2679 		{
2680 		    zebra_setError_zint(zh,
2681 					YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2682 					*zop->u.prox->u.known);
2683 		    return ZEBRA_FAIL;
2684 		}
2685 		else
2686 		{
2687 		    rset = rset_create_prox(rset_nmem, kc,
2688                                             kc->scope,
2689                                             *num_result_sets, *result_sets,
2690                                             *zop->u.prox->ordered,
2691                                             (!zop->u.prox->exclusion ?
2692                                              0 : *zop->u.prox->exclusion),
2693                                             *zop->u.prox->relationType,
2694                                             *zop->u.prox->distance );
2695 		}
2696 		break;
2697 	    default:
2698 		zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2699 		return ZEBRA_FAIL;
2700 	    }
2701 	    *num_result_sets = 1;
2702 	    *result_sets = nmem_malloc(stream, *num_result_sets *
2703 				       sizeof(**result_sets));
2704 	    (*result_sets)[0] = rset;
2705 	}
2706     }
2707     else if (zs->which == Z_RPNStructure_simple)
2708     {
2709 	RSET rset;
2710 	ZEBRA_RES res;
2711 
2712         if (zs->u.simple->which == Z_Operand_APT)
2713         {
2714             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2715             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2716 				 attributeSet, hits_limit,
2717                                  stream, sort_sequence,
2718 				 num_bases, basenames, rset_nmem, &rset,
2719 				 kc);
2720 	    if (res != ZEBRA_OK)
2721 		return res;
2722         }
2723         else if (zs->u.simple->which == Z_Operand_resultSetId)
2724         {
2725             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2726             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2727             if (!rset)
2728             {
2729 		zebra_setError(zh,
2730 			       YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2731 			       zs->u.simple->u.resultSetId);
2732 		return ZEBRA_FAIL;
2733             }
2734 	    rset_dup(rset);
2735         }
2736         else
2737         {
2738 	    zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2739             return ZEBRA_FAIL;
2740         }
2741 	*num_result_sets = 1;
2742 	*result_sets = nmem_malloc(stream, *num_result_sets *
2743 				   sizeof(**result_sets));
2744 	(*result_sets)[0] = rset;
2745     }
2746     else
2747     {
2748 	zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2749         return ZEBRA_FAIL;
2750     }
2751     return ZEBRA_OK;
2752 }
2753 
2754 
2755 
2756 /*
2757  * Local variables:
2758  * c-basic-offset: 4
2759  * c-file-style: "Stroustrup"
2760  * indent-tabs-mode: nil
2761  * End:
2762  * vim: shiftwidth=4 tabstop=8 expandtab
2763  */
2764 
2765