1 /* -*- c-basic-offset: 2 -*- */
2 /*
3   Copyright(C) 2009-2016 Brazil
4 
5   This library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License version 2.1 as published by the Free Software Foundation.
8 
9   This library is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   Lesser General Public License for more details.
13 
14   You should have received a copy of the GNU Lesser General Public
15   License along with this library; if not, write to the Free Software
16   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
17 */
18 
19 #pragma once
20 
21 /* "ii" is for inverted index */
22 
23 #include "grn.h"
24 #include "grn_hash.h"
25 #include "grn_io.h"
26 #include "grn_store.h"
27 
28 #ifdef  __cplusplus
29 extern "C" {
30 #endif
31 
32 struct _grn_ii {
33   grn_db_obj obj;
34   grn_io *seg;           /* I/O for a variety of segments */
35   grn_io *chunk;         /* I/O for posting chunks */
36   grn_obj *lexicon;      /* Lexicon table */
37   grn_table_flags lflags;
38   grn_encoding encoding; /* Character encoding */
39                          /* This member is used for matching */
40   uint32_t n_elements;   /* Number of elements in postings */
41                          /* rid, [sid], tf, [weight] and [pos] */
42   struct grn_ii_header *header;
43 };
44 
/* BGQ is buffer garbage queue? */
#define GRN_II_BGQSIZE 16

/* Upper bound used to size the ainfo[] / binfo[] tables of the header. */
#define GRN_II_MAX_LSEG 0x10000

/*
 * Chunk geometry.
 * NOTE(review): "W" presumably means "bit width" -- confirm.
 */
#define GRN_II_W_TOTAL_CHUNK 40
#define GRN_II_W_CHUNK 22
/* 40 - 32 = 8 */
#define GRN_II_W_LEAST_CHUNK (GRN_II_W_TOTAL_CHUNK - 32)
/* 1 << 18 */
#define GRN_II_MAX_CHUNK (1 << (GRN_II_W_TOTAL_CHUNK - GRN_II_W_CHUNK))
/* 22 - 8 = 14 */
#define GRN_II_N_CHUNK_VARIATION (GRN_II_W_CHUNK - GRN_II_W_LEAST_CHUNK)

/* 1 << 10 */
#define GRN_II_MAX_CHUNK_SMALL \
  (1 << (GRN_II_W_TOTAL_CHUNK - GRN_II_W_CHUNK - 8))

/*
 * GRN_II_MAX_CHUNK_MEDIUM (1 << 14) has enough space for the following
 * source:
 *   * Single source.
 *   * Source is a fixed size column or _key of a table.
 *   * Source column is a scalar column.
 *   * Lexicon doesn't have tokenizer.
 */
#define GRN_II_MAX_CHUNK_MEDIUM \
  (1 << (GRN_II_W_TOTAL_CHUNK - GRN_II_W_CHUNK - 4))

/* All-ones 32-bit value marking a segment that is not assigned. */
#define GRN_II_PSEG_NOT_ASSIGNED 0xffffffff
64 
65 struct grn_ii_header {
66   uint64_t total_chunk_size;
67   uint64_t bmax;
68   uint32_t flags;
69   uint32_t amax;
70   uint32_t smax;
71   uint32_t param1;
72   uint32_t param2;
73   uint32_t pnext;
74   uint32_t bgqhead;
75   uint32_t bgqtail;
76   uint32_t bgqbody[GRN_II_BGQSIZE];
77   uint32_t reserved[288];
78   uint32_t ainfo[GRN_II_MAX_LSEG]; /* array info */
79   uint32_t binfo[GRN_II_MAX_LSEG]; /* buffer info */
80   uint32_t free_chunks[GRN_II_N_CHUNK_VARIATION + 1];
81   uint32_t garbages[GRN_II_N_CHUNK_VARIATION + 1];
82   uint32_t ngarbages[GRN_II_N_CHUNK_VARIATION + 1];
83   uint8_t chunks[GRN_II_MAX_CHUNK >> 3];
84 };
85 
/* Node of a singly linked list of positions. */
struct _grn_ii_pos {
  /* Following node in the list */
  struct _grn_ii_pos *next;
  /* Position value carried by this node */
  uint32_t pos;
};
90 
/*
 * Update specification: identifies a posting by rid/sid and carries its
 * weight, frequency counters and a linked list of positions.
 */
struct _grn_ii_updspec {
  uint32_t rid;
  uint32_t sid;
  int32_t weight;
  /* number of postings successfully stored to index */
  int32_t tf;
  /* actual number of postings */
  int32_t atf;
  int32_t offset;
  /* First and last node of the position list */
  struct _grn_ii_pos *pos;
  struct _grn_ii_pos *tail;
  /* grn_vgram_vnode *vnodes; */
};

typedef struct _grn_ii_updspec grn_ii_updspec;
104 
105 void grn_ii_init_from_env(void);
106 
107 GRN_API grn_ii *grn_ii_create(grn_ctx *ctx, const char *path, grn_obj *lexicon,
108                               uint32_t flags);
109 GRN_API grn_ii *grn_ii_open(grn_ctx *ctx, const char *path, grn_obj *lexicon);
110 GRN_API grn_rc grn_ii_close(grn_ctx *ctx, grn_ii *ii);
111 GRN_API grn_rc grn_ii_remove(grn_ctx *ctx, const char *path);
112 grn_rc grn_ii_info(grn_ctx *ctx, grn_ii *ii, uint64_t *seg_size, uint64_t *chunk_size);
113 grn_column_flags grn_ii_get_flags(grn_ctx *ctx, grn_ii *ii);
114 grn_rc grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, uint32_t key, grn_ii_updspec *u,
115                          grn_hash *h);
116 grn_rc grn_ii_delete_one(grn_ctx *ctx, grn_ii *ii, uint32_t key, grn_ii_updspec *u,
117                          grn_hash *h);
118 grn_ii_updspec *grn_ii_updspec_open(grn_ctx *ctx, uint32_t rid, uint32_t sid);
119 grn_rc grn_ii_updspec_close(grn_ctx *ctx, grn_ii_updspec *u);
120 grn_rc grn_ii_updspec_add(grn_ctx *ctx, grn_ii_updspec *u, int pos, int32_t weight);
121 int grn_ii_updspec_cmp(grn_ii_updspec *a, grn_ii_updspec *b);
122 
123 void grn_ii_expire(grn_ctx *ctx, grn_ii *ii);
124 grn_rc grn_ii_flush(grn_ctx *ctx, grn_ii *ii);
125 size_t grn_ii_get_disk_usage(grn_ctx *ctx, grn_ii *ii);
126 
127 grn_ii_cursor *grn_ii_cursor_openv1(grn_ii *ii, uint32_t key);
128 grn_rc grn_ii_cursor_openv2(grn_ii_cursor **cursors, int ncursors);
129 
130 uint32_t grn_ii_max_section(grn_ii *ii);
131 
132 const char *grn_ii_path(grn_ii *ii);
133 grn_obj *grn_ii_lexicon(grn_ii *ii);
134 
135 /*
136 grn_rc grn_ii_upd(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram,
137                    const char *oldvalue, unsigned int oldvalue_len,
138                    const char *newvalue, unsigned int newvalue_len);
139 grn_rc grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram,
140                       unsigned int section,
141                       grn_values *oldvalues, grn_values *newvalues);
142 */
143 
144 typedef struct _grn_select_optarg grn_select_optarg;
145 
146 struct _grn_select_optarg {
147   grn_operator mode;
148   int similarity_threshold;
149   int max_interval;
150   int *weight_vector;
151   int vector_size;
152   int (*func)(grn_ctx *, grn_hash *, const void *, int, void *);
153   void *func_arg;
154   int max_size;
155   grn_obj *scorer;
156   grn_obj *scorer_args_expr;
157   unsigned int scorer_args_expr_offset;
158   grn_fuzzy_search_optarg fuzzy;
159   grn_match_info *match_info;
160 };
161 
162 GRN_API grn_rc grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id id,
163                                     unsigned int section, grn_obj *oldvalue,
164                                     grn_obj *newvalue, grn_obj *posting);
165 grn_rc grn_ii_term_extract(grn_ctx *ctx, grn_ii *ii, const char *string,
166                             unsigned int string_len, grn_hash *s,
167                             grn_operator op, grn_select_optarg *optarg);
168 grn_rc grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len,
169                               grn_hash *s, grn_operator op, grn_select_optarg *optarg);
170 GRN_API grn_rc grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len,
171                              grn_hash *s, grn_operator op, grn_select_optarg *optarg);
172 grn_rc grn_ii_sel(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len,
173                   grn_hash *s, grn_operator op, grn_search_optarg *optarg);
174 
175 void grn_ii_resolve_sel_and(grn_ctx *ctx, grn_hash *s, grn_operator op);
176 
177 grn_rc grn_ii_at(grn_ctx *ctx, grn_ii *ii, grn_id id, grn_hash *s, grn_operator op);
178 
179 void grn_ii_inspect_values(grn_ctx *ctx, grn_ii *ii, grn_obj *buf);
180 void grn_ii_cursor_inspect(grn_ctx *ctx, grn_ii_cursor *c, grn_obj *buf);
181 
182 grn_rc grn_ii_truncate(grn_ctx *ctx, grn_ii *ii);
183 grn_rc grn_ii_build(grn_ctx *ctx, grn_ii *ii, uint64_t sparsity);
184 
185 typedef struct grn_ii_builder_options grn_ii_builder_options;
186 
187 grn_rc grn_ii_build2(grn_ctx *ctx, grn_ii *ii,
188                      const grn_ii_builder_options *options);
189 
190 #ifdef __cplusplus
191 }
192 #endif
193