1 /* -*- c-basic-offset: 2 -*- */ 2 /* 3 Copyright(C) 2009-2016 Brazil 4 5 This library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License version 2.1 as published by the Free Software Foundation. 8 9 This library is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 Lesser General Public License for more details. 13 14 You should have received a copy of the GNU Lesser General Public 15 License along with this library; if not, write to the Free Software 16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA 17 */ 18 19 #pragma once 20 21 /* "ii" is for inverted index */ 22 23 #include "grn.h" 24 #include "grn_hash.h" 25 #include "grn_io.h" 26 #include "grn_store.h" 27 28 #ifdef __cplusplus 29 extern "C" { 30 #endif 31 32 struct _grn_ii { 33 grn_db_obj obj; 34 grn_io *seg; /* I/O for a variety of segments */ 35 grn_io *chunk; /* I/O for posting chunks */ 36 grn_obj *lexicon; /* Lexicon table */ 37 grn_table_flags lflags; 38 grn_encoding encoding; /* Character encoding */ 39 /* This member is used for matching */ 40 uint32_t n_elements; /* Number of elements in postings */ 41 /* rid, [sid], tf, [weight] and [pos] */ 42 struct grn_ii_header *header; 43 }; 44 45 /* BGQ is buffer garbage queue? */ 46 #define GRN_II_BGQSIZE 16 47 #define GRN_II_MAX_LSEG 0x10000 48 #define GRN_II_W_TOTAL_CHUNK 40 49 #define GRN_II_W_CHUNK 22 50 #define GRN_II_W_LEAST_CHUNK (GRN_II_W_TOTAL_CHUNK - 32) 51 #define GRN_II_MAX_CHUNK (1 << (GRN_II_W_TOTAL_CHUNK - GRN_II_W_CHUNK)) 52 #define GRN_II_N_CHUNK_VARIATION (GRN_II_W_CHUNK - GRN_II_W_LEAST_CHUNK) 53 54 #define GRN_II_MAX_CHUNK_SMALL (1 << (GRN_II_W_TOTAL_CHUNK - GRN_II_W_CHUNK - 8)) 55 /* GRN_II_MAX_CHUNK_MEDIUM has enough space for the following source: 56 * * Single source. 57 * * Source is a fixed size column or _key of a table. 58 * * Source column is a scalar column. 59 * * Lexicon doesn't have tokenizer. 60 */ 61 #define GRN_II_MAX_CHUNK_MEDIUM (1 << (GRN_II_W_TOTAL_CHUNK - GRN_II_W_CHUNK - 4)) 62 63 #define GRN_II_PSEG_NOT_ASSIGNED 0xffffffff 64 65 struct grn_ii_header { 66 uint64_t total_chunk_size; 67 uint64_t bmax; 68 uint32_t flags; 69 uint32_t amax; 70 uint32_t smax; 71 uint32_t param1; 72 uint32_t param2; 73 uint32_t pnext; 74 uint32_t bgqhead; 75 uint32_t bgqtail; 76 uint32_t bgqbody[GRN_II_BGQSIZE]; 77 uint32_t reserved[288]; 78 uint32_t ainfo[GRN_II_MAX_LSEG]; /* array info */ 79 uint32_t binfo[GRN_II_MAX_LSEG]; /* buffer info */ 80 uint32_t free_chunks[GRN_II_N_CHUNK_VARIATION + 1]; 81 uint32_t garbages[GRN_II_N_CHUNK_VARIATION + 1]; 82 uint32_t ngarbages[GRN_II_N_CHUNK_VARIATION + 1]; 83 uint8_t chunks[GRN_II_MAX_CHUNK >> 3]; 84 }; 85 86 struct _grn_ii_pos { 87 struct _grn_ii_pos *next; 88 uint32_t pos; 89 }; 90 91 struct _grn_ii_updspec { 92 uint32_t rid; 93 uint32_t sid; 94 int32_t weight; 95 int32_t tf; /* number of postings successfully stored to index */ 96 int32_t atf; /* actual number of postings */ 97 int32_t offset; 98 struct _grn_ii_pos *pos; 99 struct _grn_ii_pos *tail; 100 /* grn_vgram_vnode *vnodes; */ 101 }; 102 103 typedef struct _grn_ii_updspec grn_ii_updspec; 104 105 void grn_ii_init_from_env(void); 106 107 GRN_API grn_ii *grn_ii_create(grn_ctx *ctx, const char *path, grn_obj *lexicon, 108 uint32_t flags); 109 GRN_API grn_ii *grn_ii_open(grn_ctx *ctx, const char *path, grn_obj *lexicon); 110 GRN_API grn_rc grn_ii_close(grn_ctx *ctx, grn_ii *ii); 111 GRN_API grn_rc grn_ii_remove(grn_ctx *ctx, const char *path); 112 grn_rc grn_ii_info(grn_ctx *ctx, grn_ii *ii, uint64_t *seg_size, uint64_t *chunk_size); 113 grn_column_flags grn_ii_get_flags(grn_ctx *ctx, grn_ii *ii); 114 grn_rc grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, uint32_t key, grn_ii_updspec *u, 115 grn_hash *h); 116 grn_rc grn_ii_delete_one(grn_ctx *ctx, grn_ii *ii, uint32_t key, grn_ii_updspec *u, 117 grn_hash *h); 118 grn_ii_updspec *grn_ii_updspec_open(grn_ctx *ctx, uint32_t rid, uint32_t sid); 119 grn_rc grn_ii_updspec_close(grn_ctx *ctx, grn_ii_updspec *u); 120 grn_rc grn_ii_updspec_add(grn_ctx *ctx, grn_ii_updspec *u, int pos, int32_t weight); 121 int grn_ii_updspec_cmp(grn_ii_updspec *a, grn_ii_updspec *b); 122 123 void grn_ii_expire(grn_ctx *ctx, grn_ii *ii); 124 grn_rc grn_ii_flush(grn_ctx *ctx, grn_ii *ii); 125 size_t grn_ii_get_disk_usage(grn_ctx *ctx, grn_ii *ii); 126 127 grn_ii_cursor *grn_ii_cursor_openv1(grn_ii *ii, uint32_t key); 128 grn_rc grn_ii_cursor_openv2(grn_ii_cursor **cursors, int ncursors); 129 130 uint32_t grn_ii_max_section(grn_ii *ii); 131 132 const char *grn_ii_path(grn_ii *ii); 133 grn_obj *grn_ii_lexicon(grn_ii *ii); 134 135 /* 136 grn_rc grn_ii_upd(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, 137 const char *oldvalue, unsigned int oldvalue_len, 138 const char *newvalue, unsigned int newvalue_len); 139 grn_rc grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, 140 unsigned int section, 141 grn_values *oldvalues, grn_values *newvalues); 142 */ 143 144 typedef struct _grn_select_optarg grn_select_optarg; 145 146 struct _grn_select_optarg { 147 grn_operator mode; 148 int similarity_threshold; 149 int max_interval; 150 int *weight_vector; 151 int vector_size; 152 int (*func)(grn_ctx *, grn_hash *, const void *, int, void *); 153 void *func_arg; 154 int max_size; 155 grn_obj *scorer; 156 grn_obj *scorer_args_expr; 157 unsigned int scorer_args_expr_offset; 158 grn_fuzzy_search_optarg fuzzy; 159 grn_match_info *match_info; 160 }; 161 162 GRN_API grn_rc grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id id, 163 unsigned int section, grn_obj *oldvalue, 164 grn_obj *newvalue, grn_obj *posting); 165 grn_rc grn_ii_term_extract(grn_ctx *ctx, grn_ii *ii, const char *string, 166 unsigned int string_len, grn_hash *s, 167 grn_operator op, grn_select_optarg *optarg); 168 grn_rc grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len, 169 grn_hash *s, grn_operator op, grn_select_optarg *optarg); 170 GRN_API grn_rc grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len, 171 grn_hash *s, grn_operator op, grn_select_optarg *optarg); 172 grn_rc grn_ii_sel(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len, 173 grn_hash *s, grn_operator op, grn_search_optarg *optarg); 174 175 void grn_ii_resolve_sel_and(grn_ctx *ctx, grn_hash *s, grn_operator op); 176 177 grn_rc grn_ii_at(grn_ctx *ctx, grn_ii *ii, grn_id id, grn_hash *s, grn_operator op); 178 179 void grn_ii_inspect_values(grn_ctx *ctx, grn_ii *ii, grn_obj *buf); 180 void grn_ii_cursor_inspect(grn_ctx *ctx, grn_ii_cursor *c, grn_obj *buf); 181 182 grn_rc grn_ii_truncate(grn_ctx *ctx, grn_ii *ii); 183 grn_rc grn_ii_build(grn_ctx *ctx, grn_ii *ii, uint64_t sparsity); 184 185 typedef struct grn_ii_builder_options grn_ii_builder_options; 186 187 grn_rc grn_ii_build2(grn_ctx *ctx, grn_ii *ii, 188 const grn_ii_builder_options *options); 189 190 #ifdef __cplusplus 191 } 192 #endif 193