1 /* -*- c-basic-offset: 2 -*- */ 2 /* 3 Copyright(C) 2009-2016 Brazil 4 5 This library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License version 2.1 as published by the Free Software Foundation. 8 9 This library is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 Lesser General Public License for more details. 13 14 You should have received a copy of the GNU Lesser General Public 15 License along with this library; if not, write to the Free Software 16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA 17 */ 18 19 #pragma once 20 21 #include "grn_ctx.h" 22 #include "grn_db.h" 23 24 #include <groonga/tokenizer.h> 25 26 #ifdef __cplusplus 27 extern "C" { 28 #endif 29 30 #define GRN_TOKENIZER_BEGIN_MARK_UTF8 "\xEF\xBF\xAF" 31 #define GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN 3 32 #define GRN_TOKENIZER_END_MARK_UTF8 "\xEF\xBF\xB0" 33 #define GRN_TOKENIZER_END_MARK_UTF8_LEN 3 34 35 typedef enum { 36 GRN_TOKEN_CURSOR_DOING = 0, 37 GRN_TOKEN_CURSOR_DONE, 38 GRN_TOKEN_CURSOR_DONE_SKIP, 39 GRN_TOKEN_CURSOR_NOT_FOUND 40 } grn_token_cursor_status; 41 42 struct _grn_token { 43 grn_obj data; 44 grn_token_status status; 45 }; 46 47 typedef struct { 48 grn_obj *table; 49 const unsigned char *orig; 50 const unsigned char *curr; 51 uint32_t orig_blen; 52 uint32_t curr_size; 53 int32_t pos; 54 grn_tokenize_mode mode; 55 grn_token_cursor_status status; 56 grn_bool force_prefix; 57 grn_obj_flags table_flags; 58 grn_encoding encoding; 59 grn_obj *tokenizer; 60 grn_proc_ctx pctx; 61 struct { 62 grn_obj *objects; 63 void **data; 64 } token_filter; 65 uint32_t variant; 66 grn_obj *nstr; 67 } grn_token_cursor; 68 69 #define GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER (0x01L<<0) 70 71 GRN_API grn_token_cursor *grn_token_cursor_open(grn_ctx *ctx, grn_obj *table, 72 const char *str, size_t str_len, 73 grn_tokenize_mode mode, 74 unsigned int flags); 75 76 GRN_API grn_id grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor); 77 GRN_API grn_rc grn_token_cursor_close(grn_ctx *ctx, grn_token_cursor *token_cursor); 78 79 #ifdef __cplusplus 80 } 81 #endif 82