1 /* -*- c-basic-offset: 2 -*- */
2 /*
3   Copyright(C) 2009-2016 Brazil
4 
5   This library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License version 2.1 as published by the Free Software Foundation.
8 
9   This library is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   Lesser General Public License for more details.
13 
14   You should have received a copy of the GNU Lesser General Public
15   License along with this library; if not, write to the Free Software
16   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18 
19 #pragma once
20 
21 #include "grn_ctx.h"
22 #include "grn_db.h"
23 
24 #include <groonga/tokenizer.h>
25 
26 #ifdef __cplusplus
27 extern "C" {
28 #endif
29 
/*
 * Begin/end mark byte sequences, spelled as UTF-8 string literals.
 * "\xEF\xBF\xAF" is the UTF-8 encoding of U+FFEF and "\xEF\xBF\xB0" is the
 * UTF-8 encoding of U+FFF0.  Each *_LEN macro is the length of the matching
 * literal in bytes, not counting the terminating NUL.
 *
 * NOTE(review): presumably these marks delimit the text handed to a
 * tokenizer -- confirm against the code that uses them.
 */
#define GRN_TOKENIZER_BEGIN_MARK_UTF8     "\xEF\xBF\xAF"
#define GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN 3
#define GRN_TOKENIZER_END_MARK_UTF8       "\xEF\xBF\xB0"
#define GRN_TOKENIZER_END_MARK_UTF8_LEN   3
34 
/*
 * Status value kept in the `status` member of grn_token_cursor.
 * The enumerators are numbered 0..3 in declaration order.
 *
 * NOTE(review): the per-value descriptions below are inferred from the
 * names only -- confirm against the token cursor implementation.
 */
typedef enum {
  GRN_TOKEN_CURSOR_DOING = 0,   /* presumably: more tokens may follow */
  GRN_TOKEN_CURSOR_DONE,        /* presumably: tokenization finished */
  GRN_TOKEN_CURSOR_DONE_SKIP,   /* presumably: finished, last token skipped */
  GRN_TOKEN_CURSOR_NOT_FOUND    /* presumably: a token was not found in the table */
} grn_token_cursor_status;
41 
42 struct _grn_token {
43   grn_obj data;
44   grn_token_status status;
45 };
46 
47 typedef struct {
48   grn_obj *table;
49   const unsigned char *orig;
50   const unsigned char *curr;
51   uint32_t orig_blen;
52   uint32_t curr_size;
53   int32_t pos;
54   grn_tokenize_mode mode;
55   grn_token_cursor_status status;
56   grn_bool force_prefix;
57   grn_obj_flags table_flags;
58   grn_encoding encoding;
59   grn_obj *tokenizer;
60   grn_proc_ctx pctx;
61   struct {
62     grn_obj *objects;
63     void **data;
64   } token_filter;
65   uint32_t variant;
66   grn_obj *nstr;
67 } grn_token_cursor;
68 
/*
 * Bit flag with value 1 (bit 0); the constant has type long.
 * NOTE(review): presumably meant for the `flags` argument of
 * grn_token_cursor_open() -- confirm against its implementation.
 */
#define GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER (0x01L<<0)
70 
71 GRN_API grn_token_cursor *grn_token_cursor_open(grn_ctx *ctx, grn_obj *table,
72                                                 const char *str, size_t str_len,
73                                                 grn_tokenize_mode mode,
74                                                 unsigned int flags);
75 
76 GRN_API grn_id grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor);
77 GRN_API grn_rc grn_token_cursor_close(grn_ctx *ctx, grn_token_cursor *token_cursor);
78 
79 #ifdef __cplusplus
80 }
81 #endif
82