1 /* Copyright(C) 2004 Brazil
2 
3   This library is free software; you can redistribute it and/or
4   modify it under the terms of the GNU Lesser General Public
5   License as published by the Free Software Foundation; either
6   version 2.1 of the License, or (at your option) any later version.
7 
8   This library is distributed in the hope that it will be useful,
9   but WITHOUT ANY WARRANTY; without even the implied warranty of
10   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11   Lesser General Public License for more details.
12 
13   You should have received a copy of the GNU Lesser General Public
14   License along with this library; if not, write to the Free Software
15   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17 #ifndef NO_MECAB
18 #include <mecab.h>
19 #endif /* NO_MECAB */
20 
21 #ifndef SEN_LEX_H
22 #define SEN_LEX_H
23 
24 #ifndef SENNA_H
25 #include "senna_in.h"
26 #endif /* SENNA_H */
27 
28 #ifndef SEN_SYM_H
29 #include "sym.h"
30 #endif /* SEN_SYM_H */
31 
32 #ifndef SEN_STR_H
33 #include "str.h"
34 #endif /* SEN_STR_H */
35 
36 #ifdef	__cplusplus
37 extern "C" {
38 #endif
39 
40 #define SEN_LEX_ADD 1
41 #define SEN_LEX_UPD 2
42 
43 typedef struct {
44   sen_sym *sym;
45   unsigned char *buf;
46   const unsigned char *orig;
47   const unsigned char *next;
48   unsigned char *token;
49   uint32_t tlen;
50   sen_nstr *nstr;
51 #ifndef NO_MECAB
52   mecab_t *mecab;
53 #endif /* NO_MECAB */
54   int32_t pos;
55   int32_t len;
56   uint32_t skip;
57   uint32_t tail;
58   uint32_t offset;
59   uint8_t flags;
60   uint8_t status;
61   uint8_t uni_alpha;
62   uint8_t uni_digit;
63   uint8_t uni_symbol;
64   uint8_t force_prefix;
65   sen_encoding encoding;
66 } sen_lex;
67 
68 enum {
69   sen_lex_doing = 0,
70   sen_lex_done,
71   sen_lex_not_found
72 };
73 
74 sen_rc sen_lex_init(void);
75 sen_lex *sen_lex_open(sen_sym *sym, const char *str, size_t str_len, uint8_t flags);
76 sen_rc sen_lex_next(sen_lex *ng);
77 sen_rc sen_lex_close(sen_lex *ng);
78 sen_rc sen_lex_fin(void);
79 sen_rc sen_lex_validate(sen_sym *sym);
80 
81 #ifdef __cplusplus
82 }
83 #endif
84 
85 #endif /* SEN_LEX_H */
86