1 /* Copyright(C) 2004 Brazil 2 3 This library is free software; you can redistribute it and/or 4 modify it under the terms of the GNU Lesser General Public 5 License as published by the Free Software Foundation; either 6 version 2.1 of the License, or (at your option) any later version. 7 8 This library is distributed in the hope that it will be useful, 9 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 Lesser General Public License for more details. 12 13 You should have received a copy of the GNU Lesser General Public 14 License along with this library; if not, write to the Free Software 15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 */ 17 #ifndef NO_MECAB 18 #include <mecab.h> 19 #endif /* NO_MECAB */ 20 21 #ifndef SEN_LEX_H 22 #define SEN_LEX_H 23 24 #ifndef SENNA_H 25 #include "senna_in.h" 26 #endif /* SENNA_H */ 27 28 #ifndef SEN_SYM_H 29 #include "sym.h" 30 #endif /* SEN_SYM_H */ 31 32 #ifndef SEN_STR_H 33 #include "str.h" 34 #endif /* SEN_STR_H */ 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 #define SEN_LEX_ADD 1 41 #define SEN_LEX_UPD 2 42 43 typedef struct { 44 sen_sym *sym; 45 unsigned char *buf; 46 const unsigned char *orig; 47 const unsigned char *next; 48 unsigned char *token; 49 uint32_t tlen; 50 sen_nstr *nstr; 51 #ifndef NO_MECAB 52 mecab_t *mecab; 53 #endif /* NO_MECAB */ 54 int32_t pos; 55 int32_t len; 56 uint32_t skip; 57 uint32_t tail; 58 uint32_t offset; 59 uint8_t flags; 60 uint8_t status; 61 uint8_t uni_alpha; 62 uint8_t uni_digit; 63 uint8_t uni_symbol; 64 uint8_t force_prefix; 65 sen_encoding encoding; 66 } sen_lex; 67 68 enum { 69 sen_lex_doing = 0, 70 sen_lex_done, 71 sen_lex_not_found 72 }; 73 74 sen_rc sen_lex_init(void); 75 sen_lex *sen_lex_open(sen_sym *sym, const char *str, size_t str_len, uint8_t flags); 76 sen_rc sen_lex_next(sen_lex *ng); 77 sen_rc sen_lex_close(sen_lex *ng); 78 sen_rc sen_lex_fin(void); 79 sen_rc sen_lex_validate(sen_sym *sym); 80 81 #ifdef __cplusplus 82 } 83 #endif 84 85 #endif /* SEN_LEX_H */ 86