1 /** 2 * @file scws.h (core include) 3 * @author Hightman Mar 4 * @editor set number ; syntax on ; set autoindent ; set tabstop=4 (vim) 5 * $Id$ 6 */ 7 8 #ifndef _SCWS_LIBSCWS_20070531_H_ 9 #define _SCWS_LIBSCWS_20070531_H_ 10 11 #ifdef __cplusplus 12 extern "C" { 13 #endif 14 15 #include "version.h" 16 #include "rule.h" 17 #include "xdict.h" 18 19 #define SCWS_IGN_SYMBOL 0x01 20 //#define SCWS_SEG_MULTI 0x02 21 //#define SCWS_XDB_USAGE 0x04 22 #define SCWS_DEBUG 0x08 23 #define SCWS_DUALITY 0x10 24 25 /* hightman.070901: multi segment policy */ 26 #define SCWS_MULTI_NONE 0x00000 // nothing 27 #define SCWS_MULTI_SHORT 0x01000 // split long words to short words from left to right 28 #define SCWS_MULTI_DUALITY 0x02000 // split every long words(3 chars?) to two chars 29 #define SCWS_MULTI_ZMAIN 0x04000 // split to main single chinese char atr = j|a|n?|v? 30 #define SCWS_MULTI_ZALL 0x08000 // attr = ** , all split to single chars 31 #define SCWS_MULTI_MASK 0xff000 // mask check for multi set 32 33 #define SCWS_ZIS_USED 0x8000000 34 35 #define SCWS_YEA (1) 36 #define SCWS_NA (0) 37 38 /* data structures */ 39 typedef struct scws_result *scws_res_t; 40 41 struct scws_result 42 { 43 int off; 44 float idf; 45 unsigned char len; 46 char attr[3]; 47 scws_res_t next; 48 }; 49 50 typedef struct scws_topword *scws_top_t; 51 52 struct scws_topword 53 { 54 char *word; 55 float weight; 56 short times; 57 char attr[2]; 58 scws_top_t next; 59 }; 60 61 struct scws_zchar 62 { 63 int start; 64 int end; 65 }; 66 67 typedef struct scws_st scws_st, *scws_t; 68 69 struct scws_st 70 { 71 xdict_t d; 72 rule_t r; 73 unsigned char *mblen; 74 unsigned int mode; 75 unsigned char *txt; 76 int zis; 77 int len; 78 int off; 79 int wend; 80 scws_res_t res0; 81 scws_res_t res1; 82 word_t **wmap; 83 struct scws_zchar *zmap; 84 }; 85 86 /* api: init the scws handler */ 87 scws_t scws_new(); 88 void scws_free(scws_t s); 89 /* fork instance for multi-threaded usage, but they shared the dict/rules */ 90 scws_t scws_fork(scws_t s); 91 92 /* mode = SCWS_XDICT_XDB | SCWS_XDICT_MEM | SCWS_XDICT_TXT */ 93 int scws_add_dict(scws_t s, const char *fpath, int mode); 94 int scws_set_dict(scws_t s, const char *fpath, int mode); 95 void scws_set_charset(scws_t s, const char *cs); 96 void scws_set_rule(scws_t s, const char *fpath); 97 98 /* set ignore symbol or multi segments */ 99 void scws_set_ignore(scws_t s, int yes); 100 void scws_set_multi(scws_t s, int mode); 101 void scws_set_debug(scws_t s, int yes); 102 void scws_set_duality(scws_t s, int yes); 103 104 void scws_send_text(scws_t s, const char *text, int len); 105 scws_res_t scws_get_result(scws_t s); 106 void scws_free_result(scws_res_t result); 107 108 scws_top_t scws_get_tops(scws_t s, int limit, char *xattr); 109 void scws_free_tops(scws_top_t tops); 110 111 scws_top_t scws_get_words(scws_t s, char *xattr); 112 int scws_has_word(scws_t s, char *xattr); 113 114 #ifdef __cplusplus 115 } 116 #endif 117 118 #endif 119