1 /**
2  * @file scws.h (core include)
3  * @author Hightman Mar
4  * @editor set number ; syntax on ; set autoindent ; set tabstop=4 (vim)
5  * $Id$
6  */
7 
8 #ifndef	_SCWS_LIBSCWS_20070531_H_
9 #define	_SCWS_LIBSCWS_20070531_H_
10 
11 #ifdef __cplusplus
12 extern "C" {
13 #endif
14 
15 #include "version.h"
16 #include "rule.h"
17 #include "xdict.h"
18 
/*
 * Segmentation option flags.
 * NOTE(review): presumably OR-ed into scws_st.mode -- confirm against the
 * implementation. Bits 0x02 and 0x04 are only present as commented-out
 * definitions below.
 */
#define	SCWS_IGN_SYMBOL		0x01
//#define	SCWS_SEG_MULTI		0x02
//#define	SCWS_XDB_USAGE		0x04
#define	SCWS_DEBUG			0x08
#define	SCWS_DUALITY		0x10

/* hightman.070901: multi-segment policy */
#define SCWS_MULTI_NONE     0x00000		// nothing
#define	SCWS_MULTI_SHORT	0x01000		// split long words into short words, left to right
#define	SCWS_MULTI_DUALITY	0x02000		// split every long word (3 chars?) into two-char pieces
#define SCWS_MULTI_ZMAIN    0x04000		// split into main single Chinese chars, attr = j|a|n?|v?
#define	SCWS_MULTI_ZALL		0x08000		// attr = **, split everything into single chars
#define	SCWS_MULTI_MASK		0xff000		// mask used to test the multi-segment policy bits

/* NOTE(review): name suggests a "zis in use" marker bit -- confirm where it is tested. */
#define	SCWS_ZIS_USED		0x8000000

/* yes / no values (1 / 0); presumably what the `yes` setter arguments expect -- confirm. */
#define	SCWS_YEA			(1)
#define	SCWS_NA				(0)
37 
38 /* data structures */
/* Handle type: pointer to one node of a segmentation result list. */
typedef struct scws_result *scws_res_t;

/*
 * One segmentation result node; nodes are chained through `next`.
 * NOTE(review): the field meanings below are inferred from the names only --
 * confirm against the implementation.
 */
struct scws_result
{
	int off;			/* presumably the word's offset in the submitted text */
	float idf;			/* presumably the word's IDF weight */
	unsigned char len;	/* presumably the word's length */
	char attr[3];		/* presumably the word's attribute tag (3 bytes) */
	scws_res_t next;	/* next node in the list */
};
49 
/* Handle type: pointer to one node of a top-word list. */
typedef struct scws_topword *scws_top_t;

/*
 * One top-word node; nodes are chained through `next`.
 * NOTE(review): the field meanings below are inferred from the names only --
 * confirm against the implementation.
 */
struct scws_topword
{
	char *word;			/* the word text; ownership is not visible from this header */
	float weight;		/* presumably the ranking weight of the word */
	short times;		/* presumably the number of occurrences */
	/*
	 * NOTE(review): only 2 bytes here, while scws_result.attr has 3. A
	 * two-character tag leaves no room for a terminating NUL -- confirm this
	 * field is never handled as a C string.
	 */
	char attr[2];
	scws_top_t next;	/* next node in the list */
};
60 
/*
 * A start/end pair of ints; scws_st.zmap points at these.
 * NOTE(review): presumably a range over the text for one run of characters --
 * confirm units and whether `end` is inclusive.
 */
struct scws_zchar
{
	int start;
	int end;
};
66 
67 typedef struct scws_st scws_st, *scws_t;
68 
69 struct scws_st
70 {
71 	xdict_t d;
72 	rule_t r;
73 	unsigned char *mblen;
74 	unsigned int mode;
75 	unsigned char *txt;
76 	int zis;
77 	int len;
78 	int off;
79 	int wend;
80 	scws_res_t res0;
81 	scws_res_t res1;
82 	word_t **wmap;
83 	struct scws_zchar *zmap;
84 };
85 
86 /* api: init the scws handler */
87 scws_t scws_new();
88 void scws_free(scws_t s);
89 /* fork instance for multi-threaded usage, but they shared the dict/rules */
90 scws_t scws_fork(scws_t s);
91 
92 /* mode = SCWS_XDICT_XDB | SCWS_XDICT_MEM | SCWS_XDICT_TXT */
93 int scws_add_dict(scws_t s, const char *fpath, int mode);
94 int scws_set_dict(scws_t s, const char *fpath, int mode);
95 void scws_set_charset(scws_t s, const char *cs);
96 void scws_set_rule(scws_t s, const char *fpath);
97 
98 /* set ignore symbol or multi segments */
99 void scws_set_ignore(scws_t s, int yes);
100 void scws_set_multi(scws_t s, int mode);
101 void scws_set_debug(scws_t s, int yes);
102 void scws_set_duality(scws_t s, int yes);
103 
104 void scws_send_text(scws_t s, const char *text, int len);
105 scws_res_t scws_get_result(scws_t s);
106 void scws_free_result(scws_res_t result);
107 
108 scws_top_t scws_get_tops(scws_t s, int limit, char *xattr);
109 void scws_free_tops(scws_top_t tops);
110 
111 scws_top_t scws_get_words(scws_t s, char *xattr);
112 int scws_has_word(scws_t s, char *xattr);
113 
114 #ifdef __cplusplus
115 }
116 #endif
117 
118 #endif
119