1/*
2 * sombok.h - common definitions for Sombok library
3 *
4 * Copyright (C) 2009-2012 by Hatuka*nezumi - IKEDA Soji.
5 *
6 * This file is part of the Sombok Package.  This program is free
7 * software; you can redistribute it and/or modify it under the terms of
8 * either the GNU General Public License or the Artistic License, as
9 * specified in the README file.
10 *
11 */
12
13#ifndef _SOMBOK_H_
14
15#ifdef HAVE_CONFIG_H
16#    include "config.h"
17#endif
18#include <errno.h>
19#include <stddef.h>
20#include <stdlib.h>
21#include <string.h>
22#ifdef HAVE_STRINGS_H
23#    include <strings.h>
24#endif				/* HAVE_STRINGS_H */
25
26#define SOMBOK_VERSION "@PACKAGE_VERSION@"	/* package version string; the placeholder is presumably substituted when this header is generated -- TODO confirm */
27
28@SOMBOK_UNICHAR_T_IS_WCHAR_T@
29@SOMBOK_UNICHAR_T_IS_UNSIGNED_INT@
30@SOMBOK_UNICHAR_T_IS_UNSIGNED_LONG@
31
32/***
33 *** Data structure.
34 ***/
35
36/* Primitive types */
37
38/** Unicode character; the underlying type is supplied through the SOMBOK_UNICHAR_T template placeholder. */
39typedef @SOMBOK_UNICHAR_T@ unichar_t;
40
41/** Character property value (e.g. the lbc, eaw, gcb and scr members of mapent_t); see PROP_UNKNOWN.
42 * @ingroup linebreak */
43typedef unsigned char propval_t;
44
45/** Unicode string: a buffer of unichar_t paired with an explicit length.
46 * @ingroup gcstring,linebreak,linebreak_break */
47typedef struct {
48    /** Sequence of Unicode characters.
49     * Note that NUL character (U+0000) may be contained, so it does not mark the end.
50     * NULL may specify zero-length string. */
51    unichar_t *str;
52    /** Length of the Unicode character sequence pointed to by str. */
53    size_t len;
54} unistr_t;
55
56/** Grapheme cluster: position, size, width and line breaking classes of one cluster.
57 * @ingroup gcstring
58 */
59typedef struct {
60    /** Offset of Unicode string (presumably into the str member of the owning gcstring_t -- TODO confirm). */
61    size_t idx;
62    /** Length of Unicode string (presumably counted in Unicode characters -- TODO confirm). */
63    size_t len;
64    /** Calculated number of columns. */
65    size_t col;
66    /** Line breaking class of grapheme base. */
67    propval_t lbc;
68    /** Line breaking class of grapheme extender if it is not CM. */
69    propval_t elbc;
70    /** User-defined flag; the LINEBREAK_FLAG_* macros give the standard values. */
71    unsigned char flag;
72} gcchar_t;
73
74/** Property map entry: the property values assigned to one UCS range.
75 * @ingroup linebreak */
76typedef struct {
77    /** Beginning of UCS range. */
78    unichar_t beg;
79    /** End of UCS range. */
80    unichar_t end;
81    /** UAX #14 line breaking class. */
82    propval_t lbc;
83    /** UAX #11 East_Asian_Width property value. */
84    propval_t eaw;
85    /** UAX #29 Grapheme_Cluster_Break property value. */
86    propval_t gcb;
87    /** Script property value. */
88    propval_t scr;
89} mapent_t;
90
91struct linebreak_t;	/* forward declaration, needed by the lbobj member and the callback typedefs */
92
93/** Grapheme cluster string: Unicode characters plus the grapheme clusters found in them.
94 * @ingroup gcstring,linebreak,linebreak_break */
95typedef struct {
96    /** Sequence of Unicode characters.
97     * Note that NUL character (U+0000) may be contained.
98     * NULL may specify zero-length string. */
99    unichar_t *str;
100    /** Number of Unicode characters. */
101    size_t len;
102    /** Sequence of grapheme clusters.
103     * NULL may specify zero-length grapheme cluster string. */
104    gcchar_t *gcstr;
105    /** Number of grapheme clusters. */
106    size_t gclen;
107    /** Next position; gcstring_eos() compares it against gclen. */
108    size_t pos;
109    /** linebreak object. */
110    struct linebreak_t *lbobj;
111} gcstring_t;
112
113/** @ingroup linebreak
114 * state argument for format callback (second parameter of linebreak_format_func_t). */
115typedef enum {
116    LINEBREAK_STATE_NONE = 0,
117    LINEBREAK_STATE_SOT, LINEBREAK_STATE_SOP, LINEBREAK_STATE_SOL,	/* 1, 2, 3; the LINEBREAK_STATE_*_FORMAT macros negate these */
118    LINEBREAK_STATE_LINE,
119    LINEBREAK_STATE_EOL, LINEBREAK_STATE_EOP, LINEBREAK_STATE_EOT,
120    LINEBREAK_STATE_MAX	/* last enumerator, one greater than LINEBREAK_STATE_EOT */
121} linebreak_state_t;
122
123typedef void	/* reference count callback, called as ref_func(data, type, action) */
124    (*linebreak_ref_func_t) (void *, int, int);
125typedef gcstring_t *	/* format callback; receives a linebreak_state_t as its state argument */
126    (*linebreak_format_func_t) (struct linebreak_t *, linebreak_state_t,
127				gcstring_t *);
128typedef double	/* sizing callback */
129    (*linebreak_sizing_func_t) (struct linebreak_t *, double,
130				gcstring_t *, gcstring_t *, gcstring_t *);
131typedef gcstring_t *	/* urgent breaking callback */
132    (*linebreak_urgent_func_t) (struct linebreak_t *, gcstring_t *);
133typedef gcstring_t *	/* preprocessing callback; the void * is presumably the matching prep_data entry -- TODO confirm */
134    (*linebreak_prep_func_t) (struct linebreak_t *, void *, unistr_t *,
135			      unistr_t *);
136typedef gcstring_t *	/* type of the deprecated user_func preprocessing callback */
137    (*linebreak_obs_prep_func_t) (struct linebreak_t *, unistr_t *);
138
139/** LineBreak object: private buffering state plus public limits, options and callbacks.
140 * @ingroup linebreak */
141typedef struct linebreak_t {
142    /** @name private members
143     *@{*/
144    /** reference count */
145    unsigned long int refcount;
146    /** state (presumably a linebreak_state_t value or a negative LINEBREAK_STATE_*_FORMAT value -- TODO confirm) */
147    int state;
148    /** buffered line */
149    unistr_t bufstr;
150    /** spaces trailing to buffered line */
151    unistr_t bufspc;
152    /** calculated columns of buffered line */
153    double bufcols;
154    /** unread input */
155    unistr_t unread;
156    /*@}*/
157
158    /** @name public members
159     *@{*/
160    /** Maximum number of Unicode characters each line may contain. */
161    size_t charmax;
162    /** Maximum number of columns. */
163    double colmax;
164    /** Minimum number of columns. */
165    double colmin;
166    /** User-tailored property map. */
167    mapent_t *map;
168    size_t mapsiz;	/**< presumably the number of entries in map -- TODO confirm */
169    /** Newline sequence. */
170    unistr_t newline;
171    /** Options.  See Defines (the LINEBREAK_OPTION_* macros). */
172    unsigned int options;
173    /** Data arguments of the format, sizing and urgent callback functions.  See utils.c. */
174    void *format_data;
175    void *sizing_data;
176    void *urgent_data;
177    /** @deprecated Use prep_data instead. */
178    void *user_data;
179    /** User-defined private data. */
180    void *stash;
181    /** Format callback function.  See utils.c. */
182    linebreak_format_func_t format_func;
183    /** Sizing callback function.  See utils.c. */
184    linebreak_sizing_func_t sizing_func;
185    /** Urgent breaking callback function.  See utils.c. */
186    linebreak_urgent_func_t urgent_func;
187    /** Preprocessing callback function.  See utils.c.
188     * @deprecated Use prep_func instead. */
189    linebreak_obs_prep_func_t user_func;
190    /** Reference Count function.
191     * This may be called with 3 arguments: ref_func(data, type, action).
192     * data is a (pointer to) external object assigned to stash, format_data,
193     * sizing_data, urgent_data or prep_data members.  type is type of object
194     * (see the LINEBREAK_REF_* macros).  If action is negative or positive, this function should
195     * decrement or increment reference count of object, respectively.
196     */
197    linebreak_ref_func_t ref_func;
198    /** Number of last error.
199     * may be a value of errno defined in <errno.h> or one of the special values below
200     * (LINEBREAK_ELONG, LINEBREAK_EEXTN). */
201    int errnum;
202    /*@}*/
203
204    /** @name public members addendum on release 2011.1.
205     *@{*/
206    /** Array of preprocessing callback functions.  See utils.c. */
207    linebreak_prep_func_t * prep_func;
208    /** Data argument of each preprocessing callback function.  See utils.c. */
209    void **prep_data;
210    /*@}*/
211} linebreak_t;
212
213/***
214 *** Constants.
215 ***/
216
217/** General: Unknown property value (all bits of propval_t set). */
218#define PROP_UNKNOWN ((propval_t)~0)
219
220/** @ingroup gcstring
221 * standard flag values (presumably for the flag member of gcchar_t -- TODO confirm). */
222#define LINEBREAK_FLAG_PROHIBIT_BEFORE (1)
223#define LINEBREAK_FLAG_ALLOW_BEFORE (2)
224#define LINEBREAK_FLAG_BREAK_BEFORE LINEBREAK_FLAG_ALLOW_BEFORE	/* alias: same value as LINEBREAK_FLAG_ALLOW_BEFORE */
225
226/** @ingroup linebreak
227 * default of charmax member of linebreak_t (a count of Unicode characters per line). */
228#define LINEBREAK_DEFAULT_CHARMAX (998)
229
230/** @ingroup linebreak
231 * bitwise options: each value is a distinct single bit, for the options member of linebreak_t. */
232#define LINEBREAK_OPTION_EASTASIAN_CONTEXT (1)
233#define LINEBREAK_OPTION_HANGUL_AS_AL (2)
234#define LINEBREAK_OPTION_LEGACY_CM (4)
235#define LINEBREAK_OPTION_BREAK_INDENT (8)
236#define LINEBREAK_OPTION_COMPLEX_BREAKING (16)
237#define LINEBREAK_OPTION_NONSTARTER_LOOSE (32)
238#define LINEBREAK_OPTION_VIRAMA_AS_JOINER (64)
239#define LINEBREAK_OPTION_WIDE_NONSPACING_W (128)
240
241/** @ingroup linebreak
242 * internal states: the negated SOT, SOP and SOL values of linebreak_state_t. */
243#define LINEBREAK_STATE_SOT_FORMAT (-LINEBREAK_STATE_SOT)
244#define LINEBREAK_STATE_SOP_FORMAT (-LINEBREAK_STATE_SOP)
245#define LINEBREAK_STATE_SOL_FORMAT (-LINEBREAK_STATE_SOL)
246
247/** @ingroup linebreak
248 * type argument of ref_func callback: tells the callback what type of object its data argument is. */
249#define LINEBREAK_REF_STASH (0)
250#define LINEBREAK_REF_FORMAT (1)
251#define LINEBREAK_REF_SIZING (2)
252#define LINEBREAK_REF_URGENT (3)
253#define LINEBREAK_REF_USER (4)
254#define LINEBREAK_REF_PREP (5)
255
256/** @ingroup linebreak
257 * Line breaking action, numbered from PROHIBITED (1) up to MANDATORY (4). */
258#define LINEBREAK_ACTION_MANDATORY (4)
259#define LINEBREAK_ACTION_DIRECT (3)
260#define LINEBREAK_ACTION_INDIRECT (2)
261#define LINEBREAK_ACTION_PROHIBITED (1)
262
263/** @ingroup linebreak
264 * special errnum value: negative codes for the errnum member of linebreak_t, used besides errno values. */
265#define LINEBREAK_ELONG (-2)
266#define LINEBREAK_EEXTN (-3)
267
268/** @ingroup utf8
269 * check specs (presumably passed as the int argument of the UTF-8 functions -- TODO confirm). */
270#define SOMBOK_UTF8_CHECK_NONE (0)
271#define SOMBOK_UTF8_CHECK_MALFORMED (1)
272#define SOMBOK_UTF8_CHECK_SURROGATE (2)
273#define SOMBOK_UTF8_CHECK_NONUNICODE (3)
274
275/***
276 *** Public functions, global variables and macros.
277 ***/
278/* Takes a character and four propval_t pointers (presumably outputs for lbc, eaw, gcb and scr as in mapent_t -- TODO confirm). */
279extern void linebreak_charprop(linebreak_t *, unichar_t,
280			       propval_t *, propval_t *, propval_t *,
281			       propval_t *);
282/* Grapheme cluster string (gcstring_t) interface. */
283extern gcstring_t *gcstring_new(unistr_t *, linebreak_t *);
284extern gcstring_t *gcstring_new_from_utf8(char *, size_t, int,
285					  linebreak_t *);
286extern gcstring_t *gcstring_newcopy(unistr_t *, linebreak_t *);
287extern gcstring_t *gcstring_copy(gcstring_t *);
288extern void gcstring_destroy(gcstring_t *);
289extern gcstring_t *gcstring_append(gcstring_t *, gcstring_t *);
290extern size_t gcstring_columns(gcstring_t *);
291extern int gcstring_cmp(gcstring_t *, gcstring_t *);
292extern gcstring_t *gcstring_concat(gcstring_t *, gcstring_t *);
293extern gcchar_t *gcstring_next(gcstring_t *);
294extern void gcstring_setpos(gcstring_t *, int);
295extern void gcstring_shrink(gcstring_t *, int);
296extern gcstring_t *gcstring_substr(gcstring_t *, int, int);
297extern gcstring_t *gcstring_replace(gcstring_t *, int, int, gcstring_t *);
298/* Position helpers are macros; gcstring_eos() evaluates its argument twice, so pass an expression without side effects. */
299#define gcstring_eos(gcstr) \
300  ((gcstr)->gclen <= (gcstr)->pos)	/* nonzero once pos has reached (or passed) gclen */
301#define gcstring_getpos(gcstr) \
302  ((gcstr)->pos)	/* current value of the pos member */
303
304extern propval_t gcstring_lbclass(gcstring_t *, int);
305extern propval_t gcstring_lbclass_ext(gcstring_t *, int);
306/* linebreak_t object management; linebreak_new() takes a reference count callback (linebreak_ref_func_t). */
307extern linebreak_t *linebreak_new(linebreak_ref_func_t);
308extern linebreak_t *linebreak_copy(linebreak_t *);
309extern linebreak_t *linebreak_incref(linebreak_t *);
310extern void linebreak_destroy(linebreak_t *);
311/* Configuration entry points; the callback ones each take a function pointer together with a void * data argument. */
312extern void linebreak_set_newline(linebreak_t *, unistr_t *);
313extern void linebreak_set_stash(linebreak_t *, void *);
314extern void linebreak_set_format(linebreak_t *, linebreak_format_func_t,
315				 void *);
316extern void linebreak_add_prep(linebreak_t *, linebreak_prep_func_t,
317			       void *);
318extern void linebreak_set_sizing(linebreak_t *, linebreak_sizing_func_t,
319				 void *);
320extern void linebreak_set_urgent(linebreak_t *, linebreak_urgent_func_t,
321				 void *);
322extern void linebreak_set_user(linebreak_t *, linebreak_obs_prep_func_t,
323			       void *);
324extern void linebreak_reset(linebreak_t *);
325extern void linebreak_update_lbclass(linebreak_t *, unichar_t, propval_t);
326extern void linebreak_clear_lbclass(linebreak_t *);
327extern void linebreak_update_eawidth(linebreak_t *, unichar_t, propval_t);
328extern void linebreak_clear_eawidth(linebreak_t *);
329extern propval_t linebreak_search_lbclass(linebreak_t *, unichar_t);
330extern propval_t linebreak_search_eawidth(linebreak_t *, unichar_t);
331extern void linebreak_merge_lbclass(linebreak_t *, linebreak_t *);
332extern void linebreak_merge_eawidth(linebreak_t *, linebreak_t *);
333/* Declarations marked "obs." are presumably obsolete interfaces -- TODO confirm. */
334extern propval_t linebreak_eawidth(linebreak_t *, unichar_t); /* obs. */
335extern propval_t linebreak_get_lbrule(linebreak_t *, propval_t, propval_t);
336extern propval_t linebreak_lbclass(linebreak_t *, unichar_t); /* obs. */
337/* Breaking functions; each returns gcstring_t **. */
338extern gcstring_t **linebreak_break(linebreak_t *, unistr_t *);
339extern gcstring_t **linebreak_break_fast(linebreak_t *, unistr_t *);
340extern gcstring_t **linebreak_break_from_utf8(linebreak_t *, char *,
341					      size_t, int);
342extern gcstring_t **linebreak_break_partial(linebreak_t *, unistr_t *);
343extern void linebreak_free_result(gcstring_t **, int);
344extern propval_t linebreak_lbrule(propval_t, propval_t); /* obs. */
345/* Global string data (const char * and arrays of const char *) and the Southeast Asian flagbreak function. */
346extern const char *linebreak_unicode_version;
347extern const char *linebreak_propvals_EA[];
348extern const char *linebreak_propvals_LB[];
349extern const char *linebreak_southeastasian_supported;
350extern void linebreak_southeastasian_flagbreak(gcstring_t *);
351/* UTF-8 conversion; the trailing int of sombok_decode_utf8() is presumably a SOMBOK_UTF8_CHECK_* value -- TODO confirm. */
352extern unistr_t *sombok_decode_utf8(unistr_t *, size_t, const char *,
353				    size_t, int);
354extern char *sombok_encode_utf8(char *, size_t *, size_t, unistr_t *);
355
356/***
357 *** Built-in callbacks for linebreak_t; each signature matches the corresponding linebreak_*_func_t typedef.
358 ***/
359extern gcstring_t *linebreak_format_SIMPLE(linebreak_t *,
360					   linebreak_state_t,
361					   gcstring_t *);
362extern gcstring_t *linebreak_format_NEWLINE(linebreak_t *,
363					    linebreak_state_t,
364					    gcstring_t *);
365extern gcstring_t *linebreak_format_TRIM(linebreak_t *, linebreak_state_t,
366					 gcstring_t *);
367extern gcstring_t *linebreak_prep_URIBREAK(linebreak_t *, void *,
368					   unistr_t *, unistr_t *);
369extern double linebreak_sizing_UAX11(linebreak_t *, double, gcstring_t *,
370				     gcstring_t *, gcstring_t *);
371extern gcstring_t *linebreak_urgent_ABORT(linebreak_t *, gcstring_t *);
372extern gcstring_t *linebreak_urgent_FORCE(linebreak_t *, gcstring_t *);
373
374#define _SOMBOK_H_
375#endif				/* _SOMBOK_H_ */
376
377#ifdef MALLOC_DEBUG
378#include "src/mymalloc.h"
379#endif				/* MALLOC_DEBUG */
380