1/* 2 * sombok.h - common definitions for Sombok library 3 * 4 * Copyright (C) 2009-2012 by Hatuka*nezumi - IKEDA Soji. 5 * 6 * This file is part of the Sombok Package. This program is free 7 * software; you can redistribute it and/or modify it under the terms of 8 * either the GNU General Public License or the Artistic License, as 9 * specified in the README file. 10 * 11 */ 12 13#ifndef _SOMBOK_H_ 14 15#ifdef HAVE_CONFIG_H 16# include "config.h" 17#endif 18#include <errno.h> 19#include <stddef.h> 20#include <stdlib.h> 21#include <string.h> 22#ifdef HAVE_STRINGS_H 23# include <strings.h> 24#endif /* HAVE_STRINGS_H */ 25 26#define SOMBOK_VERSION "@PACKAGE_VERSION@" 27 28@SOMBOK_UNICHAR_T_IS_WCHAR_T@ 29@SOMBOK_UNICHAR_T_IS_UNSIGNED_INT@ 30@SOMBOK_UNICHAR_T_IS_UNSIGNED_LONG@ 31 32/*** 33 *** Data structure. 34 ***/ 35 36/* Primitive types */ 37 38/** Unicode character */ 39typedef @SOMBOK_UNICHAR_T@ unichar_t; 40 41/** Character property 42 * @ingroup linebreak */ 43typedef unsigned char propval_t; 44 45/** Unicode string 46 * @ingroup gcstring,linebreak,linebreak_break */ 47typedef struct { 48 /** Sequence of Unicode character. 49 * Note that NUL character (U+0000) may be contained. 50 * NULL may specify zero-length string. */ 51 unichar_t *str; 52 /** Length of Unicode character sequence. */ 53 size_t len; 54} unistr_t; 55 56/** Grapheme cluster 57 * @ingroup gcstring 58 */ 59typedef struct { 60 /** Offset of Unicode string. */ 61 size_t idx; 62 /** Length of Unicode string. */ 63 size_t len; 64 /** Calculated number of columns. */ 65 size_t col; 66 /** Line breaking class of grapheme base. */ 67 propval_t lbc; 68 /** Line breaking class of grapheme extender if it is not CM. */ 69 propval_t elbc; 70 /** User-defined flag. */ 71 unsigned char flag; 72} gcchar_t; 73 74/** Property map entry 75 * @ingroup linebreak */ 76typedef struct { 77 /** Beginning of UCS range. */ 78 unichar_t beg; 79 /** End of UCS range. */ 80 unichar_t end; 81 /** UAX #14 line breaking class. */ 82 propval_t lbc; 83 /** UAX #11 East_Asian_Width property value. */ 84 propval_t eaw; 85 /** UAX #29 Grapheme_Cluster_Break property value. */ 86 propval_t gcb; 87 /** Script property value. */ 88 propval_t scr; 89} mapent_t; 90 91struct linebreak_t; 92 93/** Grapheme cluster string. 94 * @ingroup gcstring,linebreak,linebreak_break */ 95typedef struct { 96 /** Sequence of Unicode characters. 97 * Note that NUL character (U+0000) may be contained. 98 * NULL may specify zero-length string. */ 99 unichar_t *str; 100 /** Number of Unicode characters. */ 101 size_t len; 102 /** Sequence of grapheme clusters. 103 * NULL may specify zero-length grapheme cluster string. */ 104 gcchar_t *gcstr; 105 /** Number of grapheme clusters. */ 106 size_t gclen; 107 /** Next position. */ 108 size_t pos; 109 /** linebreak object. */ 110 struct linebreak_t *lbobj; 111} gcstring_t; 112 113/** @ingroup linebreak 114 * state argument for format callback. */ 115typedef enum { 116 LINEBREAK_STATE_NONE = 0, 117 LINEBREAK_STATE_SOT, LINEBREAK_STATE_SOP, LINEBREAK_STATE_SOL, 118 LINEBREAK_STATE_LINE, 119 LINEBREAK_STATE_EOL, LINEBREAK_STATE_EOP, LINEBREAK_STATE_EOT, 120 LINEBREAK_STATE_MAX 121} linebreak_state_t; 122 123typedef void 124 (*linebreak_ref_func_t) (void *, int, int); 125typedef gcstring_t * 126 (*linebreak_format_func_t) (struct linebreak_t *, linebreak_state_t, 127 gcstring_t *); 128typedef double 129 (*linebreak_sizing_func_t) (struct linebreak_t *, double, 130 gcstring_t *, gcstring_t *, gcstring_t *); 131typedef gcstring_t * 132 (*linebreak_urgent_func_t) (struct linebreak_t *, gcstring_t *); 133typedef gcstring_t * 134 (*linebreak_prep_func_t) (struct linebreak_t *, void *, unistr_t *, 135 unistr_t *); 136typedef gcstring_t * 137 (*linebreak_obs_prep_func_t) (struct linebreak_t *, unistr_t *); 138 139/** LineBreak object. 140 * @ingroup linebreak */ 141typedef struct linebreak_t { 142 /** @name private members 143 *@{*/ 144 /** reference count */ 145 unsigned long int refcount; 146 /** state */ 147 int state; 148 /** buffered line */ 149 unistr_t bufstr; 150 /** spaces trailing to buffered line */ 151 unistr_t bufspc; 152 /** calculated columns of buffered line */ 153 double bufcols; 154 /** unread input */ 155 unistr_t unread; 156 /*@}*/ 157 158 /** @name public members 159 *@{*/ 160 /** Maximum number of Unicode characters each line may contain. */ 161 size_t charmax; 162 /** Maximum number of columns. */ 163 double colmax; 164 /** Minimum number of columns. */ 165 double colmin; 166 /** User-tailored property map. */ 167 mapent_t *map; 168 size_t mapsiz; 169 /** Newline sequence. */ 170 unistr_t newline; 171 /** Options. See Defines. */ 172 unsigned int options; 173 /** Data argument of callback functions. See utils.c. */ 174 void *format_data; 175 void *sizing_data; 176 void *urgent_data; 177 /** @deprecated Use prep_data instead. */ 178 void *user_data; 179 /** User-defined private data. */ 180 void *stash; 181 /** Format callback function. See utils.c. */ 182 linebreak_format_func_t format_func; 183 /** Sizing callback function. See utils.c. */ 184 linebreak_sizing_func_t sizing_func; 185 /** Urgent breaking callback function. See utils.c. */ 186 linebreak_urgent_func_t urgent_func; 187 /** Preprocessing callback function. See utils.c. 188 * @deprecated Use prep_func instead. */ 189 linebreak_obs_prep_func_t user_func; 190 /** Reference Count function. 191 * This may be called with 3 arguments: ref_func(data, type, action). 192 * data is a (pointer to) external object assinged to stash, format_data, 193 * sizing_data, urgent_data or prep_data members. type is type of object. 194 * according to action being negative or positive, this function should 195 * decrement or increment reference count of object, respectively. 196 */ 197 linebreak_ref_func_t ref_func; 198 /** Number of last error. 199 * may be a value of errno defined in <errno.h> or LINEBREAK_ELONG below. 200 */ 201 int errnum; 202 /*@}*/ 203 204 /** @name public members addendum on release 2011.1. 205 *@{*/ 206 /** Array of preprocessing callback functions. See utils.c. */ 207 linebreak_prep_func_t * prep_func; 208 /** Data argument of each preprocessing callback functions. See utils.c. */ 209 void **prep_data; 210 /*@}*/ 211} linebreak_t; 212 213/*** 214 *** Constants. 215 ***/ 216 217/** General: Unknown property value. */ 218#define PROP_UNKNOWN ((propval_t)~0) 219 220/** @ingroup gcstring 221 * standard flag values. */ 222#define LINEBREAK_FLAG_PROHIBIT_BEFORE (1) 223#define LINEBREAK_FLAG_ALLOW_BEFORE (2) 224#define LINEBREAK_FLAG_BREAK_BEFORE LINEBREAK_FLAG_ALLOW_BEFORE 225 226/** @ingroup linebreak 227 * default of charmax member. */ 228#define LINEBREAK_DEFAULT_CHARMAX (998) 229 230/** @ingroup linebreak 231 * bitwise options. */ 232#define LINEBREAK_OPTION_EASTASIAN_CONTEXT (1) 233#define LINEBREAK_OPTION_HANGUL_AS_AL (2) 234#define LINEBREAK_OPTION_LEGACY_CM (4) 235#define LINEBREAK_OPTION_BREAK_INDENT (8) 236#define LINEBREAK_OPTION_COMPLEX_BREAKING (16) 237#define LINEBREAK_OPTION_NONSTARTER_LOOSE (32) 238#define LINEBREAK_OPTION_VIRAMA_AS_JOINER (64) 239#define LINEBREAK_OPTION_WIDE_NONSPACING_W (128) 240 241/** @ingroup linebreak 242 * internal states. */ 243#define LINEBREAK_STATE_SOT_FORMAT (-LINEBREAK_STATE_SOT) 244#define LINEBREAK_STATE_SOP_FORMAT (-LINEBREAK_STATE_SOP) 245#define LINEBREAK_STATE_SOL_FORMAT (-LINEBREAK_STATE_SOL) 246 247/** @ingroup linebreak 248 * type argument of ref_func callback. */ 249#define LINEBREAK_REF_STASH (0) 250#define LINEBREAK_REF_FORMAT (1) 251#define LINEBREAK_REF_SIZING (2) 252#define LINEBREAK_REF_URGENT (3) 253#define LINEBREAK_REF_USER (4) 254#define LINEBREAK_REF_PREP (5) 255 256/** @ingroup linebreak 257 * Line breaking action. */ 258#define LINEBREAK_ACTION_MANDATORY (4) 259#define LINEBREAK_ACTION_DIRECT (3) 260#define LINEBREAK_ACTION_INDIRECT (2) 261#define LINEBREAK_ACTION_PROHIBITED (1) 262 263/** @ingroup linebreak 264 * special errnum value. */ 265#define LINEBREAK_ELONG (-2) 266#define LINEBREAK_EEXTN (-3) 267 268/** @ingroup utf8 269 * check specs. */ 270#define SOMBOK_UTF8_CHECK_NONE (0) 271#define SOMBOK_UTF8_CHECK_MALFORMED (1) 272#define SOMBOK_UTF8_CHECK_SURROGATE (2) 273#define SOMBOK_UTF8_CHECK_NONUNICODE (3) 274 275/*** 276 *** Public functions, global variables and macros. 277 ***/ 278 279extern void linebreak_charprop(linebreak_t *, unichar_t, 280 propval_t *, propval_t *, propval_t *, 281 propval_t *); 282 283extern gcstring_t *gcstring_new(unistr_t *, linebreak_t *); 284extern gcstring_t *gcstring_new_from_utf8(char *, size_t, int, 285 linebreak_t *); 286extern gcstring_t *gcstring_newcopy(unistr_t *, linebreak_t *); 287extern gcstring_t *gcstring_copy(gcstring_t *); 288extern void gcstring_destroy(gcstring_t *); 289extern gcstring_t *gcstring_append(gcstring_t *, gcstring_t *); 290extern size_t gcstring_columns(gcstring_t *); 291extern int gcstring_cmp(gcstring_t *, gcstring_t *); 292extern gcstring_t *gcstring_concat(gcstring_t *, gcstring_t *); 293extern gcchar_t *gcstring_next(gcstring_t *); 294extern void gcstring_setpos(gcstring_t *, int); 295extern void gcstring_shrink(gcstring_t *, int); 296extern gcstring_t *gcstring_substr(gcstring_t *, int, int); 297extern gcstring_t *gcstring_replace(gcstring_t *, int, int, gcstring_t *); 298 299#define gcstring_eos(gcstr) \ 300 ((gcstr)->gclen <= (gcstr)->pos) 301#define gcstring_getpos(gcstr) \ 302 ((gcstr)->pos) 303 304extern propval_t gcstring_lbclass(gcstring_t *, int); 305extern propval_t gcstring_lbclass_ext(gcstring_t *, int); 306 307extern linebreak_t *linebreak_new(linebreak_ref_func_t); 308extern linebreak_t *linebreak_copy(linebreak_t *); 309extern linebreak_t *linebreak_incref(linebreak_t *); 310extern void linebreak_destroy(linebreak_t *); 311 312extern void linebreak_set_newline(linebreak_t *, unistr_t *); 313extern void linebreak_set_stash(linebreak_t *, void *); 314extern void linebreak_set_format(linebreak_t *, linebreak_format_func_t, 315 void *); 316extern void linebreak_add_prep(linebreak_t *, linebreak_prep_func_t, 317 void *); 318extern void linebreak_set_sizing(linebreak_t *, linebreak_sizing_func_t, 319 void *); 320extern void linebreak_set_urgent(linebreak_t *, linebreak_urgent_func_t, 321 void *); 322extern void linebreak_set_user(linebreak_t *, linebreak_obs_prep_func_t, 323 void *); 324extern void linebreak_reset(linebreak_t *); 325extern void linebreak_update_lbclass(linebreak_t *, unichar_t, propval_t); 326extern void linebreak_clear_lbclass(linebreak_t *); 327extern void linebreak_update_eawidth(linebreak_t *, unichar_t, propval_t); 328extern void linebreak_clear_eawidth(linebreak_t *); 329extern propval_t linebreak_search_lbclass(linebreak_t *, unichar_t); 330extern propval_t linebreak_search_eawidth(linebreak_t *, unichar_t); 331extern void linebreak_merge_lbclass(linebreak_t *, linebreak_t *); 332extern void linebreak_merge_eawidth(linebreak_t *, linebreak_t *); 333 334extern propval_t linebreak_eawidth(linebreak_t *, unichar_t); /* obs. */ 335extern propval_t linebreak_get_lbrule(linebreak_t *, propval_t, propval_t); 336extern propval_t linebreak_lbclass(linebreak_t *, unichar_t); /* obs. */ 337 338extern gcstring_t **linebreak_break(linebreak_t *, unistr_t *); 339extern gcstring_t **linebreak_break_fast(linebreak_t *, unistr_t *); 340extern gcstring_t **linebreak_break_from_utf8(linebreak_t *, char *, 341 size_t, int); 342extern gcstring_t **linebreak_break_partial(linebreak_t *, unistr_t *); 343extern void linebreak_free_result(gcstring_t **, int); 344extern propval_t linebreak_lbrule(propval_t, propval_t); /* obs. */ 345 346extern const char *linebreak_unicode_version; 347extern const char *linebreak_propvals_EA[]; 348extern const char *linebreak_propvals_LB[]; 349extern const char *linebreak_southeastasian_supported; 350extern void linebreak_southeastasian_flagbreak(gcstring_t *); 351 352extern unistr_t *sombok_decode_utf8(unistr_t *, size_t, const char *, 353 size_t, int); 354extern char *sombok_encode_utf8(char *, size_t *, size_t, unistr_t *); 355 356/*** 357 *** Built-in callbacks for linebreak_t. 358 ***/ 359extern gcstring_t *linebreak_format_SIMPLE(linebreak_t *, 360 linebreak_state_t, 361 gcstring_t *); 362extern gcstring_t *linebreak_format_NEWLINE(linebreak_t *, 363 linebreak_state_t, 364 gcstring_t *); 365extern gcstring_t *linebreak_format_TRIM(linebreak_t *, linebreak_state_t, 366 gcstring_t *); 367extern gcstring_t *linebreak_prep_URIBREAK(linebreak_t *, void *, 368 unistr_t *, unistr_t *); 369extern double linebreak_sizing_UAX11(linebreak_t *, double, gcstring_t *, 370 gcstring_t *, gcstring_t *); 371extern gcstring_t *linebreak_urgent_ABORT(linebreak_t *, gcstring_t *); 372extern gcstring_t *linebreak_urgent_FORCE(linebreak_t *, gcstring_t *); 373 374#define _SOMBOK_H_ 375#endif /* _SOMBOK_H_ */ 376 377#ifdef MALLOC_DEBUG 378#include "src/mymalloc.h" 379#endif /* MALLOC_DEBUG */ 380