1 /* 2 LibRCC - public interface 3 4 Copyright (C) 2005-2008 Suren A. Chilingaryan <csa@dside.dyndns.org> 5 6 This library is free software; you can redistribute it and/or modify it 7 under the terms of the GNU Lesser General Public License version 2.1 or later 8 as published by the Free Software Foundation. 9 10 This library is distributed in the hope that it will be useful, but WITHOUT 11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 13 for more details. 14 15 You should have received a copy of the GNU Lesser General Public License 16 along with this program; if not, write to the Free Software Foundation, Inc., 17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 18 */ 19 20 #ifndef _LIBRCC_H 21 #define _LIBRCC_H 22 23 #include <stdlib.h> 24 25 /******************************************************************************* 26 ***************************** Global Defines *********************************** 27 *******************************************************************************/ 28 /* Capacity limits: each one sizes a fixed-length list type declared in this header (as LIMIT+1 slots). */ 29 #define RCC_MAX_CHARSETS 16 30 #define RCC_MAX_ENGINES 5 31 #define RCC_MAX_LANGUAGES 64 32 #define RCC_MAX_ALIASES 64 33 #define RCC_MAX_CLASSES 16 34 35 /* IDs */ 36 /** 37 * Language ID. 38 * - 0 is default language 39 * - -1 is error (the type is unsigned, so the error value is (rcc_language_id)-1) 40 * - 1 usually represents "LibRCC off" language 41 * - >1 is some language 42 */ 43 typedef unsigned char rcc_language_id; 44 /** 45 * Alias ID 46 */ 47 typedef unsigned char rcc_alias_id; 48 /** 49 * Relation ID 50 */ 51 typedef unsigned char rcc_relation_id; 52 /** 53 * Charset ID. 54 * - 0 is default charset 55 * - -1 is error (the type is unsigned, so the error value is (rcc_charset_id)-1) 56 * - >0 is some charset 57 */ 58 typedef unsigned char rcc_charset_id; 59 /** 60 * Autocharset ID. 61 * - -1 is error (the type is unsigned, so the error value is (rcc_autocharset_id)-1) 62 * - >0 is some encoding 63 */ 64 typedef unsigned char rcc_autocharset_id; 65 /** 66 * Engine ID. 
67 * - -1 is non configured (first available will be used if any) 68 * - 0 autodetection is switched off 69 * - >0 is some auto-engine 70 */ 71 typedef unsigned char rcc_engine_id; 72 /** 73 * Class ID. 74 */ 75 typedef int rcc_class_id; 76 77 /* Opaque Pointer's */ 78 typedef struct rcc_context_t *rcc_context; /**< Current Working Context */ 79 /** 80 * Encoding Detection Engine Context. Containes considered information about 81 * engine, which can be extracted using API functions. 82 * @see rccEngineGetInternal 83 * @see rccEngineGetLanguage 84 * @see rccEngineGetRccContext 85 */ 86 typedef struct rcc_engine_context_t *rcc_engine_context; 87 /** 88 * Current Language Configuration: 89 * Information about current 'class encodings', and selected 'Encoding Detection 90 * Engine' 91 */ 92 typedef struct rcc_language_config_t *rcc_language_config; 93 typedef const struct rcc_class_t *rcc_class_ptr; 94 95 #ifdef __cplusplus 96 extern "C" { 97 #endif 98 99 /** 100 * Library Initialization function. Should be called prior to all 101 * any library manipulation. 102 */ 103 int rccInit(); 104 105 /** 106 * Library Cleanup function. 107 */ 108 void rccFree(); 109 110 /******************************************************************************* 111 **************************** Initialization ************************************ 112 *******************************************************************************/ 113 /** 114 * RCC context initialization flags 115 */ 116 typedef unsigned int rcc_init_flags; 117 118 /** 119 * Do not load default language configuration 120 */ 121 #define RCC_FLAG_NO_DEFAULT_CONFIGURATION 1 122 123 /** 124 * Initialize working context. 125 * 126 * @param locale_variable is variable to get locale from (Default: LC_CTYPE). 127 * @param max_languages is maximal number of languages supported by context. 
(Default: detect) 128 * @param max_classes is maximal number of classes (Default: detect) 129 * @param defclasses is list of encoding classes (Default: will add later) 130 * @param flags is option flag (Default: nothing) 131 * @see RCC_FLAG_NO_DEFAULT_CONFIGURATION 132 * @return working context or NULL in case of error 133 */ 134 rcc_context rccCreateContext(const char *locale_variable, unsigned int max_languages, unsigned int max_classes, rcc_class_ptr defclasses, rcc_init_flags flags); 135 /** 136 * Initialize the default working context (the one used when no context is supplied). A previously 137 * opened default context will be freed. 138 * 139 * @param locale_variable is variable to get locale from (Default: LC_CTYPE). 140 * @param max_languages is maximal number of languages supported by context. (Default: detect) 141 * @param max_classes is maximal number of classes (Default: detect) 142 * @param defclasses is list of encoding classes (Default: will add later) 143 * @param flags is option flag (Default: nothing) 144 * @see RCC_FLAG_NO_DEFAULT_CONFIGURATION 145 * @return non-zero value in case of error 146 */ 147 int rccInitDefaultContext(const char *locale_variable, unsigned int max_languages, unsigned int max_classes, rcc_class_ptr defclasses, rcc_init_flags flags); 148 149 /** 150 * Free all memory used by the working context and destroy it. 151 * 152 * @param ctx is working context to be destroyed. 153 */ 154 void rccFreeContext(rcc_context ctx); 155 156 157 /** 158 * Berkeley DB initialization flags 159 */ 160 typedef unsigned int rcc_db4_flags; 161 162 /** 163 * Enables Berkeley DB recodings caching for specified working context. 164 * 165 * @param ctx is working context 166 * @param name is database name (can be shared between different applications) 167 * @param flags are reserved for future. 
168 * @return non-zero value in case of error 169 */ 170 int rccInitDb4(rcc_context ctx, const char *name, rcc_db4_flags flags); 171 /** Lock / unlock the configuration of a working context. NOTE(review): undocumented in this header; presumably the lock_code passed to rccLockConfiguration must be passed again to rccUnlockConfiguration, and a non-zero result signals an error - confirm. */ 172 int rccLockConfiguration(rcc_context ctx, unsigned int lock_code); 173 int rccUnlockConfiguration(rcc_context ctx, unsigned int lock_code); 174 175 /******************************************************************************* 176 ******************* Altering Language Configuration **************************** 177 *******************************************************************************/ 178 /** 179 * Encoding name. 180 */ 181 typedef const char *rcc_charset; 182 /** 183 * List of Encoding names (array of RCC_MAX_CHARSETS+1 entries) 184 */ 185 typedef rcc_charset rcc_charset_list[RCC_MAX_CHARSETS+1]; 186 187 /* Engines */ 188 /** 189 * Engine internal data 190 */ 191 typedef void *rcc_engine_internal; 192 /** 193 * Engine constructor function 194 * @param ctx is engine context 195 * @see rccEngineGetInternal 196 * @see rccEngineGetLanguage 197 * @see rccEngineGetRccContext 198 * @return pointer to the internal data that should be stored in engine_context. 199 */ 200 typedef rcc_engine_internal (*rcc_engine_init_function)(rcc_engine_context ctx); 201 /** 202 * Engine encoding detection function. 203 * @param ctx is engine context 204 * @param buf is string encoded in unknown encoding 205 * @param len is exact size of string or 0 (size will be detected with strlen) 206 * @return the #rcc_autocharset_id of the detected encoding or -1 in case of error (the id type is unsigned, so this is (rcc_autocharset_id)-1) 207 */ 208 typedef rcc_autocharset_id (*rcc_engine_function)(rcc_engine_context ctx, const char *buf, int len); 209 /** 210 * Engine destructor function 211 */ 212 typedef void (*rcc_engine_free_function)(rcc_engine_context ctx); 213 214 /** 215 * Encoding detection engine description. Init and Free functions can be omitted. 216 * 'func' should analyze string and return position in the encodings list 217 * corresponding to string encoding. 
218 */ 219 struct rcc_engine_t { 220 const char *title; /**< Short title */ 221 rcc_engine_init_function init_func; /**< Constructor function */ 222 rcc_engine_free_function free_func; /**< Destructor function */ 223 rcc_engine_function func; /**< Function performing encoding detection */ 224 rcc_charset_list charsets; /**< List of supported encodings */ 225 }; 226 typedef struct rcc_engine_t rcc_engine; 227 typedef rcc_engine *rcc_engine_ptr; 228 typedef rcc_engine_ptr rcc_engine_list[RCC_MAX_ENGINES+1]; 229 230 /** 231 * Language description. 232 */ 233 struct rcc_language_t { 234 const char *sn; /**< Language ISO-639-1 (2 symbol) name */ 235 rcc_charset_list charsets; /**< List of language encodings */ 236 rcc_engine_list engines; /**< List of encoding detection engines supported by language */ 237 }; 238 typedef struct rcc_language_t rcc_language; 239 typedef rcc_language *rcc_language_ptr; 240 typedef rcc_language_ptr rcc_language_list[RCC_MAX_LANGUAGES+1]; 241 242 /** 243 * Language Aliases. 244 * For example: ru_UA = uk, cs_SK = sk 245 */ 246 struct rcc_language_alias_t { 247 const char *alias; /**< Long locale name */ 248 const char *lang; /**< Corresponding language ISO-639-1 name */ 249 }; 250 typedef struct rcc_language_alias_t rcc_language_alias; 251 typedef rcc_language_alias *rcc_language_alias_ptr; 252 typedef rcc_language_alias_ptr rcc_language_alias_list[RCC_MAX_ALIASES+1]; 253 254 /** 255 * Language relations. 256 * Meaning: sentence in considered language may contain words from all his parents. This 257 * knowledge will help Autodetection Engine to guess right language. 258 * 259 * For example: Russian is parent language for Ukrainian. This means it is possible 260 * to encounter russian words in ukrainian sentence. 261 * 262 * All languages by default are related to english language. 
263 */ 264 struct rcc_language_relation_t { 265 const char *lang; /**< Corresponding language ISO-639-1 name */ 266 const char *parent; /**< Parent language */ 267 }; 268 typedef struct rcc_language_relation_t rcc_language_relation; 269 270 /** 271 * Register new language in supplied working context 272 * @param ctx is working context ( or default one if NULL supplied ) 273 * @param language is pointer to language description (shouldn't be freed before library deinitialization). 274 * @return registered language id or -1 in case of an error (the id type is unsigned, so this is (rcc_language_id)-1). 275 */ 276 rcc_language_id rccRegisterLanguage(rcc_context ctx, rcc_language *language); 277 /** 278 * Register new encoding in the supplied language description (this call takes no working context) 279 * @param language is the language description the charset should be added to ( or default one if NULL supplied ) 280 * @param charset is pointer to charset name (shouldn't be freed before library deinitialization). 281 * @return registered charset id or -1 in case of an error (the id type is unsigned, so this is (rcc_charset_id)-1). 282 */ 283 rcc_charset_id rccLanguageRegisterCharset(rcc_language *language, rcc_charset charset); 284 /** 285 * Register new Engine in the supplied language description (this call takes no working context) 286 * @param language is the language description the engine should be added to ( or default one if NULL supplied ) 287 * @param engine is pointer to engine description (shouldn't be freed before library deinitialization). 288 * @return registered engine id or -1 in case of an error (the id type is unsigned, so this is (rcc_engine_id)-1). 289 */ 290 rcc_engine_id rccLanguageRegisterEngine(rcc_language *language, rcc_engine *engine); 291 /** 292 * Register new language alias in supplied working context 293 * @param ctx is working context ( or default one if NULL supplied ) 294 * @param alias is pointer on alias description (shouldn't be freed before library deinitialization). 295 * @return registered alias id or -1 in case of a error. 
296 */ 297 rcc_alias_id rccRegisterLanguageAlias(rcc_context ctx, rcc_language_alias *alias); 298 /** 299 * Register new language relation in supplied working context 300 * @param ctx is working context ( or default one if NULL supplied ) 301 * @param relation is pointer on relation description (shouldn't be freed before library deinitialization). 302 * @return registered relation id or -1 in case of a error. 303 */ 304 rcc_relation_id rccRegisterLanguageRelation(rcc_context ctx, rcc_language_relation *relation); 305 306 /******************************************************************************* 307 ************************ Altering Configuaration ******************************* 308 *******************************************************************************/ 309 /** 310 * Enumeration represents type of class. 311 */ 312 typedef enum rcc_class_type_t { 313 RCC_CLASS_INVALID = 0, /**< Invalid value */ 314 RCC_CLASS_STANDARD, /**< Standard class */ 315 RCC_CLASS_KNOWN, /**< Class encoding is known and no autodetection should be performed */ 316 RCC_CLASS_FS, /**< Class strings are representing file names */ 317 RCC_CLASS_TRANSLATE_LOCALE, /**< It is permited to translate class strings to current Locale Language in rccTo */ 318 RCC_CLASS_TRANSLATE_CURRENT,/**< It is permited to translate class strings to Current Language in rccTo */ 319 RCC_CLASS_TRANSLATE_FROM, /**< It is permited to translate class strings to Current Language in rccFrom */ 320 } rcc_class_type; 321 322 /** 323 * Provides information about default encoding for specific language 324 */ 325 struct rcc_class_default_charset_t { 326 const char *lang; /**< Language */ 327 const char *charset; /**< Default encoding for #lang */ 328 }; 329 typedef const struct rcc_class_default_charset_t rcc_class_default_charset; 330 331 /** Forbid change class value using Library API */ 332 #define RCC_CLASS_FLAG_CONST 0x01 333 /** Forbid saving and loading of class value */ 334 #define 
RCC_CLASS_FLAG_SKIP_SAVELOAD 0x02 335 336 /** Encoding class description. Encoding classes are the main concept of the LibRCC library. 337 * The strings are recoded between different classes (for example in RusXMMS2 project 338 * ID3 titles are recoded between ID3 and Output classes). The current encoding of 339 * each class can be set using configuration file or API call, otherwise it will 340 * be detected automatically using current locale or default encoding. 341 * 342 * If the #defvalue is not NULL, it provides information about detection of 343 * the default encoding. There are the following possibilities for that value: 344 * - Detect default encoding using specified locale variable (LC_CTYPE for example). 345 * - The current encoding of another class will be used instead of default encodings. The short name of that class should be specified. 346 * - Just use specified multibyte encoding for all languages. 347 * In case of detection failure using all these methods, the #defcharset will 348 * be examined if default encoding for current language is available. If not, 349 * the first encoding in the list will be used as current. 350 * 351 * Additionally it is possible to set special flags to prevent user from 352 * modifying class value. It is possible to protect class from changing 353 * either using the API or configuration files. 354 * 355 * @see rcc_class_default_charset_t 356 * @see RCC_CLASS_FLAG_CONST 357 * @see RCC_CLASS_FLAG_SKIP_SAVELOAD 358 * 359 * The class type provides information for recoding functions about 360 * automatic detection of the class charset. The encodings of the 361 * #RCC_CLASS_STANDARD classes will be detected using autoengine (if available 362 * for current language). The #RCC_CLASS_FS classes are associated with files 363 * and encoding will be guessed using find_file. 
364 * 365 * @see rcc_class_type_t 366 * 367 */ 368 struct rcc_class_t { 369 const char *name; /**< Short class name */ 370 const rcc_class_type class_type; /**< specifies type of class (Standard, File System, Known) */ 371 const char *defvalue; /**< locale variable name or parent name or multibyte encoding name */ 372 rcc_class_default_charset *defcharset; /**< default class encodings. Should be specified on a per-language basis */ 373 const char *fullname; /**< Full name of the class */ 374 const unsigned long flags; /**< Class flags. (CONST, SKIP_SAVELOAD) */ 375 }; 376 typedef const struct rcc_class_t rcc_class; 377 typedef rcc_class_ptr rcc_class_list[RCC_MAX_CLASSES+1]; 378 379 /** 380 * Register additional class 381 * @param ctx is working context ( or default one if NULL supplied ) 382 * @param cl is pointer to the class description (shouldn't be freed before library deinitialization). 383 * @return registered class id or -1 in case of an error. 384 */ 385 rcc_class_id rccRegisterClass(rcc_context ctx, rcc_class *cl); 386 /** 387 * Register additional charsets for the specified class. 388 * @param ctx is working context ( or default one if NULL supplied ) 389 * @param class_id is class id. 390 * @param charsets is NULL terminated list of charset names. 391 * @return non zero value in the case of an error. 392 */ 393 int rccRegisterAdditionalCharsets(rcc_context ctx, rcc_class_id class_id, rcc_charset *charsets); 394 /** 395 * Register names of charsets disabled in the specified class. 396 * @param ctx is working context ( or default one if NULL supplied ) 397 * @param class_id is class id. 398 * @param charsets is NULL terminated list of charset names ("unicode" / "nonunicode" specifies the corresponding group of charsets). 399 * @return non zero value in the case of an error. 400 */ 401 int rccRegisterDisabledCharsets(rcc_context ctx, rcc_class_id class_id, rcc_charset *charsets); 402 /** 403 * Checks if charset is disabled for the specified class. 
404 * @param ctx is working context ( or default one if NULL supplied ) 405 * @param class_id is class id. 406 * @param charset is charset name. 407 * @return 1 if charset is disabled, 0 if charset is enabled, -1 in the case of error. 408 */ 409 int rccIsDisabledCharsetName(rcc_context ctx, rcc_class_id class_id, const char *charset); 410 411 /** 412 * Determines 'class type' of supplied class. 413 * @param ctx is working context ( or default one if NULL supplied ) 414 * @param class_id is class id 415 * @return class type or -1 in case of an error. 416 */ 417 rcc_class_type rccGetClassType(rcc_context ctx, rcc_class_id class_id); 418 /** 419 * Returns name of supplied class. 420 * @param ctx is working context ( or default one if NULL supplied ) 421 * @param class_id is class id 422 * @return class name or NULL in case of an error. 423 */ 424 const char *rccGetClassName(rcc_context ctx, rcc_class_id class_id); 425 /** 426 * Returns full name of supplied class. 427 * @param ctx is working context ( or default one if NULL supplied ) 428 * @param class_id is class id 429 * @return class full name or NULL in case of an error. 430 */ 431 const char *rccGetClassFullName(rcc_context ctx, rcc_class_id class_id); 432 433 /******************************************************************************* 434 ************************ Altering Configuration ******************************** 435 *******************************************************************************/ 436 typedef int rcc_option_value; /**< Integer type used for every option value */ 437 438 /** 439 * Use BerkeleyDB recodings cache for encoding detection 440 */ 441 #define RCC_OPTION_LEARNING_FLAG_USE 1 442 /** 443 * Cache recodings in BerkeleyDB recoding cache for future use 444 */ 445 #define RCC_OPTION_LEARNING_FLAG_LEARN 2 446 447 typedef enum rcc_option_translate_t { 448 RCC_OPTION_TRANSLATE_OFF = 0, /**< Switch translation off. */ 449 RCC_OPTION_TRANSLATE_TRANSLITERATE, /**< Transliterate data. 
*/ 450 RCC_OPTION_TRANSLATE_TO_ENGLISH, /**< Translate data to english language (current language doesn't matter). */ 451 RCC_OPTION_TRANSLATE_SKIP_RELATED, /**< Skip translation of texts between related languages. */ 452 RCC_OPTION_TRANSLATE_SKIP_PARENT, /**< Skip translation of texts from parent languages (from english). */ 453 RCC_OPTION_TRANSLATE_FULL /**< Translate whole data to the current language */ 454 } rcc_option_translate; 455 456 /** 457 * List of options available 458 */ 459 typedef enum rcc_option_t { 460 RCC_OPTION_LEARNING_MODE = 0, /**< Recoding Caching mode (OFF/ON/RELEARN/LEARN) */ 461 RCC_OPTION_AUTODETECT_FS_TITLES, /**< Detect titles of #RCC_CLASS_FS classes */ 462 RCC_OPTION_AUTODETECT_FS_NAMES, /**< Try to find encoding of #RCC_CLASS_FS by accessing fs */ 463 RCC_OPTION_CONFIGURED_LANGUAGES_ONLY, /**< Use only configured languages or languages with auto-engines */ 464 RCC_OPTION_AUTOENGINE_SET_CURRENT, /**< If enabled autodetection engine will set current charset */ 465 RCC_OPTION_AUTODETECT_LANGUAGE, /**< Enables language detection */ 466 RCC_OPTION_TRANSLATE, /**< Translate #rcc_string if its language differs from current one */ 467 RCC_OPTION_TIMEOUT, /**< Recoding timeout. Currently it is only used to limit translation time */ 468 RCC_OPTION_OFFLINE, /**< Allows external module to finish its job offline after the main program is terminated */ 469 RCC_MAX_OPTIONS, /**< Number of options listed above (enum sentinel, not a real option) */ 470 RCC_OPTION_ALL /**< NOTE(review): presumably a wildcard selecting every option - confirm */ 471 } rcc_option; 472 473 /** 474 * List of option types 475 */ 476 typedef enum rcc_option_type_t { 477 RCC_OPTION_TYPE_INVISIBLE = 0, /**< Invisible option. Wouldn't be represented in UI menu */ 478 RCC_OPTION_TYPE_STANDARD, /**< Standard option. 
*/ 479 RCC_OPTION_TYPE_MAX /**< Number of option types listed above (enum sentinel) */ 480 } rcc_option_type; 481 482 /** 483 * Description of option values range type 484 */ 485 typedef enum rcc_option_range_type_t { 486 RCC_OPTION_RANGE_TYPE_BOOLEAN = 0, /**< Boolean option */ 487 RCC_OPTION_RANGE_TYPE_RANGE, /**< Range of integer values */ 488 RCC_OPTION_RANGE_TYPE_FLAGS, /**< Set of boolean flags */ 489 RCC_OPTION_RANGE_TYPE_MENU, /**< Enumeration */ 490 RCC_OPTION_RANGE_TYPE_MAX /**< Number of range types listed above (enum sentinel) */ 491 } rcc_option_range_type; 492 493 /** 494 * Description of value range 495 */ 496 typedef struct rcc_option_range_t { 497 rcc_option_range_type type; /**< Value range type */ 498 rcc_option_value min; /**< Minimal acceptable option value */ 499 rcc_option_value max; /**< Maximal acceptable option value */ 500 rcc_option_value step; /**< Precision step */ 501 }rcc_option_range; 502 503 /* lng.c */ 504 505 /** 506 * Return number of configured languages 507 * 508 * @param ctx is working context ( or default one if NULL supplied ) 509 * @return number of configured languages or 0 in the case of error 510 */ 511 int rccGetLanguageNumber(rcc_context ctx); 512 /** 513 * Return number of configured classes 514 * 515 * @param ctx is working context ( or default one if NULL supplied ) 516 * @return number of configured classes or 0 in the case of error 517 */ 518 int rccGetClassNumber(rcc_context ctx); 519 /** 520 * Determines name of the supplied language. 521 * 522 * @param ctx is working context ( or default one if NULL supplied ) 523 * @param language_id is 'language id' of desired language. For default language the 'default' value will be returned. 524 * @return language name or NULL in case of an error. 525 */ 526 const char *rccGetLanguageName(rcc_context ctx, rcc_language_id language_id); 527 /** 528 * Finds language id by the supplied name. 529 * 530 * @param ctx is working context ( or default one if NULL supplied ) 531 * @param name is language name 532 * @return language id [0-n] or -1 if not found. 
533 */ 534 rcc_language_id rccGetLanguageByName(rcc_context ctx, const char *name); 535 /** 536 * This function resolves default languages. If positive language id is supplied 537 * it will be returned back unchanged. The default language (0 is supplied as 538 * language id) will be resolved to some particular language. 539 * The following procedure will be used: 540 * - 1. Detect Language by locale 541 * - 2. Check if language is initialized if RCC_OPTION_CONFIGURED_LANGUAGES_ONLY is set 542 * - 3. If one of the previous steps has failed, select first available language (id=1). Usually it should be 'LibRCC off'. 543 * 544 * @param ctx is working context ( or default one if NULL supplied ) 545 * @param language_id is language id 546 * @return resolved language id [1-n] or -1 in case of error (the id type is unsigned, so this is (rcc_language_id)-1). 547 */ 548 rcc_language_id rccGetRealLanguage(rcc_context ctx, rcc_language_id language_id); 549 /** 550 * Return considered language name, resolving default language if necessary. 551 * @see rccGetRealLanguage 552 * 553 * @param ctx is working context ( or default one if NULL supplied ) 554 * @param language_id is language id 555 * @return resolved language name or NULL in case of error. 556 */ 557 const char *rccGetRealLanguageName(rcc_context ctx, rcc_language_id language_id); 558 /** 559 * Return selected language id. 560 * 561 * @param ctx is working context ( or default one if NULL supplied ) 562 * @return selected language id [0-n] or -1 in case of error (the id type is unsigned, so this is (rcc_language_id)-1) 563 */ 564 rcc_language_id rccGetSelectedLanguage(rcc_context ctx); 565 /** 566 * Return selected language name. 567 * @see rccGetSelectedLanguage 568 * 569 * @param ctx is working context ( or default one if NULL supplied ) 570 * @return selected language name or NULL in case of error. 571 */ 572 const char *rccGetSelectedLanguageName(rcc_context ctx); 573 /** 574 * Return current language id, resolving default language to particular one if necessary. 
575 * See more details how default language is resolved: @see rccGetRealLanguage 576 * 577 * @param ctx is working context ( or default one if NULL supplied ) 578 * @return current language id [1-n] or -1 in case of error 579 */ 580 rcc_language_id rccGetCurrentLanguage(rcc_context ctx); 581 /** 582 * Return current language name. 583 * @see rccGetCurrentLanguage 584 * 585 * @param ctx is working context ( or default one if NULL supplied ) 586 * @return current language name. NOTE(review): the original text described an id ("[1-n] or -1"), but the return type is a string; presumably NULL signals an error as for the other name getters - confirm. 587 */ 588 const char *rccGetCurrentLanguageName(rcc_context ctx); 589 590 591 /** 592 * Set current language. 593 * 594 * @param ctx is working context ( or default one if NULL supplied ) 595 * @param language_id is new language id [0-n]. Passing 0 (the default language) is allowed. 596 * @return non-zero value in case of error 597 */ 598 int rccSetLanguage(rcc_context ctx, rcc_language_id language_id); 599 /** 600 * Set current language by name. 601 * 602 * @param ctx is working context ( or default one if NULL supplied ) 603 * @param name is the short name of new language. 604 * @return non-zero value in case of error 605 */ 606 int rccSetLanguageByName(rcc_context ctx, const char *name); 607 608 /* opt.c */ 609 /** 610 * Return option value. 611 * 612 * @param ctx is working context ( or default one if NULL supplied ) 613 * @param option is option 614 * @return current option value or -1 in case of error 615 */ 616 rcc_option_value rccGetOption(rcc_context ctx, rcc_option option); 617 /** 618 * Tests if option has unchanged default value. 619 * 620 * @param ctx is working context ( or default one if NULL supplied ) 621 * @param option is option 622 * @return NOTE(review): the original text ("current option value or -1 in case of error") duplicates rccGetOption; presumably a non-zero result means the option still has its default value - confirm against opt.c 623 */ 624 int rccOptionIsDefault(rcc_context ctx, rcc_option option); 625 /** 626 * Sets option to its default value. 
627 * 628 * @param ctx is working context ( or default one if NULL supplied ) 629 * @param option is option 630 * @return non-zero value in case of error 631 */ 632 int rccOptionSetDefault(rcc_context ctx, rcc_option option); 633 /** 634 * Set option value. 635 * 636 * @param ctx is working context ( or default one if NULL supplied ) 637 * @param option is option 638 * @param value is option value 639 * @return non-zero value in case of errors 640 */ 641 int rccSetOption(rcc_context ctx, rcc_option option, rcc_option_value value); 642 /** 643 * Get current option type. 644 * 645 * @param ctx is working context ( or default one if NULL supplied ) 646 * @param option is option 647 * @return current option type or -1 in case of error 648 */ 649 rcc_option_type rccOptionGetType(rcc_context ctx, rcc_option option); 650 /** 651 * Return range description for specified option 652 * 653 * @param ctx is working context ( or default one if NULL supplied ) 654 * @param option is option 655 * @return pointer to the option range. NOTE(review): the original text said "-1 in case of error", but the return type is a pointer; presumably NULL is returned on error - confirm. 656 */ 657 rcc_option_range *rccOptionGetRange(rcc_context ctx, rcc_option option); 658 659 /** 660 * Get short name of supplied option. 661 * 662 * @param option is option 663 * @return option name or NULL in case of error 664 */ 665 const char *rccGetOptionName(rcc_option option); 666 667 /** 668 * Get short name of supplied option value. 669 * 670 * @param option is option 671 * @param value is value of #option 672 * @return option value name or NULL in case of error 673 */ 674 const char *rccGetOptionValueName(rcc_option option, rcc_option_value value); 675 /** 676 * Get option by short name. 677 * 678 * @param name is option name 679 * @return option or -1 in case of error 680 */ 681 rcc_option rccGetOptionByName(const char *name); 682 /** 683 * Get option value by short name. 
684 * 685 * @param option is option 686 * @param name is value name 687 * @return option value or -1 in case of error 688 */ 689 rcc_option_value rccGetOptionValueByName(rcc_option option, const char *name); 690 691 692 /* lngconfig.c */ 693 /** 694 * Check if configuration is initialized for supplied language. 695 * 696 * @param ctx is working context ( or default one if NULL supplied ) 697 * @param language_id is concerned language id 698 * @return configuration context if: 699 * - language_id is particular language, not default one 700 * - language is already initialized 701 * - language is not dummy (Disable LibRCC) language 702 * otherwise NULL is returned 703 */ 704 rcc_language_config rccCheckConfig(rcc_context ctx, rcc_language_id language_id); 705 /** 706 * Initializes language configuration if not yet configured and returns pointer to 707 * that configuration. If default language is supplied (language_id = 0), the 708 * language id will be resolved to particular language and config of that language 709 * will be returned. 710 * 711 * @param ctx is working context ( or default one if NULL supplied ) 712 * @param language_id is concerned language id 713 * @return configuration context. NULL is returned in the case of errors or if the 714 * dummy (Disable LibRCC) language is selected. 715 */ 716 rcc_language_config rccGetConfig(rcc_context ctx, rcc_language_id language_id); 717 /** 718 * Checks if supplied language is usable. The usability of language is determined 719 * regarding #RCC_OPTION_CONFIGURED_LANGUAGES_ONLY option. Depending on that 720 * option there are several possibilities for language usability: 721 * Any non-dummy language is usable 722 * Any configured or AutoEngine enabled language is usable 723 * Only configured languages are usable 724 * 725 * Language configuration is initialized if not yet configured. And pointer on 726 * that configuration is returned. 
If default language is supplied (language_id = 0), the 727 * language id will be resolved to particular language and config of that language 728 * will be returned. 729 * 730 * @param ctx is working context ( or default one if NULL supplied ) 731 * @param language_id is concerned language id 732 * @return configuration context. The NULL is returned in the case of errors or 733 * if unusable language is supplied. 734 */ 735 rcc_language_config rccGetUsableConfig(rcc_context ctx, rcc_language_id language_id); 736 /** 737 * Initializes language configuration if not yet configured and returns pointer to 738 * that configuration. 739 * 740 * @param ctx is working context ( or default one if NULL supplied ) 741 * @param name is concerned language name 742 * @return configuration context or NULL in case of error 743 */ 744 rcc_language_config rccGetConfigByName(rcc_context ctx, const char *name); 745 /** 746 * Returns pointer to the current language configuration (initializes it as well 747 * if required) 748 * 749 * @param ctx is working context ( or default one if NULL supplied ) 750 * @return configuration context or NULL in case of error 751 */ 752 rcc_language_config rccGetCurrentConfig(rcc_context ctx); 753 754 /** 755 * Return language associated with supplied configuration. 756 * 757 * @param config is language configuration 758 * @return id of the language associated with the configuration (error value is not documented in this header) */ 759 rcc_language_id rccConfigGetLanguage(rcc_language_config config); 760 /** 761 * Return name of the language associated with supplied configuration. 
762 * 763 * @param config is language configuration 764 */ 765 const char *rccConfigGetLanguageName(rcc_language_config config); 766 767 /** 768 * Return number of configured charsets 769 * 770 * @param config is language configuration 771 * @return number of charsets available in the configuration or 0 in the case of error 772 */ 773 int rccConfigGetCharsetNumber(rcc_language_config config); 774 /** 775 * Return number of configured charsets (presumably restricted to the supplied class - confirm) 776 * 777 * @param config is language configuration 778 * @param class_id is class id. 779 * @return number of charsets available in the configuration or 0 in the case of error 780 */ 781 int rccConfigGetClassCharsetNumber(rcc_language_config config, rcc_class_id class_id); 782 /** 783 * Return number of configured encoding auto-detection engines 784 * 785 * @param config is language configuration 786 * @return number of engines or 0 in the case of error 787 */ 788 int rccConfigGetEngineNumber(rcc_language_config config); 789 790 /** 791 * Return name of the supplied engine 792 * 793 * @param config is language configuration 794 * @param engine_id is desired engine 795 * @return selected engine name or NULL in case of error. 796 */ 797 const char *rccConfigGetEngineName(rcc_language_config config, rcc_engine_id engine_id); 798 /** 799 * Return name of the supplied encoding 800 * 801 * @param config is language configuration 802 * @param charset_id is desired charset 803 * @return selected encoding name or NULL in case of error. 804 */ 805 const char *rccConfigGetCharsetName(rcc_language_config config, rcc_charset_id charset_id); 806 /** 807 * Return name of the supplied encoding 808 * 809 * @param config is language configuration 810 * @param class_id is encoding class 811 * @param charset_id is desired charset 812 * @return selected encoding name or NULL in case of error. 
813 */ 814 const char *rccConfigGetClassCharsetName(rcc_language_config config, rcc_class_id class_id, rcc_charset_id charset_id); 815 816 /** 817 * Function finds engine id by the supplied name. 818 * 819 * @param config is language configuration 820 * @param name is engine name 821 * @return engine id [0-n] or -1 if not found (the id type is unsigned, so this is (rcc_engine_id)-1) 822 */ 823 rcc_engine_id rccConfigGetEngineByName(rcc_language_config config, const char *name); 824 /** 825 * Function finds encoding id by the supplied name. 826 * 827 * @param config is language configuration 828 * @param name is encoding name 829 * @return encoding id [0-n] or -1 if not found (the id type is unsigned, so this is (rcc_charset_id)-1). 830 */ 831 rcc_charset_id rccConfigGetCharsetByName(rcc_language_config config, const char *name); 832 /** 833 * Function finds encoding id by the supplied name. 834 * 835 * @param config is language configuration 836 * @param class_id is encoding class 837 * @param name is encoding name 838 * @return encoding id [0-n] or -1 if not found (the id type is unsigned, so this is (rcc_charset_id)-1). 839 */ 840 rcc_charset_id rccConfigGetClassCharsetByName(rcc_language_config config, rcc_class_id class_id, const char *name); 841 /** 842 * Checks if charset is disabled for the specified class. 843 * @param config is language configuration 844 * @param class_id is class id. 845 * @param charset_id is charset id. 846 * @return 1 if charset is disabled, 0 if charset is enabled, -1 in the case of error. 847 */ 848 int rccConfigIsDisabledCharset(rcc_language_config config, rcc_class_id class_id, rcc_charset_id charset_id); 849 /** 850 * Return selected engine id. 851 * 852 * @param config is language configuration 853 * @return selected engine id [-1-n] 854 * - -1 engine is not configured and first available will be used 855 * - 0 engines are disabled 856 * - >0 particular engine id 857 */ 858 rcc_engine_id rccConfigGetSelectedEngine(rcc_language_config config); 859 /** 860 * Return selected engine name. 
861 * @see rccConfigGetSelectedEngine 862 * 863 * @param config is language configuration 864 * @return selected engine name ('default' will be returned if engine not configured) or NULL in case of error. 865 */ 866 const char *rccConfigGetSelectedEngineName(rcc_language_config config); 867 /** 868 * Return current engine_id. The default value will be resolved to paticular engine id. Normally, 869 * the id of the first available engine will be returned. If no engines registered for supplied 870 * language the 0 will be returned, indicating id of dummy(disabled) engine. 871 * 872 * @param config is language configuration 873 * @return selected engine id [0-n] or -1 in case of error 874 * - 0 engines are disabled 875 * - >0 paticular engine id 876 */ 877 rcc_engine_id rccConfigGetCurrentEngine(rcc_language_config config); 878 /** 879 * Return current engine name. 880 * @see rccConfigGetCurrentEngine 881 * 882 * @param config is language configuration 883 * @return current engine name or NULL in case of error. 884 */ 885 const char *rccConfigGetCurrentEngineName(rcc_language_config config); 886 887 /** 888 * Return selected encoding id. 889 * 890 * @param config is language configuration 891 * @param class_id is encoding class 892 * @return selected encoding id [0-n] or -1 in case of error 893 * - 0 default encoding 894 * - >0 paticular encoding id 895 */ 896 rcc_charset_id rccConfigGetSelectedCharset(rcc_language_config config, rcc_class_id class_id); 897 /** 898 * Return selected encoding name. 899 * @see rccConfigGetSelectedCharset 900 * 901 * @param config is language configuration 902 * @param class_id is charset encodings 903 * @return selected encodings name ('default' will be returned if engine not configured) or NULL in case of error. 904 */ 905 const char *rccConfigGetSelectedCharsetName(rcc_language_config config, rcc_class_id class_id); 906 /** 907 * Return current encoding_id. The default value will be resolved to paticular encoding id. 
908 * The following procedure is used to detect default encoding: 909 * - If Unicode encoding selected for the same class english language. Return this encoding. 910 * - If the parent class is defined in #defcharset, - return current encoding of parent class. 911 * - If the locale variable is defined in #defcharset and either config language coincide with locale language or unciode encoding defined, use locale encoding. 912 * - If the default value for config language is defined in #defvalue return that default value. 913 * - If the default value for all languages is defined in #defvalue return that default value. 914 * - If either config language is coincide with locale language or unicode locale is used, return locale encoding. 915 * - Return first by the list non-dissabled encoding. 916 * 917 * @param config is language configuration 918 * @param class_id is encoding class 919 * @return selected charset id [1-n] or -1 in case of error 920 */ 921 rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_id class_id); 922 /** 923 * Return current encoding name. 924 * @see rccConfigGetCurrentCharset 925 * 926 * @param config is language configuration 927 * @param class_id is encoding class 928 * @return current charset name or NULL in case of error. 929 */ 930 const char *rccConfigGetCurrentCharsetName(rcc_language_config config, rcc_class_id class_id); 931 932 /** 933 * Set current engine. 934 * 935 * @param config is language configuration 936 * @param engine_id is new language id [-1-n]. If -1 supplied the engine will go in non-configured state. 937 * @return non-zero value in case of error 938 */ 939 int rccConfigSetEngine(rcc_language_config config, rcc_engine_id engine_id); 940 /** 941 * Set current encoding. 942 * 943 * @param config is language configuration 944 * @param class_id is encoding class 945 * @param charset_id is new charset id [0-n]. The 0 will switch charset to encoding state. 
946 * @return non-zero value in case of error 947 */ 948 int rccConfigSetCharset(rcc_language_config config, rcc_class_id class_id, rcc_charset_id charset_id); 949 /** 950 * Set current engine by name. 951 * 952 * @param config is language configuration 953 * @param name is the short name of new engine ("default" is okey). 954 * @return non-zero value in case of error 955 */ 956 int rccConfigSetEngineByName(rcc_language_config config, const char *name); 957 /** 958 * Set current encoding by name. 959 * 960 * @param config is language configuration 961 * @param class_id is encoding class 962 * @param name is the short name of new encoding ("default" is okey). 963 * @return non-zero value in case of error 964 */ 965 int rccConfigSetCharsetByName(rcc_language_config config, rcc_class_id class_id, const char *name); 966 967 /** 968 * Function will return encoding id of charset specified by locale configuration. 969 * 970 * @param config is language configuration 971 * @param locale_variable is locale variable (Default(NULL) is LC_CTYPE) 972 * @return encoding id 973 */ 974 rcc_charset_id rccConfigGetLocaleCharset(rcc_language_config config, const char *locale_variable); 975 /** 976 * Function will return encoding id of charset specified by locale configuration. 
977 * 978 * @param config is language configuration 979 * @param class_id is encoding class 980 * @param locale_variable is locale variable (Default(NULL) is LC_CTYPE) 981 * @return encoding id 982 */ 983 rcc_charset_id rccConfigGetLocaleClassCharset(rcc_language_config config, rcc_class_id class_id, const char *locale_variable); 984 985 /* curconfig.c */ 986 int rccGetCharsetNumber(rcc_context ctx); 987 int rccGetClassCharsetNumber(rcc_context ctx, rcc_class_id class_id); 988 int rccGetEngineNumber(rcc_context ctx); 989 990 const char *rccGetEngineName(rcc_context ctx, rcc_engine_id engine_id); 991 const char *rccGetCharsetName(rcc_context ctx, rcc_charset_id charset_id); 992 const char *rccGetClassCharsetName(rcc_context ctx, rcc_class_id class_id, rcc_charset_id charset_id); 993 994 rcc_engine_id rccGetEngineByName(rcc_context ctx, const char *name); 995 rcc_charset_id rccGetCharsetByName(rcc_context ctx, const char *name); 996 rcc_charset_id rccGetClassCharsetByName(rcc_context ctx, rcc_class_id class_id, const char *name); 997 998 int rccIsDisabledCharset(rcc_context ctx, rcc_class_id class_id, rcc_charset_id charset_id); 999 1000 rcc_engine_id rccGetSelectedEngine(rcc_context ctx); 1001 const char *rccGetSelectedEngineName(rcc_context ctx); 1002 rcc_engine_id rccGetCurrentEngine(rcc_context ctx); 1003 const char *rccGetCurrentEngineName(rcc_context ctx); 1004 rcc_charset_id rccGetSelectedCharset(rcc_context ctx, rcc_class_id class_id); 1005 const char *rccGetSelectedCharsetName(rcc_context ctx, rcc_class_id class_id); 1006 rcc_charset_id rccGetCurrentCharset(rcc_context ctx, rcc_class_id class_id); 1007 const char *rccGetCurrentCharsetName(rcc_context ctx, rcc_class_id class_id); 1008 1009 int rccSetEngine(rcc_context ctx, rcc_engine_id engine_id); 1010 int rccSetCharset(rcc_context ctx, rcc_class_id class_id, rcc_charset_id charset_id); 1011 int rccSetEngineByName(rcc_context ctx, const char *name); 1012 int rccSetCharsetByName(rcc_context ctx, rcc_class_id 
class_id, const char *name); 1013 1014 rcc_charset_id rccGetLocaleCharset(rcc_context ctx, const char *locale_variable); 1015 rcc_charset_id rccGetLocaleClassCharset(rcc_context ctx, rcc_class_id class_id, const char *locale_variable); 1016 1017 rcc_autocharset_id rccDetectCharset(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len); 1018 1019 /******************************************************************************* 1020 ************************ Language Configuaration ******************************* 1021 *******************************************************************************/ 1022 1023 /******************************************************************************* 1024 ************************ RCC_STRING Manipulations ****************************** 1025 *******************************************************************************/ 1026 /* string.c */ 1027 /** 1028 * Intermediate string format. RCC_string can be manipulated as standard NULL terminated string. 1029 * However it contains small header with information about string language. All strings are 1030 * encoded using UTF-8 encoding. 1031 */ 1032 typedef char *rcc_string; 1033 /** 1034 * Intermediate string format. RCC_string can be manipulated as standard NULL terminated string. 1035 * However it contains small header with information about string language. All strings are 1036 * encoded using UTF-8 encoding. 1037 */ 1038 typedef const char *rcc_const_string; 1039 1040 /** 1041 * Check string header and verify if it is really correct #rcc_string. 1042 * 1043 * @param str is verifying string 1044 */ 1045 size_t rccStringCheck(const char *str); 1046 /** 1047 * Check string header and verify if it is really correct #rcc_string. 1048 * 1049 * @param str is verifying string 1050 * @param len is preciese size of str. 
1051 * @return size of string in bytes or -1 if check failed 1052 */ 1053 size_t rccStringSizedCheck(const char *str, size_t len); 1054 1055 /** 1056 * Extract language from #rcc_string. 1057 * 1058 * @param str is #rcc_string 1059 * @return size of string in bytes or -1 if check failed 1060 */ 1061 rcc_language_id rccStringGetLanguage(rcc_const_string str); 1062 /** 1063 * Returns pointer on UTF-8 string kept inside of #rcc_string. 1064 * 1065 * @param str is #rcc_string 1066 * @return pointer on constant string or NULL in the case of error 1067 */ 1068 const char *rccStringGetString(rcc_const_string str); 1069 /** 1070 * Extract UTF-8 string from #rcc_string. 1071 * 1072 * @param str is #rcc_string 1073 * @return pointer on string or NULL in the case of error. The string should be freed by the caller. 1074 */ 1075 char *rccStringExtractString(rcc_const_string str); 1076 1077 /** 1078 * If str is #rcc_string function will return pointer on UTF-8 string kept inside, otherwise 1079 * pointer on the passed string is returned. 1080 * 1081 * @param str is null-terminated string 1082 * @return pointer on constant string 1083 */ 1084 const char *rccGetString(const char *str); 1085 /** 1086 * If str is #rcc_string function will return pointer on UTF-8 string kept inside, otherwise 1087 * pointer on the passed string is returned. 
1088 * 1089 * @param str is string (perhaps not zero terminated) 1090 * @param len is exact length of string or 0 (in this case length will be computed using 'strlen' 1091 * @return pointer on constant string 1092 */ 1093 const char *rccSizedGetString(const char *str, size_t len); 1094 1095 int rccStringCmp(const char *str1, const char *str2); 1096 int rccStringNCmp(const char *str1, const char *str2, size_t n); 1097 int rccStringCaseCmp(const char *str1, const char *str2); 1098 int rccStringNCaseCmp(const char *str1, const char *str2, size_t n); 1099 1100 /******************************************************************************* 1101 ******************************** Recoding ************************************** 1102 *******************************************************************************/ 1103 /* rcciconv.c */ 1104 1105 /** 1106 * recoding context 1107 */ 1108 typedef struct rcc_iconv_t *rcc_iconv; 1109 1110 /** 1111 * Open recoding context. 1112 * 1113 * @param from is source encoding 1114 * @param to is destination encoding 1115 * @result 1116 * - NULL if no recoding is required 1117 * - Pointer on initialized context if successful 1118 */ 1119 rcc_iconv rccIConvOpen(const char *from, const char *to); 1120 /** 1121 * Close recoding context. 1122 * 1123 * @param icnv is recoding context */ 1124 void rccIConvClose(rcc_iconv icnv); 1125 /** 1126 * Recodes chunk of data. 1127 * 1128 * @param icnv is recoding context 1129 * @param buf is data for recoding 1130 * @param len is size of the data 1131 * @param rlen is size of recoded data 1132 * @return recoded string or NULL in the case of error 1133 */ 1134 char *rccIConv(rcc_iconv icnv, const char *buf, size_t len, size_t *rlen); 1135 1136 /* rcctranslate.c */ 1137 1138 /** 1139 * translating context 1140 */ 1141 typedef struct rcc_translate_t *rcc_translate; 1142 1143 /** 1144 * Open translating context. 
1145 * 1146 * @param from is source language 1147 * @param to is destination language 1148 * @return 1149 * - NULL if translation is not required or possible 1150 * - Pointer on initialized context if successful 1151 */ 1152 rcc_translate rccTranslateOpen(const char *from, const char *to); 1153 /** 1154 * Close translating context. 1155 * 1156 * @param translate is translating context 1157 */ 1158 void rccTranslateClose(rcc_translate translate); 1159 1160 /* 1161 * Set translation timeout 1162 * 1163 * @param translate is translating context 1164 * @param us is timeout in microseconds (0 - no timeout) 1165 * @return non-zero value is returned in the case of errror 1166 */ 1167 int rccTranslateSetTimeout(rcc_translate translate, unsigned long us); 1168 1169 /* 1170 * Allows translation engine to perform pending task after the main program is 1171 * being terminated 1172 * 1173 * @param translate is translating context 1174 * @return non-zero value is returned in the case of errror 1175 */ 1176 int rccTranslateAllowOfflineMode(rcc_translate translate); 1177 1178 /** 1179 * Translate string. 
1180 * 1181 * @param translate is translating context 1182 * @param buf is UTF-8 encoded string for translating 1183 * @return recoded string or NULL in the case of error 1184 */ 1185 char *rccTranslate(rcc_translate translate, const char *buf); 1186 1187 1188 /* rccspell.c */ 1189 1190 /** 1191 * spelling context 1192 */ 1193 typedef struct rcc_speller_t *rcc_speller; 1194 1195 /** 1196 * result of spelling 1197 */ 1198 typedef enum rcc_speller_result_t { 1199 RCC_SPELLER_INCORRECT = 0, /**< Word not found in dictionaries */ 1200 RCC_SPELLER_ALMOST_PARENT, /**< Similliar word is found in parents dictionary */ 1201 RCC_SPELLER_ALMOST_CORRECT, /**< Similliar word is found in dictionary */ 1202 RCC_SPELLER_PARENT, /**< Word is found in parent dictionary */ 1203 RCC_SPELLER_CORRECT /**< Word is found in dictionary */ 1204 } rcc_speller_result; 1205 1206 int rccSpellerResultIsOwn(rcc_speller_result res); 1207 int rccSpellerResultIsPrecise(rcc_speller_result res); 1208 int rccSpellerResultIsCorrect(rcc_speller_result res); 1209 1210 /** 1211 * Open spelling context. 1212 * 1213 * @param lang is language 1214 * @return 1215 * - NULL if language is not supported and in the case of error. 1216 * - Pointer on initialized context if successful 1217 */ 1218 rcc_speller rccSpellerCreate(const char *lang); 1219 /** 1220 * Close spelling context. 1221 * 1222 * @param speller is spelling context 1223 */ 1224 void rccSpellerFree(rcc_speller speller); 1225 /** 1226 * Add parent to the spelling context. 1227 * 1228 * @param speller is spelling context 1229 * @param parent is parent spelling context 1230 * @return non-zero value in the case of error 1231 */ 1232 int rccSpellerAddParent(rcc_speller speller, rcc_speller parent); 1233 /** 1234 * Spell a word. 
1235 * 1236 * @param speller is spelling context 1237 * @param word is UTF-8 encoded word for spelling 1238 * @return FALSE if word is not found in dictionary 1239 */ 1240 rcc_speller_result rccSpeller(rcc_speller speller, const char *word); 1241 1242 /* recode.c */ 1243 1244 /** 1245 * Tries to detect language of string 1246 * @param ctx is working context ( or default one if NULL supplied ) 1247 * @param class_id is encoding class 1248 * @param buf is original string (perhaps not zero terminated) 1249 * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. 1250 * @result is language_id or -1 if autodetection is failed 1251 */ 1252 rcc_language_id rccDetectLanguage(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len); 1253 1254 /** 1255 * Tries to detect charset of string 1256 * @param config is language configuration 1257 * @param class_id is encoding class 1258 * @param buf is original string (perhaps not zero terminated) 1259 * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. 1260 * @result is auto_charset_id or -1 if autodetection is failed 1261 */ 1262 rcc_autocharset_id rccConfigDetectCharset(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len); 1263 1264 /** 1265 * Recode string from specified encoding class to #rcc_string. Encoding detection engines and 1266 * recoding cache are used (if possible) to detect original 'buf' encoding. Otherwise the 1267 * preconfigured encoding of class is assumed. 1268 * 1269 * @param ctx is working context ( or default one if NULL supplied ) 1270 * @param class_id is encoding class 1271 * @param buf is original string (perhaps not zero terminated) 1272 * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. 1273 * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. 
1274 */ 1275 rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len); 1276 /** 1277 * Recode string from #rcc_string to specified encoding class. If encoding class is of 1278 * 'File System' type, the autoprobing for file names can be performed. In the other cases 1279 * the rcc_string will be recoded in preconfigured class encoding. 1280 * 1281 * @param ctx is working context ( or default one if NULL supplied ) 1282 * @param class_id is encoding class 1283 * @param buf is original zero terminated string 1284 * @param rlen in rlen the size of recoded string will be returned. 1285 * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. 1286 */ 1287 char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, size_t *rlen); 1288 /** 1289 * Recode string between different encoding classes. The conversion is relays on rccSizedFrom 1290 * and rccSizedTo functions. 1291 * @see rccSizedFrom 1292 * @see rccSizedTo 1293 * 1294 * @param ctx is working context ( or default one if NULL supplied ) 1295 * @param from is source encoding class 1296 * @param to is destination encoding class 1297 * @param buf is original string (perhaps not zero terminated) 1298 * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. 1299 * @param rlen in rlen the size of recoded string will be returned. 1300 * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. 1301 */ 1302 char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen); 1303 /** 1304 * Recode file name between different encoding classes. Normally, should be used to convert 1305 * string from the file list to real file system names. The autoprobing of names is enabled 1306 * depending on the output encoding class configuration and current options. 
1307 * @see rcc_class_t 1308 * @see rcc_class_type_t 1309 * @see rcc_option_t 1310 1311 * The conversion is partly relays on rccSizedFrom. 1312 * @see rccSizedFrom 1313 * 1314 * @param ctx is working context ( or default one if NULL supplied ) 1315 * @param from is source encoding class 1316 * @param to is destination encoding class 1317 * @param fspath is path to the filesystem where file are located. 1318 * @param path is file path 1319 * @param filename is file name 1320 * @result is recoded file name or NULL if recoding is not required or failed. It is up to the caller to free memory. 1321 */ 1322 char *rccFS(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *fspath, const char *path, const char *filename); 1323 1324 /** 1325 * Recode string from specified encoding to #rcc_string. 1326 * 1327 * @param ctx is working context ( or default one if NULL supplied ) 1328 * @param charset is source encoding 1329 * @param buf is original string (perhaps not zero terminated) 1330 * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. 1331 * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. 1332 */ 1333 rcc_string rccSizedFromCharset(rcc_context ctx, const char *charset, const char *buf, size_t len); 1334 /** 1335 * Recode string from #rcc_string to specified encoding. 1336 * 1337 * @param ctx is working context ( or default one if NULL supplied ) 1338 * @param charset is destination encoding 1339 * @param buf is original zero terminated string 1340 * @param rlen in rlen the size of recoded string will be returned. 1341 * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. 1342 */ 1343 char *rccSizedToCharset(rcc_context ctx, const char *charset, rcc_const_string buf, size_t *rlen); 1344 /** 1345 * Recode string between specified encoding class and encoding. 
1346 * 1347 * The conversion is partly relays on rccSizedFrom. 1348 * @see rccSizedFrom 1349 * 1350 * @param ctx is working context ( or default one if NULL supplied ) 1351 * @param class_id is source encoding class 1352 * @param charset is destination encoding 1353 * @param buf is original string (perhaps not zero terminated) 1354 * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. 1355 * @param rlen in rlen the size of recoded string will be returned. 1356 * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. 1357 */ 1358 char *rccSizedRecodeToCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, rcc_const_string buf, size_t len, size_t *rlen); 1359 /** 1360 * Recode string between specified encoding and encoding class. 1361 * 1362 * The conversion is partly relays on rccSizedTo 1363 * @see rccSizedTo 1364 * 1365 * @param ctx is working context ( or default one if NULL supplied ) 1366 * @param class_id is destination encoding class 1367 * @param charset is source encoding 1368 * @param buf is original string (perhaps not zero terminated) 1369 * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. 1370 * @param rlen in rlen the size of recoded string will be returned. 1371 * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. 1372 */ 1373 rcc_string rccSizedRecodeFromCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen); 1374 /** 1375 * Recode string between specified encodings. 1376 * 1377 * @param ctx is working context ( or default one if NULL supplied ) 1378 * @param from is source encoding 1379 * @param to is destination encoding 1380 * @param buf is original string (perhaps not zero terminated) 1381 * @param len is exact size of string or 0. 
In the last case the size is determined using 'strlen' function. 1382 * @param rlen in rlen the size of recoded string will be returned. 1383 * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. 1384 */ 1385 char *rccSizedRecodeCharsets(rcc_context ctx, const char *from, const char *to, const char *buf, size_t len, size_t *rlen); 1386 1387 1388 /** 1389 * Recode string from specified encoding class to #rcc_string. Encoding detection engines and 1390 * recoding cache are used (if possible) to detect original 'buf' encoding. Otherwise the 1391 * preconfigured encoding of class is assumed. 1392 * 1393 * @param config is language configuration 1394 * @param class_id is encoding class 1395 * @param buf is original string (perhaps not zero terminated) 1396 * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. 1397 * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. 1398 */ 1399 rcc_string rccConfigSizedFrom(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len); 1400 /** 1401 * Recode string from #rcc_string to specified encoding class. If encoding class is of 1402 * 'File System' type, the autoprobing for file names can be performed. In the other cases 1403 * the rcc_string will be recoded in preconfigured class encoding. 1404 * 1405 * @param config is language configuration 1406 * @param class_id is encoding class 1407 * @param buf is original zero terminated string 1408 * @param rlen in rlen the size of recoded string will be returned. 1409 * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. 1410 */ 1411 char *rccConfigSizedTo(rcc_language_config config, rcc_class_id class_id, rcc_const_string buf, size_t *rlen); 1412 /** 1413 * Recode string between different encoding classes. 
The conversion is relays on rccConfigSizedFrom 1414 * and rccConfigSizedTo functions. 1415 * @see rccConfigSizedFrom 1416 * @see rccConfigSizedTo 1417 * 1418 * @param config is language configuration 1419 * @param from is source encoding class 1420 * @param to is destination encoding class 1421 * @param buf is original string (perhaps not zero terminated) 1422 * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. 1423 * @param rlen in rlen the size of recoded string will be returned. 1424 * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. 1425 */ 1426 char *rccConfigSizedRecode(rcc_language_config config, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen); 1427 /** 1428 * Recode string from specified encoding to #rcc_string. 1429 * 1430 * @param config is language configuration 1431 * @param class_id is encoding class 1432 * @param charset is source encoding 1433 * @param buf is original string (perhaps not zero terminated) 1434 * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. 1435 * @param rlen in rlen the size of recoded string will be returned. 1436 * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. 1437 */ 1438 rcc_string rccConfigSizedRecodeFromCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen); 1439 /** 1440 * Recode string from #rcc_string to specified encoding. 1441 * 1442 * @param config is language configuration 1443 * @param class_id is encoding class 1444 * @param charset is destination encoding 1445 * @param buf is original zero terminated string 1446 * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. 
1447 * @param rlen in rlen the size of recoded string will be returned. 1448 * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. 1449 */ 1450 char *rccConfigSizedRecodeToCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, rcc_const_string buf, size_t len, size_t *rlen); 1451 1452 1453 #define rccFrom(ctx, class_id, buf) rccSizedFrom(ctx, class_id, buf, 0) 1454 #define rccTo(ctx, class_id, buf) rccSizedTo(ctx, class_id, buf, NULL) 1455 #define rccRecode(ctx, from, to, buf) rccSizedRecode(ctx, from, to, buf, 0, NULL) 1456 1457 #define rccFromCharset(ctx, charset, buf) rccSizedFromCharset(ctx, charset, buf, 0) 1458 #define rccToCharset(ctx, charset, buf) rccSizedToCharset(ctx, charset, buf, NULL) 1459 #define rccRecodeToCharset(ctx, class_id, charset, buf) rccSizedRecodeToCharset(ctx, class_id, charset, buf, 0, NULL) 1460 #define rccRecodeFromCharset(ctx, class_id, charset, buf) rccSizedRecodeFromCharset(ctx, class_id, charset, buf, 0, NULL) 1461 #define rccRecodeCharsets(ctx, from, to, buf) rccSizedRecodeCharsets(ctx, from, to, buf, 0, NULL) 1462 1463 #define rccConfigFrom(ctx, class_id, buf) rccConfigSizedFrom(ctx, class_id, buf, 0) 1464 #define rccConfigTo(ctx, class_id, buf) rccConfigSizedTo(ctx, class_id, buf, NULL) 1465 #define rccConfigRecode(ctx, from, to, buf) rccConfigSizedRecode(ctx, from, to, buf, 0, NULL) 1466 #define rccConfigRecodeToCharset(ctx, class_id, charset, buf) rccConfigSizedRecodeToCharset(ctx, class_id, charset, buf, 0, NULL) 1467 #define rccConfigRecodeFromCharset(ctx, class_id, charset, buf) rccConfigSizedRecodeFromCharset(ctx, class_id, charset, buf, 0, NULL) 1468 1469 /******************************************************************************* 1470 ******************************** Options *************************************** 1471 *******************************************************************************/ 1472 1473 /* xml.c */ 1474 typedef 
void *rcc_config; 1475 rcc_config rccGetConfiguration(); 1476 1477 /** 1478 * Save Configuration. 1479 * 1480 * @param ctx is working context ( or default one if NULL supplied ) 1481 * @param name is configuration file name ( can be shared between multiple applications! ) 1482 * @return non-zero in the case of errors. 1483 */ 1484 int rccSave(rcc_context ctx, const char *name); 1485 /** 1486 * Load Configuration. 1487 * 1488 * @param ctx is working context ( or default one if NULL supplied ) 1489 * @param name is configuration file name ( can be shared between multiple applications! ) 1490 * @return non-zero in the case of errors. 1491 */ 1492 int rccLoad(rcc_context ctx, const char *name); 1493 1494 /******************************************************************************* 1495 **************************** Engine Plugins ************************************ 1496 *******************************************************************************/ 1497 1498 typedef rcc_engine *(*rcc_plugin_engine_info_function)(const char *lang); 1499 1500 rcc_engine_internal rccEngineGetInternal(rcc_engine_context ctx); 1501 rcc_language *rccEngineGetLanguage(rcc_engine_context ctx); 1502 rcc_context rccEngineGetRccContext(rcc_engine_context ctx); 1503 rcc_engine *rccEngineGetInfo(rcc_engine_context ctx); 1504 rcc_autocharset_id rccEngineGetAutoCharsetByName(rcc_engine_context ctx, const char *name); 1505 1506 1507 /******************************************************************************* 1508 **************************** Configuration ************************************* 1509 *******************************************************************************/ 1510 1511 /** 1512 * The Berkley DB support is compiled in 1513 */ 1514 #define RCC_CC_FLAG_HAVE_BERKLEY_DB 0x01 1515 /** 1516 * The dynamic engine plugins support is compiled in 1517 */ 1518 #define RCC_CC_FLAG_HAVE_DYNAMIC_ENGINES 0x02 1519 /** 1520 * Enca engine is compiled in 1521 */ 1522 #define 
RCC_CC_FLAG_HAVE_ENCA 0x04 1523 /** 1524 * LibRCD engine is compiled in 1525 */ 1526 #define RCC_CC_FLAG_HAVE_RCD 0x08 1527 /** 1528 * Libtranslate translation engine compiled in 1529 */ 1530 #define RCC_CC_FLAG_HAVE_LIBTRANSLATE 0x10 1531 1532 /** 1533 * The library build environment is represented by this structure 1534 */ 1535 struct rcc_compiled_configuration_t { 1536 unsigned long flags; /**< compilation flags */ 1537 }; 1538 typedef struct rcc_compiled_configuration_t rcc_compiled_configuration_s; 1539 typedef const struct rcc_compiled_configuration_t *rcc_compiled_configuration; 1540 1541 /** 1542 * Get information about library compilation environment 1543 */ 1544 rcc_compiled_configuration rccGetCompiledConfiguration(); 1545 1546 int rccLocaleGetClassByName(const char *locale); 1547 int rccLocaleGetLanguage(char *result, const char *lv, unsigned int n); 1548 int rccLocaleGetCharset(char *result, const char *lv, unsigned int n); 1549 1550 1551 /** 1552 * For compatibilty reasons 1553 */ 1554 #define RCC_OPTION_TRANSLATE_SKIP_PARRENT RCC_OPTION_TRANSLATE_SKIP_PARENT 1555 #define RCC_SPELLER_ALMOST_PARRENT RCC_SPELLER_ALMOST_PARENT 1556 #define RCC_SPELLER_PARRENT RCC_SPELLER_PARENT 1557 #define rccSpellerAddParrent rccSpellerAddParent 1558 1559 #ifdef __cplusplus 1560 } 1561 #endif 1562 1563 #endif /* _LIBRCC_H */ 1564