1 /* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */ 2 3 /* 4 * Word breaking in a Unicode sequence. Designed to be used in a 5 * generic text renderer. 6 * 7 * Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com> 8 * Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com> 9 * 10 * This software is provided 'as-is', without any express or implied 11 * warranty. In no event will the author be held liable for any damages 12 * arising from the use of this software. 13 * 14 * Permission is granted to anyone to use this software for any purpose, 15 * including commercial applications, and to alter it and redistribute 16 * it freely, subject to the following restrictions: 17 * 18 * 1. The origin of this software must not be misrepresented; you must 19 * not claim that you wrote the original software. If you use this 20 * software in a product, an acknowledgement in the product 21 * documentation would be appreciated but is not required. 22 * 2. Altered source versions must be plainly marked as such, and must 23 * not be misrepresented as being the original software. 24 * 3. This notice may not be removed or altered from any source 25 * distribution. 26 * 27 * The main reference is Unicode Standard Annex 29 (UAX #29): 28 * <URL:http://unicode.org/reports/tr29> 29 * 30 * When this library was designed, this annex was at Revision 17, for 31 * Unicode 6.0.0: 32 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> 33 * 34 * This library has been updated according to Revision 37, for 35 * Unicode 13.0.0: 36 * <URL:http://www.unicode.org/reports/tr29/tr29-37.html> 37 * 38 * The Unicode Terms of Use are available at 39 * <URL:http://www.unicode.org/copyright.html> 40 */ 41 42 /** 43 * @file wordbreak.h 44 * 45 * Header file for the word breaking (segmentation) algorithm. 46 * 47 * @author Tom Hacohen 48 */ 49 50 #ifndef WORDBREAK_H 51 #define WORDBREAK_H 52 53 #include <stddef.h> 54 #include "unibreakbase.h" 55 56 #ifdef __cplusplus 57 extern "C" { 58 #endif 59 60 #define WORDBREAK_BREAK 0 /**< Break is allowed */ 61 #define WORDBREAK_NOBREAK 1 /**< No break is allowed */ 62 #define WORDBREAK_INSIDEACHAR 2 /**< A UTF-8/16 sequence is unfinished */ 63 64 void init_wordbreak(void); 65 void set_wordbreaks_utf8( 66 const utf8_t *s, size_t len, const char* lang, char *brks); 67 void set_wordbreaks_utf16( 68 const utf16_t *s, size_t len, const char* lang, char *brks); 69 void set_wordbreaks_utf32( 70 const utf32_t *s, size_t len, const char* lang, char *brks); 71 72 #ifdef __cplusplus 73 } 74 #endif 75 76 #endif /* WORDBREAK_H */ 77