1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 #ifndef INCLUDED_I18NUTIL_UNICODE_HXX
20 #define INCLUDED_I18NUTIL_UNICODE_HXX
21 
22 #include <com/sun/star/i18n/UnicodeScript.hpp>
23 #include <sal/types.h>
24 #include <rtl/ustrbuf.hxx>
25 #include <unicode/uscript.h>
26 #include <i18nutil/i18nutildllapi.h>
27 
28 class LanguageTag;
29 
/** One entry of a script-type lookup table.

    Associates a pair of css::i18n::UnicodeScript values with a 16-bit value.
    A pointer to such entries is accepted by unicode::getUnicodeScriptType().

    NOTE(review): presumably [from, to] is an inclusive script range and
    `value` is the type reported for characters in that range - confirm
    against the implementation, which is not part of this header.
 */
struct ScriptTypeList
{
    css::i18n::UnicodeScript from; // first script of the entry
    css::i18n::UnicodeScript to; // last script of the entry
    sal_Int16 value; // value associated with the from/to pair
};
36 
37 class I18NUTIL_DLLPUBLIC unicode
38 {
39 public:
40     static sal_Int16 getUnicodeType(const sal_Unicode ch);
41     static sal_Int16 getUnicodeScriptType(const sal_Unicode ch, const ScriptTypeList* typeList,
42                                           sal_Int16 unknownType = 0);
43     static sal_Unicode getUnicodeScriptStart(css::i18n::UnicodeScript type);
44     static sal_Unicode getUnicodeScriptEnd(css::i18n::UnicodeScript type);
45     static sal_uInt8 getUnicodeDirection(const sal_Unicode ch);
46     static bool isControl(const sal_Unicode ch);
47     static bool isAlpha(const sal_Unicode ch);
48     static bool isSpace(const sal_Unicode ch);
49     static bool isWhiteSpace(const sal_Unicode ch);
50 
51     /** Check for Unicode variation sequence selectors
52 
53         @param nCode  A Unicode code point.
54 
55         @return  True if code is a Unicode variation sequence selector.
56      */
isIVSSelector(sal_uInt32 nCode)57     static bool isIVSSelector(sal_uInt32 nCode)
58     {
59         return (nCode >= 0xFE00 && nCode <= 0xFE0F) // Variation Selectors block
60                || (nCode >= 0xE0100 && nCode <= 0xE01EF); // Variation Selectors Supplement block
61     }
62 
63     /** Check for base characters of a CJK ideographic variation sequence (IVS)
64 
65         @param nCode  A Unicode code point.
66 
67         @return  True if code is a Unicode base character part of CJK IVS
68      */
isCJKIVSCharacter(sal_uInt32 nCode)69     static bool isCJKIVSCharacter(sal_uInt32 nCode)
70     {
71         return (nCode >= 0x4E00 && nCode <= 0x9FFF) // CJK Unified Ideographs
72                || (nCode >= 0x3400 && nCode <= 0x4DBF) // CJK Unified Ideographs Extension A
73                || (nCode >= 0x20000 && nCode <= 0x2A6DF); // CJK Unified Ideographs Extension B
74     }
75 
76     //Map an ISO 15924 script code to Latin/Asian/Complex/Weak
77     static sal_Int16 getScriptClassFromUScriptCode(UScriptCode eScript);
78 
79     //Return a language that can be written in a given ISO 15924 script code
80     static OString getExemplarLanguageForUScriptCode(UScriptCode eScript);
81 
82     //Format a number as a percentage according to the rules of the given
83     //language, e.g. 100 -> "100%" for en-US vs "100 %" for de-DE
84     static OUString formatPercent(double dNumber, const LanguageTag& rLangTag);
85 };
86 
/*
    Toggle between a character and its Unicode notation.
        -implements the concept found in Microsoft Word's Alt-X
        -accepts sequences of up to 8 hex characters and converts them into the corresponding Unicode character
            -example:  0000A78c   or   2bc
        -accepts sequences of up to 256 characters in Unicode notation
            -example:  U+00000065u+0331u+308
        -handles complex characters (with combining elements) and all of the Unicode planes.
*/
class I18NUTIL_DLLPUBLIC ToggleUnicodeCodepoint
{
private:
    // Working buffers; all three start out empty.
    // NOTE(review): presumably maInput collects the units passed to
    // AllowMoreInput(), while maUtf16 and maCombining hold intermediate
    // pieces of it - confirm against the implementation.
    OUStringBuffer maInput;
    OUStringBuffer maUtf16;
    OUStringBuffer maCombining;
    // State flags with their initial values.
    // NOTE(review): exact meaning is defined by the implementation file.
    bool mbAllowMoreChars = true;
    bool mbRequiresU = false;
    bool mbIsHexString = false;

public:
    /**
    Build an input string of valid UTF-16 units to toggle.
        -do not call the other functions until the input process is complete
        -build the string from right to left.  (Start from the character to the left of the cursor: move left.)
    */
    bool AllowMoreInput(sal_Unicode uChar);

    /**
    Validates (and potentially modifies) the input string.
        -all non-input functions must use this function first to validate the input string
        -additional input may be prevented after this function is called
    */
    OUString StringToReplace();
    OUString ReplacementString();

    /**
    While sInput.getLength() returns the number of UTF-16 units to delete,
        this function returns the number of "characters" to delete - potentially a smaller number

    NOTE(review): "sInput" is not declared in this class; presumably it refers
        to the string returned by StringToReplace() - confirm.
    */
    sal_uInt32 CharsToDelete();
};
128 
129 #endif
130 
131 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
132