1 /* Test whether a Unicode character is case-ignorable.
2    Copyright (C) 2002, 2006-2007, 2009-2021 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2009.
4 
5    This file is free software.
6    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7    You can redistribute it and/or modify it under either
8      - the terms of the GNU Lesser General Public License as published
9        by the Free Software Foundation; either version 3, or (at your
10        option) any later version, or
11      - the terms of the GNU General Public License as published by the
12        Free Software Foundation; either version 2, or (at your option)
13        any later version, or
14      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
15 
16    This file is distributed in the hope that it will be useful,
17    but WITHOUT ANY WARRANTY; without even the implied warranty of
18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19    Lesser General Public License and the GNU General Public License
20    for more details.
21 
22    You should have received a copy of the GNU Lesser General Public
23    License and of the GNU General Public License along with this
24    program.  If not, see <https://www.gnu.org/licenses/>.  */
25 
26 #include <config.h>
27 
28 /* Specification.  */
29 #include "caseprop.h"
30 
31 /* Quoting the Unicode standard:
32      Definition: A character is defined to be "case-ignorable" if it has the
33      value MidLetter {or the value MidNumLet} for the Word_Break property or
34      its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me),
35      Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk).
36    The text marked in braces was added in Unicode 5.1.0, see
37    <https://www.unicode.org/versions/Unicode5.1.0/> section "Update of
38    Definition of case-ignorable".   */
39 /* Since this predicate is only used for the "Before C" and "After C"
40    conditions of FINAL_SIGMA, we exclude the "cased" characters here.
41    This simplifies the evaluation of the regular expressions
42      \p{cased} (\p{case-ignorable})* C
43    and
44      C (\p{case-ignorable})* \p{cased}
45  */
46 
47 #if 0
48 
49 #include "unictype.h"
50 #include "uniwbrk.h"
51 
52 bool
53 uc_is_case_ignorable (ucs4_t uc)
54 {
55   int wbp = uc_wordbreak_property (uc);
56 
57   return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET
58           || uc_is_general_category_withtable (uc, UC_CATEGORY_MASK_Mn
59                                                    | UC_CATEGORY_MASK_Me
60                                                    | UC_CATEGORY_MASK_Cf
61                                                    | UC_CATEGORY_MASK_Lm
62                                                    | UC_CATEGORY_MASK_Sk))
63          && !uc_is_cased (uc);
64 }
65 
66 #else
67 
68 #include "unictype/bitmap.h"
69 
70 /* Define u_casing_property_case_ignorable table.  */
71 #include "ignorable.h"
72 
73 bool
74 uc_is_case_ignorable (ucs4_t uc)
75 {
76   return bitmap_lookup (&u_casing_property_case_ignorable, uc);
77 }
78 
79 #endif
80