1 /*
2 * This file is part of libdom.
3 * Licensed under the MIT License,
4 * http://www.opensource.org/licenses/mit-license.php
5 * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
6 */
7
8 #include <inttypes.h>
9 #include <stddef.h>
10
11 #include "utils/validate.h"
12
13 #include <dom/core/string.h>
14
15 #include "utils/character_valid.h"
16 #include "utils/namespace.h"
17 #include "utils/utils.h"
18
19 #include <parserutils/charset/utf8.h>
20
/* A combination of various tests */
22 static bool is_first_char(uint32_t ch);
23 static bool is_name_char(uint32_t ch);
24
/* Test whether the character may appear as the first character of a
 * Name.  Note that ':' is accepted here, so callers that need the
 * colon-free form must reject it themselves. */
static bool is_first_char(uint32_t ch)
{
	/* Inclusive code point ranges permitted at the start of a name.
	 * Refer http://www.w3.org/TR/REC-xml/ for detail */
	static const struct {
		uint32_t lo;
		uint32_t hi;
	} start_ranges[] = {
		{ 'a', 'z' },
		{ 'A', 'Z' },
		{ '_', '_' },
		{ ':', ':' },
		{ 0xC0, 0xD6 },
		{ 0xD8, 0xF6 },
		{ 0xF8, 0x2FF },
		{ 0x370, 0x37D },
		{ 0x37F, 0x1FFF },
		{ 0x200C, 0x200D },
		{ 0x2070, 0x218F },
		{ 0x2C00, 0x2FEF },
		{ 0x3001, 0xD7FF },
		{ 0xF900, 0xFDCF },
		{ 0xFDF0, 0xFFFD },
		{ 0x10000, 0xEFFFF }
	};
	size_t i;

	for (i = 0; i < sizeof(start_ranges) / sizeof(start_ranges[0]); i++) {
		if (ch >= start_ranges[i].lo && ch <= start_ranges[i].hi)
			return true;
	}

	/* Anything the ranges above missed is still accepted when the
	 * letter classifier recognises it.  '_' and ':' need no second
	 * look: the table has already matched them. */
	if (is_letter(ch)) {
		return true;
	}

	return false;
}
53
/* Test whether the character may appear inside a Name, i.e. at any
 * position after the first.  ':' is accepted here. */
static bool is_name_char(uint32_t ch)
{
	/* Inclusive code point ranges permitted within a name.
	 * Refer http://www.w3.org/TR/REC-xml/ for detail.
	 * Entries tagged "!start" are the ones that is_first_char()
	 * does not list. */
	static const uint32_t name_ranges[][2] = {
		{ 'a', 'z' },
		{ 'A', 'Z' },
		{ '0', '9' },		/* !start */
		{ '_', '_' },
		{ ':', ':' },
		{ '-', '-' },		/* !start */
		{ '.', '.' },		/* !start */
		{ 0xB7, 0xB7 },		/* !start */
		{ 0xC0, 0xD6 },
		{ 0xD8, 0xF6 },
		{ 0xF8, 0x2FF },
		{ 0x300, 0x36F },	/* !start */
		{ 0x370, 0x37D },
		{ 0x37F, 0x1FFF },
		{ 0x200C, 0x200D },
		{ 0x203F, 0x2040 },	/* !start */
		{ 0x2070, 0x218F },
		{ 0x2C00, 0x2FEF },
		{ 0x3001, 0xD7FF },
		{ 0xF900, 0xFDCF },
		{ 0xFDF0, 0xFFFD },
		{ 0x10000, 0xEFFFF }
	};
	const size_t count = sizeof(name_ranges) / sizeof(name_ranges[0]);
	size_t idx = 0;

	while (idx < count) {
		if (ch >= name_ranges[idx][0] && ch <= name_ranges[idx][1])
			return true;
		idx++;
	}

	/* Fall back to the character class tests, tried in order:
	 * letter, digit, combining character, extender. */
	if (is_letter(ch) == true ||
			is_digit(ch) == true ||
			is_combining_char(ch) == true ||
			is_extender(ch) == true)
		return true;

	return false;
}
90
91 /**
92 * Test whether the name is a valid one according XML 1.0 standard.
93 * For the standard please refer:
94 *
95 * http://www.w3.org/TR/2004/REC-xml-20040204/
96 *
97 * \param name The name need to be tested
98 * \return true if ::name is valid, false otherwise.
99 */
_dom_validate_name(dom_string * name)100 bool _dom_validate_name(dom_string *name)
101 {
102 uint32_t ch;
103 size_t clen, slen;
104 parserutils_error err;
105 const uint8_t *s;
106
107 if (name == NULL)
108 return false;
109
110 slen = dom_string_length(name);
111 if (slen == 0)
112 return false;
113
114 s = (const uint8_t *) dom_string_data(name);
115 slen = dom_string_byte_length(name);
116
117 err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen);
118 if (err != PARSERUTILS_OK) {
119 return false;
120 }
121
122 if (is_first_char(ch) == false)
123 return false;
124
125 s += clen;
126 slen -= clen;
127
128 while (slen > 0) {
129 err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen);
130 if (err != PARSERUTILS_OK) {
131 return false;
132 }
133
134 if (is_name_char(ch) == false)
135 return false;
136
137 s += clen;
138 slen -= clen;
139 }
140
141 return true;
142 }
143
144 /**
145 * Validate whether the string is a legal NCName.
146 * Refer http://www.w3.org/TR/REC-xml-names/ for detail.
147 *
148 * \param str The name to validate
149 * \return true if ::name is valid, false otherwise.
150 */
_dom_validate_ncname(dom_string * name)151 bool _dom_validate_ncname(dom_string *name)
152 {
153 uint32_t ch;
154 size_t clen, slen;
155 parserutils_error err;
156 const uint8_t *s;
157
158 if (name == NULL)
159 return false;
160
161 slen = dom_string_length(name);
162 if (slen == 0)
163 return false;
164
165 s = (const uint8_t *) dom_string_data(name);
166 slen = dom_string_byte_length(name);
167
168 err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen);
169 if (err != PARSERUTILS_OK) {
170 return false;
171 }
172
173 if (is_letter(ch) == false && ch != (uint32_t) '_')
174 return false;
175
176 s += clen;
177 slen -= clen;
178
179 while (slen > 0) {
180 err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen);
181 if (err != PARSERUTILS_OK) {
182 return false;
183 }
184
185 if (is_name_char(ch) == false)
186 return false;
187
188 if (ch == (uint32_t) ':')
189 return false;
190
191 s += clen;
192 slen -= clen;
193 }
194
195 return true;
196 }
197
198