1 /*
2 www.sourceforge.net/projects/tinyxpath
3 Copyright (c) 2002-2004 Yves Berquin (yvesb@users.sourceforge.net)
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must
14 not claim that you wrote the original software. If you use this
15 software in a product, an acknowledgment in the product documentation
16 would be appreciated but is not required.
17
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
20
21 3. This notice may not be removed or altered from any source
22 distribution.
23 */
24
25 /**
26 \file tinyutil.cpp
27 \author Yves Berquin
28 Miscellaneous utilities for the TinyXPath project
29 */
30
31 #include "stdafx.h"
32 #include <string.h>
33 #include <stdio.h>
34
35 #include "lex_util.h"
36
37 namespace TinyXPath
38 {
39
40 /// Mapping of all the byte values into elementary lexical items
41 static lexico lex_char_map [256] =
42 {
43 /* 0 1 2 3 4 5 6 7 */
44 /* 8 9 a b c d e f */
45 /* 00 .. 07 */ lex_null, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none,
46 /* 08 .. 0f */ lex_none, lex_space, lex_space, lex_none, lex_none, lex_space, lex_none, lex_none,
47 /* 10 .. 17 */ lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none,
48 /* 18 .. 1f */ lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none,
49 /* 20 .. 27 */ lex_space, lex_exclam, lex_2_quote, lex_none, lex_dollar, lex_none, lex_none, lex_1_quote,
50 /* 28 .. 2f */ lex_oparen, lex_cparen, lex_star, lex_plus, lex_comma, lex_minus, lex_dot, lex_slash,
51 /* 30 .. 37 */ lex_digit, lex_digit, lex_digit, lex_digit, lex_digit, lex_digit, lex_digit, lex_digit,
52 /* 38 .. 3f */ lex_digit, lex_digit, lex_colon, lex_scolon, lex_lt, lex_equal, lex_gt, lex_none,
53 /* 40 .. 47 */ lex_at, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar,
54 /* 48 .. 4f */ lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar,
55 /* 50 .. 57 */ lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar,
56 /* 58 .. 5f */ lex_bchar, lex_bchar, lex_bchar, lex_obrack, lex_none, lex_cbrack, lex_none, lex_under,
57 /* 60 .. 67 */ lex_none, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar,
58 /* 68 .. 6f */ lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar,
59 /* 70 .. 77 */ lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar,
60 /* 78 .. 7f */ lex_bchar, lex_bchar, lex_bchar, lex_none, lex_orchar, lex_none, lex_none, lex_none,
61 /* 80 .. 87 */ lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none,
62 /* 88 .. 8f */ lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none,
63 /* 90 .. 97 */ lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none,
64 /* 98 .. 9f */ lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none,
65 /* a0 .. a7 */ lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none,
66 /* a8 .. af */ lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none,
67 /* b0 .. b7 */ lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_extend,
68 /* b8 .. bf */ lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none, lex_none,
69 /* c0 .. c7 */ lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar,
70 /* c8 .. cf */ lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar,
71 /* d0 .. d7 */ lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_none,
72 /* d8 .. df */ lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar,
73 /* e0 .. e7 */ lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar,
74 /* e8 .. ef */ lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar,
75 /* f0 .. f7 */ lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_none,
76 /* f8 .. ff */ lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar, lex_bchar
77 };
78
79 /// Dump a lexical element
cp_disp_class_lex(lexico lex_in)80 const char * cp_disp_class_lex (lexico lex_in)
81 {
82 switch (lex_in)
83 {
84 case lex_none : return "<small>none</small>";
85 case lex_null : return "null";
86 case lex_digit : return "digit";
87 case lex_bchar : return "base char";
88 case lex_space : return "space";
89 case lex_extend : return "extender";
90 case lex_slash : return "slash";
91 case lex_at : return "at";
92 case lex_dot : return "dot";
93 case lex_minus : return "minus";
94 case lex_under : return "under";
95 case lex_colon : return "colon";
96 case lex_scolon : return "semi colon";
97 case lex_2_quote : return "double quote";
98 case lex_1_quote : return "simple quote";
99 case lex_dollar : return "dollar";
100 case lex_oparen : return "opening parenthesis";
101 case lex_cparen : return "closing parenthesis";
102 case lex_star : return "star";
103 case lex_plus : return "plus";
104 case lex_comma : return "comma";
105 case lex_lt : return "less than";
106 case lex_equal : return "equal";
107 case lex_gt : return "greater than";
108 case lex_obrack : return "opening bracket";
109 case lex_cbrack : return "closing bracket";
110 case lex_orchar : return "or character (|)";
111 case lex_exclam : return "exclamation (!)";
112 case lex_2_colon : return "::";
113 case lex_2_slash : return "//";
114 case lex_2_dot : return "..";
115 case lex_not_equal : return "!=";
116 case lex_lt_equal : return "<=";
117 case lex_gt_equal : return ">=";
118
119 case lex_ncname : return "NCName";
120 case lex_number : return "Number";
121 case lex_literal : return "literal";
122 case lex_or : return "or";
123 case lex_and : return "and";
124 case lex_mod : return "mod";
125 case lex_div : return "div";
126 case lex_ancestor : return "ancestor";
127 case lex_ancestor_or_self : return "ancestor-or-self";
128 case lex_attribute : return "attribute";
129 case lex_child : return "child";
130 case lex_descendant : return "descendant";
131 case lex_descendant_or_self : return "descendant-or-self";
132 case lex_following : return "following";
133 case lex_following_sibling : return "following-sibling";
134 case lex_namespace : return "namespace";
135 case lex_parent : return "parent";
136 case lex_preceding : return "preceding";
137 case lex_preceding_sibling : return "preceding-sibling";
138 case lex_self : return "self";
139 case lex_processing_instruction : return "processing-instruction";
140 case lex_comment : return "comment";
141 case lex_node : return "node";
142 case lex_text : return "text";
143
144 }
145 return "???";
146 }
147
148 /// Get the lexical class of an XPath expression byte
lex_get_class(_byte_ b_in)149 lexico lex_get_class (_byte_ b_in)
150 {
151 return lex_char_map [b_in];
152 }
153
154 /// Check if a lexical element can be an axis name
o_is_axis_name(lexico lex_test)155 bool o_is_axis_name (lexico lex_test)
156 {
157 if (lex_test >= lex_start_axis_name && lex_test <= lex_end_axis_name)
158 return true;
159 return false;
160 }
161
162 /// Check if an ID maps an existing keyword
163 /// \n Returns the new lexical element or lex_ncname if not found
lex_test_id(const _byte_ * bp_str,unsigned u_size,lexico lex_next)164 lexico lex_test_id (const _byte_ * bp_str, unsigned u_size, lexico lex_next)
165 {
166 char * cp_equi;
167 unsigned u_lex;
168
169 cp_equi = new char [u_size + 1];
170 memcpy (cp_equi, bp_str, u_size);
171 cp_equi [u_size] = 0;
172 for (u_lex = lex_start_keyword; u_lex <= lex_end_keyword; u_lex++)
173 if (! strcmp (cp_equi, cp_disp_class_lex (lexico (u_lex))))
174 // DO not recognize the text keyword unless it is followed by an open parenthesis
175 if ((u_lex != lex_text) || (lex_next == lex_oparen))
176 {
177 delete [] cp_equi;
178 return lexico (u_lex);
179 }
180 delete [] cp_equi;
181 return lex_ncname;
182 }
183
184 /// Removes all leading and trailing white spaces
S_remove_lead_trail(const char * cp_in)185 TIXML_STRING S_remove_lead_trail (const char * cp_in)
186 {
187 TIXML_STRING S_ret;
188 const char * cp_start, * cp_end;
189 char * cp_new, * cp_out;
190
191 // result can't be longer. we assign same size
192 cp_new = new char [strlen (cp_in) + 1];
193 cp_out = cp_new;
194 cp_start = cp_in;
195 while (* cp_start == ' ' || * cp_start == '\t')
196 cp_start++;
197 cp_end = cp_in + strlen (cp_in) - 1;
198 while (cp_end >= cp_in && (* cp_end == ' ' || * cp_end == '\t'))
199 cp_end--;
200 while (cp_start <= cp_end)
201 {
202 if (* cp_start == ' ' || * cp_start == '\t')
203 {
204 * cp_out ++ = ' ';
205 cp_start++;
206 while (cp_start <= cp_end && (* cp_start == ' ' || * cp_start == '\t'))
207 cp_start++;
208 }
209 else
210 * cp_out++ = * cp_start++;
211 }
212 * cp_out = 0;
213 S_ret = cp_new;
214 delete [] cp_new;
215 return S_ret;
216 }
217
218 /// Assign an integer to a string
v_assign_int_to_string(TIXML_STRING & S_string,int i_val)219 void v_assign_int_to_string (TIXML_STRING & S_string, int i_val)
220 {
221 char ca_int [80];
222
223 sprintf (ca_int, "%d", i_val);
224 S_string = ca_int;
225 }
226
227 /// Assign a double to a string, cleaning any trailing zeroes and the decimal point if there's no
228 /// decimal part
v_assign_double_to_string(TIXML_STRING & S_string,double d_val)229 void v_assign_double_to_string (TIXML_STRING & S_string, double d_val)
230 {
231 char ca_int [80];
232
233 sprintf (ca_int, "%f", d_val);
234 while (ca_int [strlen (ca_int) - 1] == '0')
235 ca_int [strlen (ca_int) - 1] = 0;
236 if (ca_int [strlen (ca_int) - 1] == '.')
237 ca_int [strlen (ca_int) - 1] = 0;
238 S_string = ca_int;
239 }
240
241 #ifdef TINYXPATH_DEBUG
242 /// Return the name of an xpath_construct enumerated
cp_disp_construct(xpath_construct xc)243 const char * cp_disp_construct (xpath_construct xc)
244 {
245 switch (xc)
246 {
247 case xpath_unknown : return "xpath_unknown";
248 case xpath_location_path : return "xpath_location_path";
249 case xpath_absolute_location_path : return "xpath_absolute_location_path";
250 case xpath_relative_location_path : return "xpath_relative_location_path";
251 case xpath_step : return "xpath_step";
252 case xpath_axis_specifier : return "xpath_axis_specifier";
253 case xpath_axis_name : return "xpath_axis_name";
254 case xpath_node_test : return "xpath_node_test";
255 case xpath_predicate : return "xpath_predicate";
256 case xpath_predicate_expr : return "xpath_predicate_expr";
257 case xpath_abbreviated_absolute_location_path : return "xpath_abbreviated_absolute_location_path";
258 case xpath_abbrieviated_step : return "xpath_abbrieviated_step";
259 case xpath_abbreviated_axis_specifier : return "xpath_abbreviated_axis_specifier";
260 case xpath_expr : return "xpath_expr";
261 case xpath_primary_expr : return "xpath_primary_expr";
262 case xpath_function_call : return "xpath_function_call";
263 case xpath_argument : return "xpath_argument";
264 case xpath_union_expr : return "xpath_union_expr";
265 case xpath_path_expr : return "xpath_path_expr";
266 case xpath_filter_expr : return "xpath_filter_expr";
267 case xpath_or_expr : return "xpath_or_expr";
268 case xpath_or_expr_more : return "xpath_or_expr_more";
269 case xpath_and_expr : return "xpath_and_expr";
270 case xpath_equality_expr : return "xpath_equality_expr";
271 case xpath_relational_expr : return "xpath_relational_expr";
272 case xpath_additive_expr : return "xpath_additive_expr";
273 case xpath_multiplicative_expr : return "xpath_multiplicative_expr";
274 case xpath_unary_expr : return "xpath_unary_expr";
275 case xpath_multiply_operator : return "xpath_multiply_operator";
276 case xpath_variable_reference : return "xpath_variable_reference";
277 case xpath_name_test : return "xpath_name_test";
278 case xpath_xml_q_name : return "xpath_xml_q_name";
279 case xpath_xml_prefix : return "xpath_xml_prefix";
280 case xpath_xml_local_part : return "xpath_xml_local_part";
281 }
282 return "";
283 }
284
285 /// Generates an ascii table summarizing all possible bytes and their XPath properties
v_generate_ascii_htm()286 void v_generate_ascii_htm ()
287 {
288 int c;
289 FILE * Fp_out;
290 Fp_out = fopen ("ascii.htm", "wt");
291 fprintf (Fp_out, "<html><head><title>ASCII</title></head><body>\n");
292 fprintf (Fp_out, "<table border=1><tr><th>dec</th><th>hex</th><th>char</th><th>class</th></tr>\n");
293 for (c = 0; c < 256; c++)
294 fprintf (Fp_out, "<tr><td>%3d</td><td>0x%02x</td><td>&#x%02x;</td><td>%s</td></tr>\n", c, c, c, cp_disp_class ((_byte_) c));
295 fprintf (Fp_out, "</table>\n");
296 fclose (Fp_out);
297 }
298
299 /// Display the lexical class of an XPath expression byte
cp_disp_class(_byte_ b_in)300 const char * cp_disp_class (_byte_ b_in)
301 {
302 return cp_disp_class_lex (lex_get_class (b_in));
303 }
304
305
306 #endif
307
308 }
309