1 /*
2 www.sourceforge.net/projects/tinyxpath
3 Copyright (c) 2002-2004 Yves Berquin (yvesb@users.sourceforge.net)
4 
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
8 
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
12 
13 1. The origin of this software must not be misrepresented; you must
14 not claim that you wrote the original software. If you use this
15 software in a product, an acknowledgment in the product documentation
16 would be appreciated but is not required.
17 
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
20 
21 3. This notice may not be removed or altered from any source
22 distribution.
23 */
24 
25 /**
26    \file tinyutil.cpp
27    \author Yves Berquin
28    Miscellaneous utilities for the TinyXPath project
29 */
30 
31 #include "stdafx.h"
32 #include <string.h>
33 #include <stdio.h>
34 
35 #include "lex_util.h"
36 
37 namespace TinyXPath
38 {
39 
40 /// Mapping of all the byte values into elementary lexical items
41 static lexico lex_char_map [256] =
42  {
43    /*                      0            1            2            3            4            5            6            7   */
44    /*                       8            9            a            b            c            d            e            f  */
45    /* 00 .. 07 */    lex_null,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,
46    /* 08 .. 0f */    lex_none,   lex_space,   lex_space,    lex_none,    lex_none,   lex_space,    lex_none,    lex_none,
47    /* 10 .. 17 */    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,
48    /* 18 .. 1f */    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,
49    /* 20 .. 27 */   lex_space,  lex_exclam, lex_2_quote,    lex_none,  lex_dollar,    lex_none,    lex_none, lex_1_quote,
50    /* 28 .. 2f */  lex_oparen,  lex_cparen,    lex_star,    lex_plus,   lex_comma,   lex_minus,     lex_dot,   lex_slash,
51    /* 30 .. 37 */   lex_digit,   lex_digit,   lex_digit,   lex_digit,   lex_digit,   lex_digit,   lex_digit,   lex_digit,
52    /* 38 .. 3f */   lex_digit,   lex_digit,   lex_colon,  lex_scolon,      lex_lt,   lex_equal,      lex_gt,    lex_none,
53    /* 40 .. 47 */      lex_at,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,
54    /* 48 .. 4f */   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,
55    /* 50 .. 57 */   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,
56    /* 58 .. 5f */   lex_bchar,   lex_bchar,   lex_bchar,  lex_obrack,    lex_none,  lex_cbrack,    lex_none,   lex_under,
57    /* 60 .. 67 */    lex_none,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,
58    /* 68 .. 6f */   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,
59    /* 70 .. 77 */   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,
60    /* 78 .. 7f */   lex_bchar,   lex_bchar,   lex_bchar,    lex_none,  lex_orchar,    lex_none,    lex_none,    lex_none,
61    /* 80 .. 87 */    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,
62    /* 88 .. 8f */    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,
63    /* 90 .. 97 */    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,
64    /* 98 .. 9f */    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,
65    /* a0 .. a7 */    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,
66    /* a8 .. af */    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,
67    /* b0 .. b7 */    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,  lex_extend,
68    /* b8 .. bf */    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,    lex_none,
69    /* c0 .. c7 */   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,
70    /* c8 .. cf */   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,
71    /* d0 .. d7 */   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,    lex_none,
72    /* d8 .. df */   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,
73    /* e0 .. e7 */   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,
74    /* e8 .. ef */   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,
75    /* f0 .. f7 */   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,    lex_none,
76    /* f8 .. ff */   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar,   lex_bchar
77  };
78 
79 /// Dump a lexical element
cp_disp_class_lex(lexico lex_in)80 const char * cp_disp_class_lex (lexico lex_in)
81 {
82    switch (lex_in)
83    {
84       case lex_none :                     return "<small>none</small>";
85       case lex_null :                     return "null";
86       case lex_digit :                    return "digit";
87       case lex_bchar :                    return "base char";
88       case lex_space :                    return "space";
89       case lex_extend :                   return "extender";
90       case lex_slash :                    return "slash";
91       case lex_at :                       return "at";
92       case lex_dot :                      return "dot";
93       case lex_minus :                    return "minus";
94       case lex_under :                    return "under";
95       case lex_colon :                    return "colon";
96       case lex_scolon :                   return "semi colon";
97       case lex_2_quote :                  return "double quote";
98       case lex_1_quote :                  return "simple quote";
99       case lex_dollar :                   return "dollar";
100       case lex_oparen :                   return "opening parenthesis";
101       case lex_cparen :                   return "closing parenthesis";
102       case lex_star :                     return "star";
103       case lex_plus :                     return "plus";
104       case lex_comma :                    return "comma";
105       case lex_lt :                       return "less than";
106       case lex_equal :                    return "equal";
107       case lex_gt :                       return "greater than";
108       case lex_obrack :                   return "opening bracket";
109       case lex_cbrack :                   return "closing bracket";
110       case lex_orchar :                   return "or character (|)";
111       case lex_exclam :                   return "exclamation (!)";
112       case lex_2_colon :                  return "::";
113       case lex_2_slash :                  return "//";
114       case lex_2_dot :                    return "..";
115       case lex_not_equal :                return "!=";
116       case lex_lt_equal :                 return "<=";
117       case lex_gt_equal :                 return ">=";
118 
119       case lex_ncname :                   return "NCName";
120       case lex_number :                   return "Number";
121       case lex_literal :                  return "literal";
122       case lex_or :                       return "or";
123       case lex_and :                      return "and";
124       case lex_mod :                      return "mod";
125       case lex_div :                      return "div";
126       case lex_ancestor :                 return "ancestor";
127       case lex_ancestor_or_self :         return "ancestor-or-self";
128       case lex_attribute :                return "attribute";
129       case lex_child :                    return "child";
130       case lex_descendant :               return "descendant";
131       case lex_descendant_or_self :       return "descendant-or-self";
132       case lex_following :                return "following";
133       case lex_following_sibling :        return "following-sibling";
134       case lex_namespace :                return "namespace";
135       case lex_parent :                   return "parent";
136       case lex_preceding :                return "preceding";
137       case lex_preceding_sibling :        return "preceding-sibling";
138       case lex_self :                     return "self";
139       case lex_processing_instruction :   return "processing-instruction";
140       case lex_comment :                  return "comment";
141       case lex_node :                     return "node";
142       case lex_text :                     return "text";
143 
144    }
145    return "???";
146 }
147 
148 /// Get the lexical class of an XPath expression byte
lex_get_class(_byte_ b_in)149 lexico lex_get_class (_byte_ b_in)
150 {
151    return lex_char_map [b_in];
152 }
153 
154 /// Check if a lexical element can be an axis name
o_is_axis_name(lexico lex_test)155 bool o_is_axis_name (lexico lex_test)
156 {
157    if (lex_test >= lex_start_axis_name && lex_test <= lex_end_axis_name)
158       return true;
159    return false;
160 }
161 
162 /// Check if an ID maps an existing keyword
163 /// \n Returns the new lexical element or lex_ncname if not found
lex_test_id(const _byte_ * bp_str,unsigned u_size,lexico lex_next)164 lexico lex_test_id (const _byte_ * bp_str, unsigned u_size, lexico lex_next)
165 {
166    char * cp_equi;
167    unsigned u_lex;
168 
169    cp_equi = new char [u_size + 1];
170    memcpy (cp_equi, bp_str, u_size);
171    cp_equi [u_size] = 0;
172    for (u_lex = lex_start_keyword; u_lex <= lex_end_keyword; u_lex++)
173       if (! strcmp (cp_equi, cp_disp_class_lex (lexico (u_lex))))
174          // DO not recognize the text keyword unless it is followed by an open parenthesis
175          if ((u_lex != lex_text) || (lex_next == lex_oparen))
176          {
177             delete [] cp_equi;
178             return lexico (u_lex);
179          }
180    delete [] cp_equi;
181    return lex_ncname;
182 }
183 
184 /// Removes all leading and trailing white spaces
S_remove_lead_trail(const char * cp_in)185 TIXML_STRING S_remove_lead_trail (const char * cp_in)
186 {
187 	TIXML_STRING S_ret;
188 	const char * cp_start, * cp_end;
189 	char * cp_new, * cp_out;
190 
191    // result can't be longer. we assign same size
192 	cp_new = new char [strlen (cp_in) + 1];
193    cp_out = cp_new;
194 	cp_start = cp_in;
195 	while (* cp_start == ' ' || * cp_start == '\t')
196 		cp_start++;
197 	cp_end = cp_in + strlen (cp_in) - 1;
198 	while (cp_end >= cp_in && (* cp_end == ' ' || * cp_end == '\t'))
199 		cp_end--;
200    while (cp_start <= cp_end)
201    {
202       if (* cp_start == ' ' || * cp_start == '\t')
203       {
204          * cp_out ++ = ' ';
205          cp_start++;
206          while (cp_start <= cp_end && (* cp_start == ' ' || * cp_start == '\t'))
207             cp_start++;
208       }
209       else
210          * cp_out++ = * cp_start++;
211    }
212    * cp_out = 0;
213 	S_ret = cp_new;
214 	delete [] cp_new;
215 	return S_ret;
216 }
217 
218 /// Assign an integer to a string
v_assign_int_to_string(TIXML_STRING & S_string,int i_val)219 void v_assign_int_to_string (TIXML_STRING & S_string, int i_val)
220 {
221    char ca_int [80];
222 
223    sprintf (ca_int, "%d", i_val);
224    S_string = ca_int;
225 }
226 
227 /// Assign a double to a string, cleaning any trailing zeroes and the decimal point if there's no
228 /// decimal part
v_assign_double_to_string(TIXML_STRING & S_string,double d_val)229 void v_assign_double_to_string (TIXML_STRING & S_string, double d_val)
230 {
231    char ca_int [80];
232 
233    sprintf (ca_int, "%f", d_val);
234    while (ca_int [strlen (ca_int) - 1] == '0')
235       ca_int [strlen (ca_int) - 1] = 0;
236    if (ca_int [strlen (ca_int) - 1] == '.')
237       ca_int [strlen (ca_int) - 1] = 0;
238    S_string = ca_int;
239 }
240 
241 #ifdef TINYXPATH_DEBUG
242    /// Return the name of an xpath_construct enumerated
cp_disp_construct(xpath_construct xc)243    const char * cp_disp_construct (xpath_construct xc)
244    {
245       switch (xc)
246       {
247          case xpath_unknown : return "xpath_unknown";
248          case xpath_location_path : return "xpath_location_path";
249          case xpath_absolute_location_path : return "xpath_absolute_location_path";
250          case xpath_relative_location_path : return "xpath_relative_location_path";
251          case xpath_step : return "xpath_step";
252          case xpath_axis_specifier : return "xpath_axis_specifier";
253          case xpath_axis_name : return "xpath_axis_name";
254          case xpath_node_test : return "xpath_node_test";
255          case xpath_predicate : return "xpath_predicate";
256          case xpath_predicate_expr : return "xpath_predicate_expr";
257          case xpath_abbreviated_absolute_location_path : return "xpath_abbreviated_absolute_location_path";
258          case xpath_abbrieviated_step : return "xpath_abbrieviated_step";
259          case xpath_abbreviated_axis_specifier : return "xpath_abbreviated_axis_specifier";
260          case xpath_expr : return "xpath_expr";
261          case xpath_primary_expr : return "xpath_primary_expr";
262          case xpath_function_call : return "xpath_function_call";
263          case xpath_argument : return "xpath_argument";
264          case xpath_union_expr : return "xpath_union_expr";
265          case xpath_path_expr : return "xpath_path_expr";
266          case xpath_filter_expr : return "xpath_filter_expr";
267          case xpath_or_expr : return "xpath_or_expr";
268          case xpath_or_expr_more : return "xpath_or_expr_more";
269          case xpath_and_expr : return "xpath_and_expr";
270          case xpath_equality_expr : return "xpath_equality_expr";
271          case xpath_relational_expr : return "xpath_relational_expr";
272          case xpath_additive_expr : return "xpath_additive_expr";
273          case xpath_multiplicative_expr : return "xpath_multiplicative_expr";
274          case xpath_unary_expr : return "xpath_unary_expr";
275          case xpath_multiply_operator : return "xpath_multiply_operator";
276          case xpath_variable_reference : return "xpath_variable_reference";
277          case xpath_name_test : return "xpath_name_test";
278          case xpath_xml_q_name : return "xpath_xml_q_name";
279          case xpath_xml_prefix : return "xpath_xml_prefix";
280          case xpath_xml_local_part : return "xpath_xml_local_part";
281       }
282       return "";
283    }
284 
285    /// Generates an ascii table summarizing all possible bytes and their XPath properties
v_generate_ascii_htm()286    void v_generate_ascii_htm ()
287    {
288       int c;
289       FILE * Fp_out;
290       Fp_out = fopen ("ascii.htm", "wt");
291       fprintf (Fp_out, "<html><head><title>ASCII</title></head><body>\n");
292       fprintf (Fp_out, "<table border=1><tr><th>dec</th><th>hex</th><th>char</th><th>class</th></tr>\n");
293       for (c = 0; c < 256; c++)
294          fprintf (Fp_out, "<tr><td>%3d</td><td>0x%02x</td><td>&#x%02x;</td><td>%s</td></tr>\n", c, c, c, cp_disp_class ((_byte_) c));
295       fprintf (Fp_out, "</table>\n");
296       fclose (Fp_out);
297    }
298 
299    /// Display the lexical class of an XPath expression byte
cp_disp_class(_byte_ b_in)300    const char * cp_disp_class (_byte_ b_in)
301    {
302       return cp_disp_class_lex (lex_get_class (b_in));
303    }
304 
305 
306 #endif
307 
308 }
309