1 /*
2 www.sourceforge.net/projects/tinyxpath
3 Copyright (c) 2002-2004 Yves Berquin (yvesb@users.sourceforge.net)
4 
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
8 
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
12 
13 1. The origin of this software must not be misrepresented; you must
14 not claim that you wrote the original software. If you use this
15 software in a product, an acknowledgment in the product documentation
16 would be appreciated but is not required.
17 
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
20 
21 3. This notice may not be removed or altered from any source
22 distribution.
23 */
24 /**
25    \file xpath_stream.h
26    \author Yves Berquin
27    Specialized byte stream for the TinyXPath project
28 */
29 
30 #ifndef __TINYXPSTREAM_H
31 #define __TINYXPSTREAM_H
32 
33 #include "lex_util.h"
34 #include "byte_stream.h"
35 #include "xpath_syntax.h"
36 #include "tinyxml.h"
37 #include "tinystr.h"
38 
39 namespace TinyXPath
40 {
41 
42 /**
43    A specialized version of byte_stream for XPath
44 */
45 class xpath_stream : public byte_stream
46 {
47 protected :
48    /// List of tokens
49    token_syntax_decoder * tlp_list;
50 
51 public :
52    /// constructor
53    xpath_stream (const char * cp_in);
54    /// destructor
~xpath_stream()55    virtual ~ xpath_stream ()
56    {
57       delete tlp_list;
58    }
59    /// Decode the byte stream, and construct the lexical list
v_lexico_decode()60    void v_lexico_decode ()
61    {
62       enum {s_init, s_ncname, s_number, s_literal_1, s_literal_2, s_end} state;
63       lexico lex_new, lex_next;
64       unsigned u_size;
65       bool o_dot_in_number;
66 
67       u_size = 0;
68       o_dot_in_number = false;
69       state = s_init;
70       while (state != s_end)
71       {
72          lex_next = lex_get_class (b_top ());
73          switch (state)
74          {
75             case s_init :
76                switch (lex_next)
77                {
78                   case lex_bchar :
79                   case lex_under :
80                      // [XML:4] NCName	::= (Letter | '_') (NCNameChar)*
81 
82                      u_size = 1;
83                      state = s_ncname;
84                      b_pop ();
85                      break;
86                   case lex_null :
87                      state = s_end;
88                      break;
89                   case lex_digit :
90                      u_size = 1;
91                      state = s_number;
92                      o_dot_in_number = false;
93                      b_pop ();
94                      break;
95                   case lex_dot :
96                      if (lex_get_class (b_forward (1)) == lex_digit)
97                      {
98                         // [30]   Number				::=   Digits ('.' Digits?)? | '.' Digits
99                         // [31]   Digits				::=   [0-9]+
100                         u_size = 1;
101                         state = s_number;
102                         o_dot_in_number = true;
103                         b_pop ();
104                      }
105                      else
106                      {
107                         tlp_list -> v_add_token (lex_next, bp_get_backward (1), 1);
108                         b_pop ();
109                      }
110                      break;
111 
112                   case lex_1_quote :
113                      // [29]   Literal				::=   '"' [^"]* '"' | "'" [^']* "'"
114                      u_size = 0;
115                      b_pop ();
116                      state = s_literal_1;
117                      break;
118 
119                   case lex_2_quote :
120                      // [29]   Literal				::=   '"' [^"]* '"' | "'" [^']* "'"
121                      u_size = 0;
122                      b_pop ();
123                      state = s_literal_2;
124                      break;
125 
126                   default :
127                      tlp_list -> v_add_token (lex_next, bp_get_backward (1), 1);
128                      b_pop ();
129                      break;
130                }
131                break;
132             case s_literal_1 :
133                // [29]   Literal				::=   '"' [^"]* '"' | "'" [^']* "'"
134                switch (lex_next)
135                {
136                   case lex_1_quote :
137                      tlp_list -> v_add_token (lex_literal, bp_get_backward (u_size + 1), u_size);
138                      b_pop ();
139                      state = s_init;
140                      break;
141                   default :
142                      u_size++;
143                      b_pop ();
144                      break;
145                }
146                break;
147             case s_literal_2 :
148                // [29]   Literal				::=   '"' [^"]* '"' | "'" [^']* "'"
149                switch (lex_next)
150                {
151                   case lex_2_quote :
152                      tlp_list -> v_add_token (lex_literal, bp_get_backward (u_size + 1), u_size);
153                      b_pop ();
154                      state = s_init;
155                      break;
156                   default :
157                      u_size++;
158                      b_pop ();
159                      break;
160                }
161                break;
162             case s_ncname :
163                switch (lex_next)
164                {
165                   // [XML:5] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | CombiningChar | Extender
166                   case lex_bchar :
167                   case lex_digit :
168                   case lex_dot :
169                   case lex_minus :
170                   case lex_under :
171                   case lex_extend :
172                      u_size++;
173                      b_pop ();
174                      break;
175                   default :
176                      lex_new = lex_test_id (bp_get_backward (u_size + 1), u_size, lex_next);
177                      tlp_list -> v_add_token (lex_new, bp_get_backward (u_size + 1), u_size);
178                      state = s_init;
179                      break;
180                }
181                break;
182             case s_number :
183                switch (lex_next)
184                {
185                   // [30]   Number				::=   Digits ('.' Digits?)? | '.' Digits
186                   // [31]   Digits				::=   [0-9]+
187                   case lex_dot :
188                      if (o_dot_in_number)
189                      {
190                         tlp_list -> v_add_token (lex_number, bp_get_backward (u_size + 1), u_size);
191                         state = s_init;
192                      }
193                      else
194                      {
195                         o_dot_in_number = true;
196                         u_size++;
197                         b_pop ();
198                      }
199                      break;
200                   case lex_digit :
201                      u_size++;
202                      b_pop ();
203                      break;
204                   default :
205                      tlp_list -> v_add_token (lex_number, bp_get_backward (u_size + 1), u_size);
206                      state = s_init;
207                      break;
208                }
209                break;
210          }
211          if (lex_next == lex_null)
212             state = s_end;
213       }
214    }
215 
216    /// Evaluate a XPath expression \n
217    /// Decodes the lexical and syntax contents.
v_evaluate()218    void v_evaluate ()
219    {
220       v_lexico_decode ();
221       tlp_list -> v_syntax_decode ();
222    }
223 
224    /// Callback used by token_syntax_decoder::v_syntax_decode to notify of an action to be made. Pure virtual
225    virtual void v_action (xpath_construct , unsigned , unsigned , const char * ) = 0;
226 
227    /// Callback used by token_syntax_decoder::v_syntax_decode to know the action counter position. Pure virtual
228    /// \n This can be any kind of nomenclature, provided that the redefinition is coherent
229    virtual int i_get_action_counter () = 0;
230 } ;     // class xpath_stream
231 
232 }
233 
234 #endif
235