1 /* 2 www.sourceforge.net/projects/tinyxpath 3 Copyright (c) 2002-2004 Yves Berquin (yvesb@users.sourceforge.net) 4 5 This software is provided 'as-is', without any express or implied 6 warranty. In no event will the authors be held liable for any 7 damages arising from the use of this software. 8 9 Permission is granted to anyone to use this software for any 10 purpose, including commercial applications, and to alter it and 11 redistribute it freely, subject to the following restrictions: 12 13 1. The origin of this software must not be misrepresented; you must 14 not claim that you wrote the original software. If you use this 15 software in a product, an acknowledgment in the product documentation 16 would be appreciated but is not required. 17 18 2. Altered source versions must be plainly marked as such, and 19 must not be misrepresented as being the original software. 20 21 3. This notice may not be removed or altered from any source 22 distribution. 23 */ 24 /** 25 \file xpath_stream.h 26 \author Yves Berquin 27 Specialized byte stream for the TinyXPath project 28 */ 29 30 #ifndef __TINYXPSTREAM_H 31 #define __TINYXPSTREAM_H 32 33 #include "lex_util.h" 34 #include "byte_stream.h" 35 #include "xpath_syntax.h" 36 #include "tinyxml.h" 37 #include "tinystr.h" 38 39 namespace TinyXPath 40 { 41 42 /** 43 A specialized version of byte_stream for XPath 44 */ 45 class xpath_stream : public byte_stream 46 { 47 protected : 48 /// List of tokens 49 token_syntax_decoder * tlp_list; 50 51 public : 52 /// constructor 53 xpath_stream (const char * cp_in); 54 /// destructor ~xpath_stream()55 virtual ~ xpath_stream () 56 { 57 delete tlp_list; 58 } 59 /// Decode the byte stream, and construct the lexical list v_lexico_decode()60 void v_lexico_decode () 61 { 62 enum {s_init, s_ncname, s_number, s_literal_1, s_literal_2, s_end} state; 63 lexico lex_new, lex_next; 64 unsigned u_size; 65 bool o_dot_in_number; 66 67 u_size = 0; 68 o_dot_in_number = false; 69 state = s_init; 70 while (state != s_end) 71 { 72 lex_next = lex_get_class (b_top ()); 73 switch (state) 74 { 75 case s_init : 76 switch (lex_next) 77 { 78 case lex_bchar : 79 case lex_under : 80 // [XML:4] NCName ::= (Letter | '_') (NCNameChar)* 81 82 u_size = 1; 83 state = s_ncname; 84 b_pop (); 85 break; 86 case lex_null : 87 state = s_end; 88 break; 89 case lex_digit : 90 u_size = 1; 91 state = s_number; 92 o_dot_in_number = false; 93 b_pop (); 94 break; 95 case lex_dot : 96 if (lex_get_class (b_forward (1)) == lex_digit) 97 { 98 // [30] Number ::= Digits ('.' Digits?)? | '.' Digits 99 // [31] Digits ::= [0-9]+ 100 u_size = 1; 101 state = s_number; 102 o_dot_in_number = true; 103 b_pop (); 104 } 105 else 106 { 107 tlp_list -> v_add_token (lex_next, bp_get_backward (1), 1); 108 b_pop (); 109 } 110 break; 111 112 case lex_1_quote : 113 // [29] Literal ::= '"' [^"]* '"' | "'" [^']* "'" 114 u_size = 0; 115 b_pop (); 116 state = s_literal_1; 117 break; 118 119 case lex_2_quote : 120 // [29] Literal ::= '"' [^"]* '"' | "'" [^']* "'" 121 u_size = 0; 122 b_pop (); 123 state = s_literal_2; 124 break; 125 126 default : 127 tlp_list -> v_add_token (lex_next, bp_get_backward (1), 1); 128 b_pop (); 129 break; 130 } 131 break; 132 case s_literal_1 : 133 // [29] Literal ::= '"' [^"]* '"' | "'" [^']* "'" 134 switch (lex_next) 135 { 136 case lex_1_quote : 137 tlp_list -> v_add_token (lex_literal, bp_get_backward (u_size + 1), u_size); 138 b_pop (); 139 state = s_init; 140 break; 141 default : 142 u_size++; 143 b_pop (); 144 break; 145 } 146 break; 147 case s_literal_2 : 148 // [29] Literal ::= '"' [^"]* '"' | "'" [^']* "'" 149 switch (lex_next) 150 { 151 case lex_2_quote : 152 tlp_list -> v_add_token (lex_literal, bp_get_backward (u_size + 1), u_size); 153 b_pop (); 154 state = s_init; 155 break; 156 default : 157 u_size++; 158 b_pop (); 159 break; 160 } 161 break; 162 case s_ncname : 163 switch (lex_next) 164 { 165 // [XML:5] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | CombiningChar | Extender 166 case lex_bchar : 167 case lex_digit : 168 case lex_dot : 169 case lex_minus : 170 case lex_under : 171 case lex_extend : 172 u_size++; 173 b_pop (); 174 break; 175 default : 176 lex_new = lex_test_id (bp_get_backward (u_size + 1), u_size, lex_next); 177 tlp_list -> v_add_token (lex_new, bp_get_backward (u_size + 1), u_size); 178 state = s_init; 179 break; 180 } 181 break; 182 case s_number : 183 switch (lex_next) 184 { 185 // [30] Number ::= Digits ('.' Digits?)? | '.' Digits 186 // [31] Digits ::= [0-9]+ 187 case lex_dot : 188 if (o_dot_in_number) 189 { 190 tlp_list -> v_add_token (lex_number, bp_get_backward (u_size + 1), u_size); 191 state = s_init; 192 } 193 else 194 { 195 o_dot_in_number = true; 196 u_size++; 197 b_pop (); 198 } 199 break; 200 case lex_digit : 201 u_size++; 202 b_pop (); 203 break; 204 default : 205 tlp_list -> v_add_token (lex_number, bp_get_backward (u_size + 1), u_size); 206 state = s_init; 207 break; 208 } 209 break; 210 } 211 if (lex_next == lex_null) 212 state = s_end; 213 } 214 } 215 216 /// Evaluate a XPath expression \n 217 /// Decodes the lexical and syntax contents. v_evaluate()218 void v_evaluate () 219 { 220 v_lexico_decode (); 221 tlp_list -> v_syntax_decode (); 222 } 223 224 /// Callback used by token_syntax_decoder::v_syntax_decode to notify of an action to be made. Pure virtual 225 virtual void v_action (xpath_construct , unsigned , unsigned , const char * ) = 0; 226 227 /// Callback used by token_syntax_decoder::v_syntax_decode to know the action counter position. Pure virtual 228 /// \n This can be any kind of nomenclature, provided that the redefinition is coherent 229 virtual int i_get_action_counter () = 0; 230 } ; // class xpath_stream 231 232 } 233 234 #endif 235