1 /**
2  * Copyright (c) 2007-2012, Timothy Stack
3  *
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * * Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * * Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  * * Neither the name of Timothy Stack nor the names of its contributors
15  * may be used to endorse or promote products derived from this software
16  * without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #ifndef data_scanner_hh
31 #define data_scanner_hh
32 
33 #include <string>
34 
35 #include "pcrepp/pcrepp.hh"
36 #include "shared_buffer.hh"
37 
/**
 * Identifiers for the tokens handled by data_scanner.
 *
 * DT_INVALID is -1.  The DT_* values from DT_QUOTED_STRING through
 * DT_GARBAGE are numbered consecutively starting at zero, and
 * DT_TERMINAL_MAX is one past DT_GARBAGE.  The DNT_* values are numbered
 * consecutively starting at 50 and end with the DNT_MAX marker.  DT_ANY is
 * fixed at 100.
 *
 * NOTE(review): the DNT_ prefix presumably marks composite (non-terminal)
 * node types produced by a parser layered on the scanner -- confirm against
 * the code that consumes them.
 */
enum data_token_t {
    DT_INVALID = -1,

    /* Multi-character lexemes. */
    DT_QUOTED_STRING = 0,
    DT_URL,
    DT_PATH,
    DT_MAC_ADDRESS,
    DT_DATE,
    DT_TIME,
    DT_IPV6_ADDRESS,
    DT_HEX_DUMP,
    DT_XML_EMPTY_TAG,
    DT_XML_OPEN_TAG,
    DT_XML_CLOSE_TAG,
    /* DT_QUALIFIED_NAME, */

    /* Separators. */
    DT_COLON,
    DT_EQUALS,
    DT_COMMA,
    DT_SEMI,

    DT_EMPTY_CONTAINER,

    /* Bracketing characters, listed as left/right pairs. */
    DT_LCURLY,
    DT_RCURLY,
    DT_LSQUARE,
    DT_RSQUARE,
    DT_LPAREN,
    DT_RPAREN,
    DT_LANGLE,
    DT_RANGLE,

    DT_IPV4_ADDRESS,
    DT_UUID,

    /* Numeric forms. */
    DT_VERSION_NUMBER,
    DT_OCTAL_NUMBER,
    DT_PERCENTAGE,
    DT_NUMBER,
    DT_HEX_NUMBER,

    DT_EMAIL,
    DT_CONSTANT,
    DT_WORD,
    DT_SYMBOL,
    DT_LINE,
    DT_WHITE,
    DT_DOT,

    DT_GARBAGE,

    /* One past the last DT_* value in the consecutive run above. */
    DT_TERMINAL_MAX = DT_GARBAGE + 1,

    /* Second numbering range, starting at 50. */
    DNT_KEY = 50,
    DNT_PAIR,
    DNT_VALUE,
    DNT_ROW,
    DNT_UNITS,
    DNT_MEASUREMENT,
    DNT_VARIABLE_KEY,
    DNT_ROWRANGE,
    DNT_DATE_TIME,
    DNT_GROUP,

    /* One past the last DNT_* value. */
    DNT_MAX,

    DT_ANY = 100
};
109 
110 class data_scanner {
111 public:
112     static const char *token2name(data_token_t token);
113 
data_scanner(const std::string & line,size_t off=0,size_t len=(size_t)-1)114     data_scanner(const std::string &line, size_t off = 0, size_t len = (size_t) -1)
115         : ds_line(line),
116           ds_pcre_input(ds_line.c_str(), off, len)
117     {
118         if (!line.empty() && line[line.length() - 1] == '.') {
119             this->ds_pcre_input.pi_length -= 1;
120         }
121     };
122 
data_scanner(shared_buffer_ref & line,size_t off=0,size_t len=(size_t)-1)123     data_scanner(shared_buffer_ref &line, size_t off = 0, size_t len = (size_t) -1)
124         : ds_sbr(line), ds_pcre_input(line.get_data(), off, len == (size_t) -1 ? line.length() : len)
125     {
126         require(len == (size_t) -1 || len <= line.length());
127         if (line.length() > 0 && line.get_data()[line.length() - 1] == '.') {
128             this->ds_pcre_input.pi_length -= 1;
129         }
130     };
131 
132     bool tokenize(pcre_context &pc, data_token_t &token_out);
133     bool tokenize2(pcre_context &pc, data_token_t &token_out);
134 
get_input()135     pcre_input &get_input() { return this->ds_pcre_input; };
136 
reset()137     void reset() {
138         this->ds_pcre_input.reset_next_offset();
139     };
140 
141 private:
142     std::string ds_line;
143     shared_buffer_ref ds_sbr;
144     pcre_input ds_pcre_input;
145 };
146 
147 #endif
148