1 /** 2 * Copyright (c) 2007-2012, Timothy Stack 3 * 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * * Redistributions of source code must retain the above copyright notice, this 10 * list of conditions and the following disclaimer. 11 * * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * * Neither the name of Timothy Stack nor the names of its contributors 15 * may be used to endorse or promote products derived from this software 16 * without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY 19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY 22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #ifndef data_scanner_hh 31 #define data_scanner_hh 32 33 #include <string> 34 35 #include "pcrepp/pcrepp.hh" 36 #include "shared_buffer.hh" 37 38 enum data_token_t { 39 DT_INVALID = -1, 40 41 DT_QUOTED_STRING = 0, 42 DT_URL, 43 DT_PATH, 44 DT_MAC_ADDRESS, 45 DT_DATE, 46 DT_TIME, 47 DT_IPV6_ADDRESS, 48 DT_HEX_DUMP, 49 DT_XML_EMPTY_TAG, 50 DT_XML_OPEN_TAG, 51 DT_XML_CLOSE_TAG, 52 /* DT_QUALIFIED_NAME, */ 53 54 DT_COLON, 55 DT_EQUALS, 56 DT_COMMA, 57 DT_SEMI, 58 59 DT_EMPTY_CONTAINER, 60 61 DT_LCURLY, 62 DT_RCURLY, 63 64 DT_LSQUARE, 65 DT_RSQUARE, 66 67 DT_LPAREN, 68 DT_RPAREN, 69 70 DT_LANGLE, 71 DT_RANGLE, 72 73 DT_IPV4_ADDRESS, 74 DT_UUID, 75 76 DT_VERSION_NUMBER, 77 DT_OCTAL_NUMBER, 78 DT_PERCENTAGE, 79 DT_NUMBER, 80 DT_HEX_NUMBER, 81 82 DT_EMAIL, 83 DT_CONSTANT, 84 DT_WORD, 85 DT_SYMBOL, 86 DT_LINE, 87 DT_WHITE, 88 DT_DOT, 89 90 DT_GARBAGE, 91 92 DT_TERMINAL_MAX = DT_GARBAGE + 1, 93 94 DNT_KEY = 50, 95 DNT_PAIR, 96 DNT_VALUE, 97 DNT_ROW, 98 DNT_UNITS, 99 DNT_MEASUREMENT, 100 DNT_VARIABLE_KEY, 101 DNT_ROWRANGE, 102 DNT_DATE_TIME, 103 DNT_GROUP, 104 105 DNT_MAX, 106 107 DT_ANY = 100, 108 }; 109 110 class data_scanner { 111 public: 112 static const char *token2name(data_token_t token); 113 data_scanner(const std::string & line,size_t off=0,size_t len=(size_t)-1)114 data_scanner(const std::string &line, size_t off = 0, size_t len = (size_t) -1) 115 : ds_line(line), 116 ds_pcre_input(ds_line.c_str(), off, len) 117 { 118 if (!line.empty() && line[line.length() - 1] == '.') { 119 this->ds_pcre_input.pi_length -= 1; 120 } 121 }; 122 data_scanner(shared_buffer_ref & line,size_t off=0,size_t len=(size_t)-1)123 data_scanner(shared_buffer_ref &line, size_t off = 0, size_t len = (size_t) -1) 124 : ds_sbr(line), ds_pcre_input(line.get_data(), off, len == (size_t) -1 ? line.length() : len) 125 { 126 require(len == (size_t) -1 || len <= line.length()); 127 if (line.length() > 0 && line.get_data()[line.length() - 1] == '.') { 128 this->ds_pcre_input.pi_length -= 1; 129 } 130 }; 131 132 bool tokenize(pcre_context &pc, data_token_t &token_out); 133 bool tokenize2(pcre_context &pc, data_token_t &token_out); 134 get_input()135 pcre_input &get_input() { return this->ds_pcre_input; }; 136 reset()137 void reset() { 138 this->ds_pcre_input.reset_next_offset(); 139 }; 140 141 private: 142 std::string ds_line; 143 shared_buffer_ref ds_sbr; 144 pcre_input ds_pcre_input; 145 }; 146 147 #endif 148