/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
7 
8 #ifndef INCLUDED_ORCUS_PARSER_BASE_HPP
9 #define INCLUDED_ORCUS_PARSER_BASE_HPP
10 
11 #include "orcus/env.hpp"
12 #include "orcus/exception.hpp"
13 
14 #include <string>
15 #include <cstdlib>
16 #include <cstddef>
17 #include <cassert>
18 #include <functional>
19 
20 namespace orcus {
21 
22 /**
23  * Exception related to parsing error that includes the offset in the stream
24  * where the error occurred.
25  */
26 class ORCUS_PSR_DLLPUBLIC parse_error : public general_error
27 {
28     std::ptrdiff_t m_offset;  /// offset in the stream where the error occurred.
29 protected:
30     parse_error(const std::string& msg, std::ptrdiff_t offset);
31     parse_error(const std::string& cls, const std::string& msg, std::ptrdiff_t offset);
32 
33     static std::string build_message(const char* msg_before, char c, const char* msg_after);
34     static std::string build_message(const char* msg_before, const char* p, size_t n, const char* msg_after);
35 
36 public:
37     std::ptrdiff_t offset() const;
38 };
39 
40 class ORCUS_PSR_DLLPUBLIC parser_base
41 {
42 protected:
43     using numeric_parser_type = std::function<double(const char*&, size_t)>;
44 
45     const char* const mp_begin;
46     const char* mp_char;
47     const char* mp_end;
48     const bool m_transient_stream;
49 
50 private:
51     std::function<double(const char*&, size_t)> m_func_parse_numeric;
52 
53 protected:
54     parser_base(const char* p, size_t n, bool transient_stream);
55 
set_numeric_parser(const numeric_parser_type & func)56     void set_numeric_parser(const numeric_parser_type& func)
57     {
58         m_func_parse_numeric = func;
59     }
60 
transient_stream() const61     bool transient_stream() const { return m_transient_stream; }
62 
has_char() const63     bool has_char() const
64     {
65         assert(mp_char <= mp_end);
66         return mp_char != mp_end;
67     }
68 
has_next() const69     bool has_next() const
70     {
71         assert((mp_char+1) <= mp_end);
72         return (mp_char+1) != mp_end;
73     }
74 
next(size_t inc=1)75     void next(size_t inc=1) { mp_char += inc; }
76 
77     void prev(size_t dec=1);
78 
cur_char() const79     char cur_char() const { return *mp_char; }
80 
81     char next_char() const;
82 
83     void skip(const char* chars_to_skip, size_t n_chars_to_skip);
84 
85     /**
86      * Skip all characters that are 0-32 in ASCII range
87      */
88     void skip_space_and_control();
89 
90     /**
91      * Parse and check next characters to see if it matches specified
92      * character sequence.
93      *
94      * @param expected sequence of characters to match against.
95      * @param n_expected length of the character sequence.
96      *
97      * @return true if it matches specified character sequence, false
98      *         otherwise.
99      */
100     bool parse_expected(const char* expected, size_t n_expected);
101 
102     /**
103      * Try to parse the next characters as double, or return NaN in case of
104      * failure.
105      *
106      * @return double value on success, or NaN on failure.
107      */
108     double parse_double();
109 
110     /**
111      * Determine the number of characters remaining <strong>after</strong> the
112      * current character.  For instance, if the current character is on the
113      * last character in the stream, this method will return 0, whereas if
114      * it's on the first character, it will return the total length - 1.
115      *
116      * @return number of characters remaining after the current character.
117      */
118     size_t remaining_size() const;
119 
120     /**
121      * Determine the number of characters available from the current character
122      * to the end of the buffer.  The current character is included.
123      *
124      * @return number of characters available including the current character.
125      */
available_size() const126     size_t available_size() const
127     {
128         return std::distance(mp_char, mp_end);
129     }
130 
131     /**
132      * Return the current offset from the beginning of the character stream.
133      *
134      * @return current offset from the beginning of the character stream.
135      */
136     std::ptrdiff_t offset() const;
137 };
138 
139 }
140 
141 #endif
142 
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */