1 //  NREX: Node RegEx
2 //  Version 0.2
3 //
4 //  Copyright (c) 2015-2016, Zher Huei Lee
5 //  All rights reserved.
6 //
7 //  This software is provided 'as-is', without any express or implied
8 //  warranty.  In no event will the authors be held liable for any damages
9 //  arising from the use of this software.
10 //
11 //  Permission is granted to anyone to use this software for any purpose,
12 //  including commercial applications, and to alter it and redistribute it
13 //  freely, subject to the following restrictions:
14 //
15 //   1. The origin of this software must not be misrepresented; you must not
16 //      claim that you wrote the original software. If you use this software
17 //      in a product, an acknowledgment in the product documentation would
18 //      be appreciated but is not required.
19 //
20 //   2. Altered source versions must be plainly marked as such, and must not
21 //      be misrepresented as being the original software.
22 //
23 //   3. This notice may not be removed or altered from any source
24 //      distribution.
25 //
26 
27 #ifndef NREX_HPP
28 #define NREX_HPP
29 
30 #include "nrex_config.h"
31 
/*
 * Character type used by the pattern and search-string parameters of the
 * API: wchar_t when NREX_UNICODE is defined, plain char otherwise.
 */
#if defined(NREX_UNICODE)
typedef wchar_t nrex_char;
#else
typedef char nrex_char;
#endif
37 
/*!
 * \brief Text range of a single capture result
 *
 * Both fields are relative to the beginning of the searched string.
 *
 * \see nrex::match()
 */
struct nrex_result
{
    /*! Start of text range */
    int start;

    /*! Length of text range */
    int length;
};
51 
52 class nrex_node;
53 
54 /*!
55  * \brief Holds the compiled regex pattern
56  */
57 class nrex
58 {
59     private:
60         unsigned int _capturing;
61         unsigned int _lookahead_depth;
62         nrex_node* _root;
63     public:
64 
65         /*!
66          * \brief Initialises an empty regex container
67          */
68         nrex();
69 
70         /*!
71          * \brief Initialises and compiles the regex pattern
72          *
73          * This calls nrex::compile() with the same arguments. To check whether
74          * the compilation was successfull, use nrex::valid().
75          *
76          * If the NREX_THROW_ERROR was defined it would automatically throw a
77          * runtime error nrex_compile_error if it encounters a problem when
78          * parsing the pattern.
79          *
80          * \param pattern   The regex pattern
81          * \param captures  The maximum number of capture groups to allow. Any
82          *                  extra would be converted to non-capturing groups.
83          *                  If negative, no limit would be imposed. Defaults
84          *                  to 9.
85          *
86          * \see nrex::compile()
87          */
88         nrex(const nrex_char* pattern, int captures = 9);
89 
90         ~nrex();
91 
92         /*!
93          * \brief Removes the compiled regex and frees up the memory
94          */
95         void reset();
96 
97         /*!
98          * \brief Checks if there is a compiled regex being stored
99          * \return True if present, False if not present
100          */
101         bool valid() const;
102 
103         /*!
104          * \brief Provides number of captures the compiled regex uses
105          *
106          * This is used to provide the array size of the captures needed for
107          * nrex::match() to work. The size is actually the number of capture
108          * groups + one for the matching of the entire pattern. This can be
109          * capped using the extra argument given in nrex::compile()
110          * (default 10).
111          *
112          * \return The number of captures
113          */
114         int capture_size() const;
115 
116         /*!
117          * \brief Compiles the provided regex pattern
118          *
119          * This automatically removes the existing compiled regex if already
120          * present.
121          *
122          * If the NREX_THROW_ERROR was defined it would automatically throw a
123          * runtime error nrex_compile_error if it encounters a problem when
124          * parsing the pattern.
125          *
126          * \param pattern   The regex pattern
127          * \param captures  The maximum number of capture groups to allow. Any
128          *                  extra would be converted to non-capturing groups.
129          *                  If negative, no limit would be imposed. Defaults
130          *                  to 9.
131          * \return True if the pattern was succesfully compiled
132          */
133         bool compile(const nrex_char* pattern, int captures = 9);
134 
135         /*!
136          * \brief Uses the pattern to search through the provided string
137          * \param str       The text to search through. It only needs to be
138          *                  null terminated if the end point is not provided.
139          *                  This also determines the starting anchor.
140          * \param captures  The array of results to store the capture results.
141          *                  The size of that array needs to be the same as the
142          *                  size given in nrex::capture_size(). As it matches
143          *                  the function fills the array with the results. 0 is
144          *                  the result for the entire pattern, 1 and above
145          *                  corresponds to the regex capture group if present.
146          * \param offset    The starting point of the search. This does not move
147          *                  the starting anchor. Defaults to 0.
148          * \param end       The end point of the search. This also determines
149          *                  the ending anchor. If a number less than the offset
150          *                  is provided, the search would be done until null
151          *                  termination. Defaults to -1.
152          * \return          True if a match was found. False otherwise.
153          */
154         bool match(const nrex_char* str, nrex_result* captures, int offset = 0, int end = -1) const;
155 };
156 
157 #ifdef NREX_THROW_ERROR
158 
159 #include <stdexcept>
160 
/*!
 * \brief Error thrown when a regex pattern cannot be compiled
 *
 * Derives publicly from std::runtime_error so that handlers written for
 * std::runtime_error or std::exception catch it and can call what().
 * With non-public inheritance such handlers would not match.
 */
class nrex_compile_error : public std::runtime_error
{
    public:
        /*!
         * \brief Creates the error with a description of the problem
         * \param message  Text later returned by what()
         */
        nrex_compile_error(const char* message)
            : std::runtime_error(message)
        {
        }

        // No user-declared destructor: the implicitly generated one is
        // virtual and non-throwing, like that of std::runtime_error.
};
173 
174 #endif
175 
176 #endif // NREX_HPP
177