1 #if !defined(PQXX_ARRAY_COMPOSITE_HXX)
2 # define PQXX_ARRAY_COMPOSITE_HXX
3
4 # include <cassert>
5
6 # include "pqxx/strconv.hxx"
7
8 namespace pqxx::internal
9 {
10 // Find the end of a double-quoted string.
11 /** @c input[pos] must be the opening double quote.
12 *
13 * Returns the offset of the first position after the closing quote.
14 */
scan_double_quoted_string(char const input[],std::size_t size,std::size_t pos,pqxx::internal::glyph_scanner_func * scan)15 inline std::size_t scan_double_quoted_string(
16 char const input[], std::size_t size, std::size_t pos,
17 pqxx::internal::glyph_scanner_func *scan)
18 {
19 auto next{scan(input, size, pos)};
20 bool at_quote{false};
21 for (pos = next, next = scan(input, size, pos); pos < size;
22 pos = next, next = scan(input, size, pos))
23 {
24 if (at_quote)
25 {
26 if (next - pos == 1 and input[pos] == '"')
27 {
28 // We just read a pair of double quotes. Carry on.
29 at_quote = false;
30 }
31 else
32 {
33 // We just read one double quote, and now we're at a character that's
34 // not a second double quote. Ergo, that last character was the
35 // closing double quote and this is the position right after it.
36 return pos;
37 }
38 }
39 else if (next - pos == 1)
40 {
41 switch (input[pos])
42 {
43 case '\\':
44 // Backslash escape. Skip ahead by one more character.
45 pos = next;
46 next = scan(input, size, pos);
47 break;
48
49 case '"':
50 // This is either the closing double quote, or the first of a pair of
51 // double quotes.
52 at_quote = true;
53 break;
54 }
55 }
56 else
57 {
58 // Multibyte character. Carry on.
59 }
60 }
61 if (not at_quote)
62 throw argument_error{
63 "Missing closing double-quote: " + std::string{input}};
64 return pos;
65 }
66
67
68 /// Un-quote and un-escape a double-quoted SQL string.
parse_double_quoted_string(char const input[],std::size_t end,std::size_t pos,pqxx::internal::glyph_scanner_func * scan)69 inline std::string parse_double_quoted_string(
70 char const input[], std::size_t end, std::size_t pos,
71 pqxx::internal::glyph_scanner_func *scan)
72 {
73 std::string output;
74 // Maximum output size is same as the input size, minus the opening and
75 // closing quotes. Or in the extreme opposite case, the real number could be
76 // half that. Usually it'll be a pretty close estimate.
77 output.reserve(std::size_t(end - pos - 2));
78
79 for (auto here{scan(input, end, pos)}, next{scan(input, end, here)};
80 here < end - 1; here = next, next = scan(input, end, here))
81 {
82 // A backslash here is always an escape. So is a double-quote, since we're
83 // inside the double-quoted string. In either case, we can just ignore the
84 // escape character and use the next character. This is the one redeeming
85 // feature of SQL's escaping system.
86 if ((next - here == 1) and (input[here] == '\\' or input[here] == '"'))
87 {
88 // Skip escape.
89 here = next;
90 next = scan(input, end, here);
91 }
92 output.append(input + here, input + next);
93 }
94 return output;
95 }
96
97
98 /// Find the end of an unquoted string in an array or composite-type value.
99 /** Stops when it gets to the end of the input; or when it sees any of the
100 * characters in STOP which has not been escaped.
101 *
102 * For array values, STOP is a comma, a semicolon, or a closing brace. For
103 * a value of a composite type, STOP is a comma or a closing parenthesis.
104 */
105 template<char... STOP>
scan_unquoted_string(char const input[],std::size_t size,std::size_t pos,pqxx::internal::glyph_scanner_func * scan)106 inline std::size_t scan_unquoted_string(
107 char const input[], std::size_t size, std::size_t pos,
108 pqxx::internal::glyph_scanner_func *scan)
109 {
110 bool at_backslash{false};
111 auto next{scan(input, size, pos)};
112 while ((pos < size) and
113 ((next - pos) > 1 or at_backslash or ((input[pos] != STOP) and ...)))
114 {
115 pos = next;
116 next = scan(input, size, pos);
117 at_backslash =
118 ((not at_backslash) and ((next - pos) == 1) and (input[pos] == '\\'));
119 }
120 return pos;
121 }
122
123
124 /// Parse an unquoted array entry or cfield of a composite-type field.
parse_unquoted_string(char const input[],std::size_t end,std::size_t pos,pqxx::internal::glyph_scanner_func * scan)125 inline std::string parse_unquoted_string(
126 char const input[], std::size_t end, std::size_t pos,
127 pqxx::internal::glyph_scanner_func *scan)
128 {
129 std::string output;
130 bool at_backslash{false};
131 output.reserve(end - pos);
132 for (auto next{scan(input, end, pos)}; pos < end;
133 pos = next, next = scan(input, end, pos))
134 {
135 at_backslash =
136 ((not at_backslash) and ((next - pos) == 1) and (input[pos] == '\\'));
137 if (not at_backslash)
138 output.append(input + pos, next - pos);
139 }
140 return output;
141 }
142
143
144 /// Parse a field of a composite-type value.
145 /** @c T is the C++ type of the field we're parsing, and @c index is its
146 * zero-based number.
147 *
148 * @param index Index of the current field, zero-based. It will increment for
149 * the next field.
150 * @param input Full input text for the entire composite-type value.
151 * @param pos Starting position (in @c input) of the field that we're parsing.
152 * After parsing, this will point at the beginning of the next field if
153 * there is one, or one position past the last character otherwise.
154 * @param field Destination for the parsed value.
155 * @param scan Glyph scanning function for the relevant encoding type.
156 * @param last_field Number of the last field in the value (zero-based). When
157 * parsing the last field, this will equal @c index.
158 */
159 template<typename T>
parse_composite_field(std::size_t & index,std::string_view input,std::size_t & pos,T & field,glyph_scanner_func * scan,std::size_t last_field)160 inline void parse_composite_field(
161 std::size_t &index, std::string_view input, std::size_t &pos, T &field,
162 glyph_scanner_func *scan, std::size_t last_field)
163 {
164 assert(index <= last_field);
165 auto next{scan(input.data(), std::size(input), pos)};
166 if ((next - pos) != 1)
167 throw conversion_error{"Non-ASCII character in composite-type syntax."};
168
169 // Expect a field.
170 switch (input[pos])
171 {
172 case ',':
173 case ')':
174 // The field is empty, i.e, null.
175 if constexpr (nullness<T>::has_null)
176 field = nullness<T>::null();
177 else
178 throw conversion_error{
179 "Can't read composite field " + to_string(index) + ": C++ type " +
180 type_name<T> + " does not support nulls."};
181 break;
182
183 case '"': {
184 auto const stop{
185 scan_double_quoted_string(input.data(), std::size(input), pos, scan)};
186 auto const text{parse_double_quoted_string(input.data(), stop, pos, scan)};
187 field = from_string<T>(text);
188 pos = stop;
189 }
190 break;
191
192 default: {
193 auto const stop{scan_unquoted_string<',', ')'>(
194 input.data(), std::size(input), pos, scan)};
195 auto const text{parse_unquoted_string(input.data(), stop, pos, scan)};
196 field = from_string<T>(text);
197 pos = stop;
198 }
199 break;
200 }
201
202 // Expect a comma or a closing parenthesis.
203 next = scan(input.data(), std::size(input), pos);
204
205 if ((next - pos) != 1)
206 throw conversion_error{
207 "Unexpected non-ASCII character after composite field: " +
208 std::string{input}};
209
210 if (index < last_field)
211 {
212 if (input[pos] != ',')
213 throw conversion_error{
214 "Found '" + std::string{input[pos]} +
215 "' in composite value where comma was expected: " + input.data()};
216 }
217 else
218 {
219 if (input[pos] == ',')
220 throw conversion_error{
221 "Composite value contained more fields than the expected " +
222 to_string(last_field) + ": " + input.data()};
223 if (input[pos] != ')')
224 throw conversion_error{
225 "Composite value has unexpected characters where closing parenthesis "
226 "was expected: " +
227 std::string{input}};
228 if (next != std::size(input))
229 throw conversion_error{
230 "Composite value has unexpected text after closing parenthesis: " +
231 std::string{input}};
232 }
233
234 pos = next;
235 ++index;
236 }
237
238
239 /// Conservatively estimate buffer size needed for a composite field.
240 template<typename T>
size_composite_field_buffer(T const & field)241 inline std::size_t size_composite_field_buffer(T const &field)
242 {
243 if constexpr (is_unquoted_safe<T>)
244 {
245 // Safe to copy, without quotes or escaping. Drop the terminating zero.
246 return size_buffer(field) - 1;
247 }
248 else
249 {
250 // + Opening quote.
251 // + Field budget.
252 // - Terminating zero.
253 // + Escaping for each byte in the field's string representation.
254 // - Escaping for terminating zero.
255 // + Closing quote.
256 return 1 + 2 * (size_buffer(field) - 1) + 1;
257 }
258 }
259
260
261 template<typename T>
write_composite_field(char * & pos,char * end,T const & field)262 inline void write_composite_field(char *&pos, char *end, T const &field)
263 {
264 if constexpr (is_unquoted_safe<T>)
265 {
266 // No need for quoting or escaping. Convert it straight into its final
267 // place in the buffer, and "backspace" the trailing zero.
268 pos = string_traits<T>::into_buf(pos, end, field) - 1;
269 }
270 else
271 {
272 // The field may need escaping, which means we need an intermediate buffer.
273 // To avoid allocating that at run time, we use the end of the buffer that
274 // we have.
275 auto const budget{size_buffer(field)};
276 *pos++ = '"';
277
278 // Now escape buf into its final position.
279 for (char const c : string_traits<T>::to_buf(end - budget, end, field))
280 {
281 if ((c == '"') or (c == '\\'))
282 *pos++ = '\\';
283
284 *pos++ = c;
285 }
286
287 *pos++ = '"';
288 }
289
290 *pos++ = ',';
291 }
292 } // namespace pqxx::internal
293 #endif
294