1 #if !defined(PQXX_ARRAY_COMPOSITE_HXX)
2 #  define PQXX_ARRAY_COMPOSITE_HXX
3 
4 #  include <cassert>
5 
6 #  include "pqxx/strconv.hxx"
7 
8 namespace pqxx::internal
9 {
10 // Find the end of a double-quoted string.
11 /** @c input[pos] must be the opening double quote.
12  *
13  * Returns the offset of the first position after the closing quote.
14  */
scan_double_quoted_string(char const input[],std::size_t size,std::size_t pos,pqxx::internal::glyph_scanner_func * scan)15 inline std::size_t scan_double_quoted_string(
16   char const input[], std::size_t size, std::size_t pos,
17   pqxx::internal::glyph_scanner_func *scan)
18 {
19   auto next{scan(input, size, pos)};
20   bool at_quote{false};
21   for (pos = next, next = scan(input, size, pos); pos < size;
22        pos = next, next = scan(input, size, pos))
23   {
24     if (at_quote)
25     {
26       if (next - pos == 1 and input[pos] == '"')
27       {
28         // We just read a pair of double quotes.  Carry on.
29         at_quote = false;
30       }
31       else
32       {
33         // We just read one double quote, and now we're at a character that's
34         // not a second double quote.  Ergo, that last character was the
35         // closing double quote and this is the position right after it.
36         return pos;
37       }
38     }
39     else if (next - pos == 1)
40     {
41       switch (input[pos])
42       {
43       case '\\':
44         // Backslash escape.  Skip ahead by one more character.
45         pos = next;
46         next = scan(input, size, pos);
47         break;
48 
49       case '"':
50         // This is either the closing double quote, or the first of a pair of
51         // double quotes.
52         at_quote = true;
53         break;
54       }
55     }
56     else
57     {
58       // Multibyte character.  Carry on.
59     }
60   }
61   if (not at_quote)
62     throw argument_error{
63       "Missing closing double-quote: " + std::string{input}};
64   return pos;
65 }
66 
67 
68 /// Un-quote and un-escape a double-quoted SQL string.
parse_double_quoted_string(char const input[],std::size_t end,std::size_t pos,pqxx::internal::glyph_scanner_func * scan)69 inline std::string parse_double_quoted_string(
70   char const input[], std::size_t end, std::size_t pos,
71   pqxx::internal::glyph_scanner_func *scan)
72 {
73   std::string output;
74   // Maximum output size is same as the input size, minus the opening and
75   // closing quotes.  Or in the extreme opposite case, the real number could be
76   // half that.  Usually it'll be a pretty close estimate.
77   output.reserve(std::size_t(end - pos - 2));
78 
79   for (auto here{scan(input, end, pos)}, next{scan(input, end, here)};
80        here < end - 1; here = next, next = scan(input, end, here))
81   {
82     // A backslash here is always an escape.  So is a double-quote, since we're
83     // inside the double-quoted string.  In either case, we can just ignore the
84     // escape character and use the next character.  This is the one redeeming
85     // feature of SQL's escaping system.
86     if ((next - here == 1) and (input[here] == '\\' or input[here] == '"'))
87     {
88       // Skip escape.
89       here = next;
90       next = scan(input, end, here);
91     }
92     output.append(input + here, input + next);
93   }
94   return output;
95 }
96 
97 
98 /// Find the end of an unquoted string in an array or composite-type value.
99 /** Stops when it gets to the end of the input; or when it sees any of the
100  * characters in STOP which has not been escaped.
101  *
102  * For array values, STOP is a comma, a semicolon, or a closing brace.  For
103  * a value of a composite type, STOP is a comma or a closing parenthesis.
104  */
105 template<char... STOP>
scan_unquoted_string(char const input[],std::size_t size,std::size_t pos,pqxx::internal::glyph_scanner_func * scan)106 inline std::size_t scan_unquoted_string(
107   char const input[], std::size_t size, std::size_t pos,
108   pqxx::internal::glyph_scanner_func *scan)
109 {
110   bool at_backslash{false};
111   auto next{scan(input, size, pos)};
112   while ((pos < size) and
113          ((next - pos) > 1 or at_backslash or ((input[pos] != STOP) and ...)))
114   {
115     pos = next;
116     next = scan(input, size, pos);
117     at_backslash =
118       ((not at_backslash) and ((next - pos) == 1) and (input[pos] == '\\'));
119   }
120   return pos;
121 }
122 
123 
124 /// Parse an unquoted array entry or cfield of a composite-type field.
parse_unquoted_string(char const input[],std::size_t end,std::size_t pos,pqxx::internal::glyph_scanner_func * scan)125 inline std::string parse_unquoted_string(
126   char const input[], std::size_t end, std::size_t pos,
127   pqxx::internal::glyph_scanner_func *scan)
128 {
129   std::string output;
130   bool at_backslash{false};
131   output.reserve(end - pos);
132   for (auto next{scan(input, end, pos)}; pos < end;
133        pos = next, next = scan(input, end, pos))
134   {
135     at_backslash =
136       ((not at_backslash) and ((next - pos) == 1) and (input[pos] == '\\'));
137     if (not at_backslash)
138       output.append(input + pos, next - pos);
139   }
140   return output;
141 }
142 
143 
144 /// Parse a field of a composite-type value.
145 /** @c T is the C++ type of the field we're parsing, and @c index is its
146  * zero-based number.
147  *
148  * @param index Index of the current field, zero-based.  It will increment for
149  *     the next field.
150  * @param input Full input text for the entire composite-type value.
151  * @param pos Starting position (in @c input) of the field that we're parsing.
152  *     After parsing, this will point at the beginning of the next field if
153  *     there is one, or one position past the last character otherwise.
154  * @param field Destination for the parsed value.
155  * @param scan Glyph scanning function for the relevant encoding type.
156  * @param last_field Number of the last field in the value (zero-based).  When
157  *     parsing the last field, this will equal @c index.
158  */
159 template<typename T>
parse_composite_field(std::size_t & index,std::string_view input,std::size_t & pos,T & field,glyph_scanner_func * scan,std::size_t last_field)160 inline void parse_composite_field(
161   std::size_t &index, std::string_view input, std::size_t &pos, T &field,
162   glyph_scanner_func *scan, std::size_t last_field)
163 {
164   assert(index <= last_field);
165   auto next{scan(input.data(), std::size(input), pos)};
166   if ((next - pos) != 1)
167     throw conversion_error{"Non-ASCII character in composite-type syntax."};
168 
169   // Expect a field.
170   switch (input[pos])
171   {
172   case ',':
173   case ')':
174     // The field is empty, i.e, null.
175     if constexpr (nullness<T>::has_null)
176       field = nullness<T>::null();
177     else
178       throw conversion_error{
179         "Can't read composite field " + to_string(index) + ": C++ type " +
180         type_name<T> + " does not support nulls."};
181     break;
182 
183   case '"': {
184     auto const stop{
185       scan_double_quoted_string(input.data(), std::size(input), pos, scan)};
186     auto const text{parse_double_quoted_string(input.data(), stop, pos, scan)};
187     field = from_string<T>(text);
188     pos = stop;
189   }
190   break;
191 
192   default: {
193     auto const stop{scan_unquoted_string<',', ')'>(
194       input.data(), std::size(input), pos, scan)};
195     auto const text{parse_unquoted_string(input.data(), stop, pos, scan)};
196     field = from_string<T>(text);
197     pos = stop;
198   }
199   break;
200   }
201 
202   // Expect a comma or a closing parenthesis.
203   next = scan(input.data(), std::size(input), pos);
204 
205   if ((next - pos) != 1)
206     throw conversion_error{
207       "Unexpected non-ASCII character after composite field: " +
208       std::string{input}};
209 
210   if (index < last_field)
211   {
212     if (input[pos] != ',')
213       throw conversion_error{
214         "Found '" + std::string{input[pos]} +
215         "' in composite value where comma was expected: " + input.data()};
216   }
217   else
218   {
219     if (input[pos] == ',')
220       throw conversion_error{
221         "Composite value contained more fields than the expected " +
222         to_string(last_field) + ": " + input.data()};
223     if (input[pos] != ')')
224       throw conversion_error{
225         "Composite value has unexpected characters where closing parenthesis "
226         "was expected: " +
227         std::string{input}};
228     if (next != std::size(input))
229       throw conversion_error{
230         "Composite value has unexpected text after closing parenthesis: " +
231         std::string{input}};
232   }
233 
234   pos = next;
235   ++index;
236 }
237 
238 
239 /// Conservatively estimate buffer size needed for a composite field.
240 template<typename T>
size_composite_field_buffer(T const & field)241 inline std::size_t size_composite_field_buffer(T const &field)
242 {
243   if constexpr (is_unquoted_safe<T>)
244   {
245     // Safe to copy, without quotes or escaping.  Drop the terminating zero.
246     return size_buffer(field) - 1;
247   }
248   else
249   {
250     // + Opening quote.
251     // + Field budget.
252     // - Terminating zero.
253     // + Escaping for each byte in the field's string representation.
254     // - Escaping for terminating zero.
255     // + Closing quote.
256     return 1 + 2 * (size_buffer(field) - 1) + 1;
257   }
258 }
259 
260 
261 template<typename T>
write_composite_field(char * & pos,char * end,T const & field)262 inline void write_composite_field(char *&pos, char *end, T const &field)
263 {
264   if constexpr (is_unquoted_safe<T>)
265   {
266     // No need for quoting or escaping.  Convert it straight into its final
267     // place in the buffer, and "backspace" the trailing zero.
268     pos = string_traits<T>::into_buf(pos, end, field) - 1;
269   }
270   else
271   {
272     // The field may need escaping, which means we need an intermediate buffer.
273     // To avoid allocating that at run time, we use the end of the buffer that
274     // we have.
275     auto const budget{size_buffer(field)};
276     *pos++ = '"';
277 
278     // Now escape buf into its final position.
279     for (char const c : string_traits<T>::to_buf(end - budget, end, field))
280     {
281       if ((c == '"') or (c == '\\'))
282         *pos++ = '\\';
283 
284       *pos++ = c;
285     }
286 
287     *pos++ = '"';
288   }
289 
290   *pos++ = ',';
291 }
292 } // namespace pqxx::internal
293 #endif
294