1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 // This is a private header for number-to-string formatting utilities
19 
20 #pragma once
21 
22 #include <array>
23 #include <cassert>
24 #include <chrono>
25 #include <limits>
26 #include <memory>
27 #include <string>
28 #include <type_traits>
29 #include <utility>
30 
31 #include "arrow/status.h"
32 #include "arrow/type.h"
33 #include "arrow/type_traits.h"
34 #include "arrow/util/string_view.h"
35 #include "arrow/util/time.h"
36 #include "arrow/util/visibility.h"
37 #include "arrow/vendored/datetime.h"
38 
39 namespace arrow {
40 namespace internal {
41 
/// \brief The entry point for conversion to strings.
///
/// Only the primary template is declared here; it has no definition.
/// The specializations in this file each expose a `value_type` member type
/// and a call operator taking a value and an appender callable. `Enable` is
/// the hook used by the SFINAE-constrained partial specializations.
template <typename ARROW_TYPE, typename Enable = void>
class StringFormatter;
45 
46 template <typename T>
47 struct is_formattable {
48   template <typename U, typename = typename StringFormatter<U>::value_type>
49   static std::true_type Test(U*);
50 
51   template <typename U>
52   static std::false_type Test(...);
53 
54   static constexpr bool value = decltype(Test<T>(NULLPTR))::value;
55 };
56 
/// \brief SFINAE helper built on enable_if_t: the condition is
/// is_formattable<T>::value and the resulting type is R (void by default).
template <typename T, typename R = void>
using enable_if_formattable = enable_if_t<is_formattable<T>::value, R>;
59 
/// \brief The type obtained by calling an Appender with a util::string_view.
/// The formatter call operators in this file use it as their return type.
template <typename Appender>
using Return = decltype(std::declval<Appender>()(util::string_view{}));
62 
63 /////////////////////////////////////////////////////////////////////////
64 // Boolean formatting
65 
66 template <>
67 class StringFormatter<BooleanType> {
68  public:
69   explicit StringFormatter(const std::shared_ptr<DataType>& = NULLPTR) {}
70 
71   using value_type = bool;
72 
73   template <typename Appender>
operator()74   Return<Appender> operator()(bool value, Appender&& append) {
75     if (value) {
76       const char string[] = "true";
77       return append(util::string_view(string));
78     } else {
79       const char string[] = "false";
80       return append(util::string_view(string));
81     }
82   }
83 };
84 
85 /////////////////////////////////////////////////////////////////////////
86 // Integer formatting
87 
88 namespace detail {
89 
// Direct lookup table mapping integers in [0..99] to their decimal
// representations: 100 entries of 2 characters each, with entry i stored at
// offsets 2 * i and 2 * i + 1. Only declared here; defined out of line.
ARROW_EXPORT extern const char digit_pairs[];
92 
// Approach borrowed from fmtlib's format_int class: characters are produced
// from right to left, into a stack allocated buffer.
//
// Move *cursor back by one position, then store `c` at the new position.
inline void FormatOneChar(char c, char** cursor) {
  char* slot = *cursor - 1;
  *slot = c;
  *cursor = slot;
}
96 
97 template <typename Int>
FormatOneDigit(Int value,char ** cursor)98 void FormatOneDigit(Int value, char** cursor) {
99   assert(value >= 0 && value <= 9);
100   FormatOneChar(static_cast<char>('0' + value), cursor);
101 }
102 
103 template <typename Int>
FormatTwoDigits(Int value,char ** cursor)104 void FormatTwoDigits(Int value, char** cursor) {
105   assert(value >= 0 && value <= 99);
106   auto digit_pair = &digit_pairs[value * 2];
107   FormatOneChar(digit_pair[1], cursor);
108   FormatOneChar(digit_pair[0], cursor);
109 }
110 
// Write every decimal digit of the non-negative `value`, without padding,
// ending just before the initial *cursor. On return the cursor points at the
// first (most significant) digit.
template <typename Int>
void FormatAllDigits(Int value, char** cursor) {
  assert(value >= 0);
  // Peel off the two lowest digits per iteration while at least three remain.
  for (; value >= 100; value /= 100) {
    FormatTwoDigits(value % 100, cursor);
  }

  // One or two digits are left at this point.
  if (value < 10) {
    FormatOneDigit(value, cursor);
    return;
  }
  FormatTwoDigits(value, cursor);
}
125 
126 template <typename Int>
FormatAllDigitsLeftPadded(Int value,size_t pad,char pad_char,char ** cursor)127 void FormatAllDigitsLeftPadded(Int value, size_t pad, char pad_char, char** cursor) {
128   auto end = *cursor - pad;
129   FormatAllDigits(value, cursor);
130   while (*cursor > end) {
131     FormatOneChar(pad_char, cursor);
132   }
133 }
134 
135 template <size_t BUFFER_SIZE>
ViewDigitBuffer(const std::array<char,BUFFER_SIZE> & buffer,char * cursor)136 util::string_view ViewDigitBuffer(const std::array<char, BUFFER_SIZE>& buffer,
137                                   char* cursor) {
138   auto buffer_end = buffer.data() + BUFFER_SIZE;
139   return {cursor, static_cast<size_t>(buffer_end - cursor)};
140 }
141 
// Absolute value of `value`, returned in the unsigned counterpart of Int so
// that the most negative value of a signed type is representable as well.
// Negation is carried out on the unsigned representation.
template <typename Int, typename UInt = typename std::make_unsigned<Int>::type>
constexpr UInt Abs(Int value) {
  return value >= 0 ? static_cast<UInt>(value) : UInt(0) - static_cast<UInt>(value);
}
146 
// Number of decimal digits needed to print `value`. Any value of 9 or less
// (negative values included) counts as one digit.
template <typename Int>
constexpr size_t Digits10(Int value) {
  return value >= 10 ? 1 + Digits10(value / 10) : 1;
}
151 
152 }  // namespace detail
153 
154 template <typename ARROW_TYPE>
155 class IntToStringFormatterMixin {
156  public:
157   explicit IntToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}
158 
159   using value_type = typename ARROW_TYPE::c_type;
160 
161   template <typename Appender>
operator()162   Return<Appender> operator()(value_type value, Appender&& append) {
163     constexpr size_t buffer_size =
164         detail::Digits10(std::numeric_limits<value_type>::max()) + 1;
165 
166     std::array<char, buffer_size> buffer;
167     char* cursor = buffer.data() + buffer_size;
168     detail::FormatAllDigits(detail::Abs(value), &cursor);
169     if (value < 0) {
170       detail::FormatOneChar('-', &cursor);
171     }
172     return append(detail::ViewDigitBuffer(buffer, cursor));
173   }
174 };
175 
/// \brief Formatter for Int8Type; the constructor and call operator come
/// from IntToStringFormatterMixin.
template <>
class StringFormatter<Int8Type> : public IntToStringFormatterMixin<Int8Type> {
  using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
180 
/// \brief Formatter for Int16Type; the constructor and call operator come
/// from IntToStringFormatterMixin.
template <>
class StringFormatter<Int16Type> : public IntToStringFormatterMixin<Int16Type> {
  using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
185 
/// \brief Formatter for Int32Type; the constructor and call operator come
/// from IntToStringFormatterMixin.
template <>
class StringFormatter<Int32Type> : public IntToStringFormatterMixin<Int32Type> {
  using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
190 
/// \brief Formatter for Int64Type; the constructor and call operator come
/// from IntToStringFormatterMixin.
template <>
class StringFormatter<Int64Type> : public IntToStringFormatterMixin<Int64Type> {
  using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
195 
/// \brief Formatter for UInt8Type; the constructor and call operator come
/// from IntToStringFormatterMixin.
template <>
class StringFormatter<UInt8Type> : public IntToStringFormatterMixin<UInt8Type> {
  using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
200 
/// \brief Formatter for UInt16Type; the constructor and call operator come
/// from IntToStringFormatterMixin.
template <>
class StringFormatter<UInt16Type> : public IntToStringFormatterMixin<UInt16Type> {
  using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
205 
/// \brief Formatter for UInt32Type; the constructor and call operator come
/// from IntToStringFormatterMixin.
template <>
class StringFormatter<UInt32Type> : public IntToStringFormatterMixin<UInt32Type> {
  using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
210 
/// \brief Formatter for UInt64Type; the constructor and call operator come
/// from IntToStringFormatterMixin.
template <>
class StringFormatter<UInt64Type> : public IntToStringFormatterMixin<UInt64Type> {
  using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
215 
216 /////////////////////////////////////////////////////////////////////////
217 // Floating-point formatting
218 
/// \brief Non-template base class for the floating-point formatters.
///
/// Its state is kept in the Impl struct, which is only forward-declared
/// here; the constructor, destructor and FormatFloat overloads are defined
/// out of line.
class ARROW_EXPORT FloatToStringFormatter {
 public:
  FloatToStringFormatter();
  ~FloatToStringFormatter();

  // Format `v` into `out_buffer`; `out_size` is presumably the capacity of
  // `out_buffer` in characters (the mixin in this file passes its buffer size).
  // Returns the number of characters written
  int FormatFloat(float v, char* out_buffer, int out_size);
  int FormatFloat(double v, char* out_buffer, int out_size);

 protected:
  struct Impl;
  // Owning pointer to the out-of-line implementation state.
  std::unique_ptr<Impl> impl_;
};
232 
233 template <typename ARROW_TYPE>
234 class FloatToStringFormatterMixin : public FloatToStringFormatter {
235  public:
236   using value_type = typename ARROW_TYPE::c_type;
237 
238   static constexpr int buffer_size = 50;
239 
240   explicit FloatToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}
241 
242   template <typename Appender>
operator()243   Return<Appender> operator()(value_type value, Appender&& append) {
244     char buffer[buffer_size];
245     int size = FormatFloat(value, buffer, buffer_size);
246     return append(util::string_view(buffer, size));
247   }
248 };
249 
/// \brief Formatter for FloatType; the constructor and call operator come
/// from FloatToStringFormatterMixin.
template <>
class StringFormatter<FloatType> : public FloatToStringFormatterMixin<FloatType> {
 public:
  using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
};
255 
/// \brief Formatter for DoubleType; the constructor and call operator come
/// from FloatToStringFormatterMixin.
template <>
class StringFormatter<DoubleType> : public FloatToStringFormatterMixin<DoubleType> {
 public:
  using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
};
261 
262 /////////////////////////////////////////////////////////////////////////
263 // Temporal formatting
264 
265 namespace detail {
266 
267 template <typename V>
BufferSizeYYYY_MM_DD()268 constexpr size_t BufferSizeYYYY_MM_DD() {
269   return detail::Digits10(9999) + 1 + detail::Digits10(12) + 1 + detail::Digits10(31);
270 }
271 
FormatYYYY_MM_DD(arrow_vendored::date::year_month_day ymd,char ** cursor)272 inline void FormatYYYY_MM_DD(arrow_vendored::date::year_month_day ymd, char** cursor) {
273   FormatTwoDigits(static_cast<unsigned>(ymd.day()), cursor);
274   FormatOneChar('-', cursor);
275   FormatTwoDigits(static_cast<unsigned>(ymd.month()), cursor);
276   FormatOneChar('-', cursor);
277   auto year = static_cast<int>(ymd.year());
278   assert(year <= 9999);
279   FormatTwoDigits(year % 100, cursor);
280   FormatTwoDigits(year / 100, cursor);
281 }
282 
283 template <typename Duration>
BufferSizeHH_MM_SS()284 constexpr size_t BufferSizeHH_MM_SS() {
285   return detail::Digits10(23) + 1 + detail::Digits10(59) + 1 + detail::Digits10(59) + 1 +
286          detail::Digits10(Duration::period::den) - 1;
287 }
288 
289 template <typename Duration>
FormatHH_MM_SS(arrow_vendored::date::hh_mm_ss<Duration> hms,char ** cursor)290 void FormatHH_MM_SS(arrow_vendored::date::hh_mm_ss<Duration> hms, char** cursor) {
291   constexpr size_t subsecond_digits = Digits10(Duration::period::den) - 1;
292   if (subsecond_digits != 0) {
293     FormatAllDigitsLeftPadded(hms.subseconds().count(), subsecond_digits, '0', cursor);
294     FormatOneChar('.', cursor);
295   }
296   FormatTwoDigits(hms.seconds().count(), cursor);
297   FormatOneChar(':', cursor);
298   FormatTwoDigits(hms.minutes().count(), cursor);
299   FormatOneChar(':', cursor);
300   FormatTwoDigits(hms.hours().count(), cursor);
301 }
302 
303 }  // namespace detail
304 
/// \brief Formatter for DurationType: the value is printed as a plain integer
/// by IntToStringFormatterMixin (no unit is appended by that mixin).
template <>
class StringFormatter<DurationType> : public IntToStringFormatterMixin<DurationType> {
  using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
309 
310 template <typename T>
311 class StringFormatter<T, enable_if_date<T>> {
312  public:
313   using value_type = typename T::c_type;
314 
315   explicit StringFormatter(const std::shared_ptr<DataType>& = NULLPTR) {}
316 
317   template <typename Appender>
operator()318   Return<Appender> operator()(value_type value, Appender&& append) {
319     arrow_vendored::date::days since_epoch;
320     if (T::type_id == Type::DATE32) {
321       since_epoch = arrow_vendored::date::days{value};
322     } else {
323       since_epoch = std::chrono::duration_cast<arrow_vendored::date::days>(
324           std::chrono::milliseconds{value});
325     }
326 
327     arrow_vendored::date::sys_days timepoint_days{since_epoch};
328 
329     constexpr size_t buffer_size = detail::BufferSizeYYYY_MM_DD<value_type>();
330 
331     std::array<char, buffer_size> buffer;
332     char* cursor = buffer.data() + buffer_size;
333 
334     detail::FormatYYYY_MM_DD(arrow_vendored::date::year_month_day{timepoint_days},
335                              &cursor);
336     return append(detail::ViewDigitBuffer(buffer, cursor));
337   }
338 };
339 
340 template <typename T>
341 class StringFormatter<T, enable_if_time<T>> {
342  public:
343   using value_type = typename T::c_type;
344 
StringFormatter(const std::shared_ptr<DataType> & type)345   explicit StringFormatter(const std::shared_ptr<DataType>& type)
346       : unit_(checked_cast<const T&>(*type).unit()) {}
347 
348   template <typename Duration, typename Appender>
operator()349   Return<Appender> operator()(Duration, value_type count, Appender&& append) {
350     Duration since_midnight{count};
351 
352     constexpr size_t buffer_size = detail::BufferSizeHH_MM_SS<Duration>();
353 
354     std::array<char, buffer_size> buffer;
355     char* cursor = buffer.data() + buffer_size;
356 
357     detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), &cursor);
358     return append(detail::ViewDigitBuffer(buffer, cursor));
359   }
360 
361   template <typename Appender>
operator()362   Return<Appender> operator()(value_type value, Appender&& append) {
363     return util::VisitDuration(unit_, *this, value, std::forward<Appender>(append));
364   }
365 
366  private:
367   TimeUnit::type unit_;
368 };
369 
370 template <>
371 class StringFormatter<TimestampType> {
372  public:
373   using value_type = int64_t;
374 
StringFormatter(const std::shared_ptr<DataType> & type)375   explicit StringFormatter(const std::shared_ptr<DataType>& type)
376       : unit_(checked_cast<const TimestampType&>(*type).unit()) {}
377 
378   template <typename Duration, typename Appender>
operator()379   Return<Appender> operator()(Duration, value_type count, Appender&& append) {
380     Duration since_epoch{count};
381 
382     arrow_vendored::date::sys_days timepoint_days{
383         arrow_vendored::date::floor<arrow_vendored::date::days>(since_epoch)};
384 
385     Duration since_midnight = since_epoch - timepoint_days.time_since_epoch();
386 
387     constexpr size_t buffer_size = detail::BufferSizeYYYY_MM_DD<value_type>() + 1 +
388                                    detail::BufferSizeHH_MM_SS<Duration>();
389 
390     std::array<char, buffer_size> buffer;
391     char* cursor = buffer.data() + buffer_size;
392 
393     detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), &cursor);
394     detail::FormatOneChar(' ', &cursor);
395     detail::FormatYYYY_MM_DD(arrow_vendored::date::year_month_day{timepoint_days},
396                              &cursor);
397     return append(detail::ViewDigitBuffer(buffer, cursor));
398   }
399 
400   template <typename Appender>
operator()401   Return<Appender> operator()(value_type value, Appender&& append) {
402     return util::VisitDuration(unit_, *this, value, std::forward<Appender>(append));
403   }
404 
405  private:
406   TimeUnit::type unit_;
407 };
408 
409 }  // namespace internal
410 }  // namespace arrow
411