1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 // This is a private header for number-to-string formatting utilities
19
20 #pragma once
21
22 #include <array>
23 #include <cassert>
24 #include <chrono>
25 #include <limits>
26 #include <memory>
27 #include <string>
28 #include <type_traits>
29 #include <utility>
30
31 #include "arrow/status.h"
32 #include "arrow/type.h"
33 #include "arrow/type_traits.h"
34 #include "arrow/util/string_view.h"
35 #include "arrow/util/time.h"
36 #include "arrow/util/visibility.h"
37 #include "arrow/vendored/datetime.h"
38
39 namespace arrow {
40 namespace internal {
41
/// \brief The entry point for conversion to strings.
///
/// Only the primary template is declared here; it is never defined.  Each
/// supported Arrow type gets a full or partial specialization further down in
/// this header.  Every specialization exposes a `value_type` alias and a
/// templated `operator()(value, append)` that hands the formatted text to the
/// `append` callable as a `util::string_view`.
///
/// \tparam ARROW_TYPE the Arrow data type class (e.g. Int32Type) to format
/// \tparam Enable SFINAE hook used by the partial specializations that cover a
///         whole family of types (dates, times)
template <typename ARROW_TYPE, typename Enable = void>
class StringFormatter;
45
46 template <typename T>
47 struct is_formattable {
48 template <typename U, typename = typename StringFormatter<U>::value_type>
49 static std::true_type Test(U*);
50
51 template <typename U>
52 static std::false_type Test(...);
53
54 static constexpr bool value = decltype(Test<T>(NULLPTR))::value;
55 };
56
57 template <typename T, typename R = void>
58 using enable_if_formattable = enable_if_t<is_formattable<T>::value, R>;
59
60 template <typename Appender>
61 using Return = decltype(std::declval<Appender>()(util::string_view{}));
62
63 /////////////////////////////////////////////////////////////////////////
64 // Boolean formatting
65
66 template <>
67 class StringFormatter<BooleanType> {
68 public:
69 explicit StringFormatter(const std::shared_ptr<DataType>& = NULLPTR) {}
70
71 using value_type = bool;
72
73 template <typename Appender>
operator()74 Return<Appender> operator()(bool value, Appender&& append) {
75 if (value) {
76 const char string[] = "true";
77 return append(util::string_view(string));
78 } else {
79 const char string[] = "false";
80 return append(util::string_view(string));
81 }
82 }
83 };
84
85 /////////////////////////////////////////////////////////////////////////
86 // Integer formatting
87
88 namespace detail {
89
// A 2x100 direct table mapping integers in [0..99] to their decimal representations.
// The pair for integer i starts at index 2 * i: the first char is the tens digit,
// the second char is the units digit (see FormatTwoDigits below).
// This is only a declaration; the table itself is defined outside this header.
ARROW_EXPORT extern const char digit_pairs[];
92
// Based on fmtlib's format_int class:
// digits are written from right to left into a stack allocated buffer.
//
// Stores `c` in the slot immediately before `*cursor` and moves `*cursor`
// back onto that slot.  The caller must guarantee that the slot exists.
inline void FormatOneChar(char c, char** cursor) {
  char* slot = *cursor - 1;
  *slot = c;
  *cursor = slot;
}
96
97 template <typename Int>
FormatOneDigit(Int value,char ** cursor)98 void FormatOneDigit(Int value, char** cursor) {
99 assert(value >= 0 && value <= 9);
100 FormatOneChar(static_cast<char>('0' + value), cursor);
101 }
102
103 template <typename Int>
FormatTwoDigits(Int value,char ** cursor)104 void FormatTwoDigits(Int value, char** cursor) {
105 assert(value >= 0 && value <= 99);
106 auto digit_pair = &digit_pairs[value * 2];
107 FormatOneChar(digit_pair[1], cursor);
108 FormatOneChar(digit_pair[0], cursor);
109 }
110
// Writes every decimal digit of the non-negative `value` (no sign, no padding)
// just before `*cursor`, moving the cursor back past the digits written.
template <typename Int>
void FormatAllDigits(Int value, char** cursor) {
  assert(value >= 0);
  // Peel off the two lowest digits at a time while at least three remain.
  for (; value >= 100; value /= 100) {
    FormatTwoDigits(value % 100, cursor);
  }

  // One or two leading digits are left.
  if (value < 10) {
    FormatOneDigit(value, cursor);
  } else {
    FormatTwoDigits(value, cursor);
  }
}
125
126 template <typename Int>
FormatAllDigitsLeftPadded(Int value,size_t pad,char pad_char,char ** cursor)127 void FormatAllDigitsLeftPadded(Int value, size_t pad, char pad_char, char** cursor) {
128 auto end = *cursor - pad;
129 FormatAllDigits(value, cursor);
130 while (*cursor > end) {
131 FormatOneChar(pad_char, cursor);
132 }
133 }
134
135 template <size_t BUFFER_SIZE>
ViewDigitBuffer(const std::array<char,BUFFER_SIZE> & buffer,char * cursor)136 util::string_view ViewDigitBuffer(const std::array<char, BUFFER_SIZE>& buffer,
137 char* cursor) {
138 auto buffer_end = buffer.data() + BUFFER_SIZE;
139 return {cursor, static_cast<size_t>(buffer_end - cursor)};
140 }
141
// Absolute value of `value`, returned in the corresponding unsigned type.
// The negation is done in unsigned (modular) arithmetic, so the most negative
// value of a signed type is handled without signed overflow.
template <typename Int, typename UInt = typename std::make_unsigned<Int>::type>
constexpr UInt Abs(Int value) {
  return value < 0 ? static_cast<UInt>(0) - static_cast<UInt>(value)
                   : static_cast<UInt>(value);
}
146
// Number of decimal digits needed to write `value`; any value of 9 or less
// (including zero and negatives) counts as one digit.  Written as a single
// return statement so that it can be evaluated at compile time.
template <typename Int>
constexpr size_t Digits10(Int value) {
  return (value > 9) ? 1 + Digits10(value / 10) : 1;
}
151
152 } // namespace detail
153
154 template <typename ARROW_TYPE>
155 class IntToStringFormatterMixin {
156 public:
157 explicit IntToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}
158
159 using value_type = typename ARROW_TYPE::c_type;
160
161 template <typename Appender>
operator()162 Return<Appender> operator()(value_type value, Appender&& append) {
163 constexpr size_t buffer_size =
164 detail::Digits10(std::numeric_limits<value_type>::max()) + 1;
165
166 std::array<char, buffer_size> buffer;
167 char* cursor = buffer.data() + buffer_size;
168 detail::FormatAllDigits(detail::Abs(value), &cursor);
169 if (value < 0) {
170 detail::FormatOneChar('-', &cursor);
171 }
172 return append(detail::ViewDigitBuffer(buffer, cursor));
173 }
174 };
175
176 template <>
177 class StringFormatter<Int8Type> : public IntToStringFormatterMixin<Int8Type> {
178 using IntToStringFormatterMixin::IntToStringFormatterMixin;
179 };
180
181 template <>
182 class StringFormatter<Int16Type> : public IntToStringFormatterMixin<Int16Type> {
183 using IntToStringFormatterMixin::IntToStringFormatterMixin;
184 };
185
186 template <>
187 class StringFormatter<Int32Type> : public IntToStringFormatterMixin<Int32Type> {
188 using IntToStringFormatterMixin::IntToStringFormatterMixin;
189 };
190
191 template <>
192 class StringFormatter<Int64Type> : public IntToStringFormatterMixin<Int64Type> {
193 using IntToStringFormatterMixin::IntToStringFormatterMixin;
194 };
195
196 template <>
197 class StringFormatter<UInt8Type> : public IntToStringFormatterMixin<UInt8Type> {
198 using IntToStringFormatterMixin::IntToStringFormatterMixin;
199 };
200
201 template <>
202 class StringFormatter<UInt16Type> : public IntToStringFormatterMixin<UInt16Type> {
203 using IntToStringFormatterMixin::IntToStringFormatterMixin;
204 };
205
206 template <>
207 class StringFormatter<UInt32Type> : public IntToStringFormatterMixin<UInt32Type> {
208 using IntToStringFormatterMixin::IntToStringFormatterMixin;
209 };
210
211 template <>
212 class StringFormatter<UInt64Type> : public IntToStringFormatterMixin<UInt64Type> {
213 using IntToStringFormatterMixin::IntToStringFormatterMixin;
214 };
215
216 /////////////////////////////////////////////////////////////////////////
217 // Floating-point formatting
218
/// \brief Non-template base class for floating-point formatting.
///
/// The constructor, destructor and FormatFloat() overloads are only declared
/// here; the actual conversion lives behind the opaque `Impl` type, which is
/// likewise only declared in this header.
class ARROW_EXPORT FloatToStringFormatter {
 public:
  FloatToStringFormatter();
  ~FloatToStringFormatter();

  // Formats `v` into `out_buffer`, whose capacity is `out_size` characters.
  // Returns the number of characters written
  int FormatFloat(float v, char* out_buffer, int out_size);
  int FormatFloat(double v, char* out_buffer, int out_size);

 protected:
  // Opaque implementation state, owned by this object.
  struct Impl;
  std::unique_ptr<Impl> impl_;
};
232
233 template <typename ARROW_TYPE>
234 class FloatToStringFormatterMixin : public FloatToStringFormatter {
235 public:
236 using value_type = typename ARROW_TYPE::c_type;
237
238 static constexpr int buffer_size = 50;
239
240 explicit FloatToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}
241
242 template <typename Appender>
operator()243 Return<Appender> operator()(value_type value, Appender&& append) {
244 char buffer[buffer_size];
245 int size = FormatFloat(value, buffer, buffer_size);
246 return append(util::string_view(buffer, size));
247 }
248 };
249
250 template <>
251 class StringFormatter<FloatType> : public FloatToStringFormatterMixin<FloatType> {
252 public:
253 using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
254 };
255
256 template <>
257 class StringFormatter<DoubleType> : public FloatToStringFormatterMixin<DoubleType> {
258 public:
259 using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
260 };
261
262 /////////////////////////////////////////////////////////////////////////
263 // Temporal formatting
264
265 namespace detail {
266
267 template <typename V>
BufferSizeYYYY_MM_DD()268 constexpr size_t BufferSizeYYYY_MM_DD() {
269 return detail::Digits10(9999) + 1 + detail::Digits10(12) + 1 + detail::Digits10(31);
270 }
271
FormatYYYY_MM_DD(arrow_vendored::date::year_month_day ymd,char ** cursor)272 inline void FormatYYYY_MM_DD(arrow_vendored::date::year_month_day ymd, char** cursor) {
273 FormatTwoDigits(static_cast<unsigned>(ymd.day()), cursor);
274 FormatOneChar('-', cursor);
275 FormatTwoDigits(static_cast<unsigned>(ymd.month()), cursor);
276 FormatOneChar('-', cursor);
277 auto year = static_cast<int>(ymd.year());
278 assert(year <= 9999);
279 FormatTwoDigits(year % 100, cursor);
280 FormatTwoDigits(year / 100, cursor);
281 }
282
283 template <typename Duration>
BufferSizeHH_MM_SS()284 constexpr size_t BufferSizeHH_MM_SS() {
285 return detail::Digits10(23) + 1 + detail::Digits10(59) + 1 + detail::Digits10(59) + 1 +
286 detail::Digits10(Duration::period::den) - 1;
287 }
288
289 template <typename Duration>
FormatHH_MM_SS(arrow_vendored::date::hh_mm_ss<Duration> hms,char ** cursor)290 void FormatHH_MM_SS(arrow_vendored::date::hh_mm_ss<Duration> hms, char** cursor) {
291 constexpr size_t subsecond_digits = Digits10(Duration::period::den) - 1;
292 if (subsecond_digits != 0) {
293 FormatAllDigitsLeftPadded(hms.subseconds().count(), subsecond_digits, '0', cursor);
294 FormatOneChar('.', cursor);
295 }
296 FormatTwoDigits(hms.seconds().count(), cursor);
297 FormatOneChar(':', cursor);
298 FormatTwoDigits(hms.minutes().count(), cursor);
299 FormatOneChar(':', cursor);
300 FormatTwoDigits(hms.hours().count(), cursor);
301 }
302
303 } // namespace detail
304
305 template <>
306 class StringFormatter<DurationType> : public IntToStringFormatterMixin<DurationType> {
307 using IntToStringFormatterMixin::IntToStringFormatterMixin;
308 };
309
310 template <typename T>
311 class StringFormatter<T, enable_if_date<T>> {
312 public:
313 using value_type = typename T::c_type;
314
315 explicit StringFormatter(const std::shared_ptr<DataType>& = NULLPTR) {}
316
317 template <typename Appender>
operator()318 Return<Appender> operator()(value_type value, Appender&& append) {
319 arrow_vendored::date::days since_epoch;
320 if (T::type_id == Type::DATE32) {
321 since_epoch = arrow_vendored::date::days{value};
322 } else {
323 since_epoch = std::chrono::duration_cast<arrow_vendored::date::days>(
324 std::chrono::milliseconds{value});
325 }
326
327 arrow_vendored::date::sys_days timepoint_days{since_epoch};
328
329 constexpr size_t buffer_size = detail::BufferSizeYYYY_MM_DD<value_type>();
330
331 std::array<char, buffer_size> buffer;
332 char* cursor = buffer.data() + buffer_size;
333
334 detail::FormatYYYY_MM_DD(arrow_vendored::date::year_month_day{timepoint_days},
335 &cursor);
336 return append(detail::ViewDigitBuffer(buffer, cursor));
337 }
338 };
339
340 template <typename T>
341 class StringFormatter<T, enable_if_time<T>> {
342 public:
343 using value_type = typename T::c_type;
344
StringFormatter(const std::shared_ptr<DataType> & type)345 explicit StringFormatter(const std::shared_ptr<DataType>& type)
346 : unit_(checked_cast<const T&>(*type).unit()) {}
347
348 template <typename Duration, typename Appender>
operator()349 Return<Appender> operator()(Duration, value_type count, Appender&& append) {
350 Duration since_midnight{count};
351
352 constexpr size_t buffer_size = detail::BufferSizeHH_MM_SS<Duration>();
353
354 std::array<char, buffer_size> buffer;
355 char* cursor = buffer.data() + buffer_size;
356
357 detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), &cursor);
358 return append(detail::ViewDigitBuffer(buffer, cursor));
359 }
360
361 template <typename Appender>
operator()362 Return<Appender> operator()(value_type value, Appender&& append) {
363 return util::VisitDuration(unit_, *this, value, std::forward<Appender>(append));
364 }
365
366 private:
367 TimeUnit::type unit_;
368 };
369
370 template <>
371 class StringFormatter<TimestampType> {
372 public:
373 using value_type = int64_t;
374
StringFormatter(const std::shared_ptr<DataType> & type)375 explicit StringFormatter(const std::shared_ptr<DataType>& type)
376 : unit_(checked_cast<const TimestampType&>(*type).unit()) {}
377
378 template <typename Duration, typename Appender>
operator()379 Return<Appender> operator()(Duration, value_type count, Appender&& append) {
380 Duration since_epoch{count};
381
382 arrow_vendored::date::sys_days timepoint_days{
383 arrow_vendored::date::floor<arrow_vendored::date::days>(since_epoch)};
384
385 Duration since_midnight = since_epoch - timepoint_days.time_since_epoch();
386
387 constexpr size_t buffer_size = detail::BufferSizeYYYY_MM_DD<value_type>() + 1 +
388 detail::BufferSizeHH_MM_SS<Duration>();
389
390 std::array<char, buffer_size> buffer;
391 char* cursor = buffer.data() + buffer_size;
392
393 detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), &cursor);
394 detail::FormatOneChar(' ', &cursor);
395 detail::FormatYYYY_MM_DD(arrow_vendored::date::year_month_day{timepoint_days},
396 &cursor);
397 return append(detail::ViewDigitBuffer(buffer, cursor));
398 }
399
400 template <typename Appender>
operator()401 Return<Appender> operator()(value_type value, Appender&& append) {
402 return util::VisitDuration(unit_, *this, value, std::forward<Appender>(append));
403 }
404
405 private:
406 TimeUnit::type unit_;
407 };
408
409 } // namespace internal
410 } // namespace arrow
411