1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #pragma once
19 
#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <memory>
#include <sstream>
#include <string>
26 
27 #include "arrow/util/string_view.h"
28 
29 #include "parquet/platform.h"
30 #include "parquet/type_fwd.h"
31 
namespace arrow {
namespace util {

// Forward declaration of the Arrow codec class; this header only refers to
// it by name.
class Codec;

}  // namespace util
}  // namespace arrow
39 
40 namespace parquet {
41 
42 // ----------------------------------------------------------------------
43 // Metadata enums to match Thrift metadata
44 //
45 // The reason we maintain our own enums is to avoid transitive dependency on
46 // the compiled Thrift headers (and thus thrift/Thrift.h) for users of the
47 // public API. After building parquet-cpp, you should not need to include
48 // Thrift headers in your application. This means some boilerplate to convert
49 // between our types and Parquet's Thrift types.
50 //
// We can also add special values like NONE to distinguish between metadata
// values being set and not set. As an example, consider ConvertedType and
// CompressionCodec.
54 
// Mirrors parquet::Type
//
// Physical (primitive) types. The enum is nested in a struct so that the
// enumerators are spelled Type::INT32 and the enum type itself is Type::type.
struct Type {
  enum type {
    BOOLEAN = 0,
    INT32 = 1,
    INT64 = 2,
    INT96 = 3,
    FLOAT = 4,
    DOUBLE = 5,
    BYTE_ARRAY = 6,
    FIXED_LEN_BYTE_ARRAY = 7,
    // Should always be last element.
    UNDEFINED = 8
  };
};
70 
// Mirrors parquet::ConvertedType
//
// Legacy converted types. NONE is the "not set" marker. Every enumerator is
// given its numeric value explicitly; note the gap between INTERVAL and NA.
struct ConvertedType {
  enum type {
    NONE = 0,
    UTF8 = 1,
    MAP = 2,
    MAP_KEY_VALUE = 3,
    LIST = 4,
    ENUM = 5,
    DECIMAL = 6,
    DATE = 7,
    TIME_MILLIS = 8,
    TIME_MICROS = 9,
    TIMESTAMP_MILLIS = 10,
    TIMESTAMP_MICROS = 11,
    UINT_8 = 12,
    UINT_16 = 13,
    UINT_32 = 14,
    UINT_64 = 15,
    INT_8 = 16,
    INT_16 = 17,
    INT_32 = 18,
    INT_64 = 19,
    JSON = 20,
    BSON = 21,
    INTERVAL = 22,
    NA = 25,
    // Should always be last element.
    UNDEFINED = 26
  };
};
102 
// Forward declaration of the Thrift-side LogicalType, so that this header can
// name it in signatures without pulling in the Thrift headers.
namespace format {

class LogicalType;

}
109 
// Mirrors parquet::FieldRepetitionType
struct Repetition {
  enum type {
    REQUIRED = 0,
    OPTIONAL = 1,
    REPEATED = 2,
    // Should always be last element.
    UNDEFINED = 3
  };
};
114 
// Sort order used for page and column statistics.
//
// Each type is associated with a sort order (for example, UTF8 columns should
// use UNSIGNED), and column statistics are aggregated using a sort order. As
// of parquet-format version 2.3.1, statistics are always aggregated with the
// SIGNED order and the order is not stored in the Parquet file, so these
// statistics are discarded for types that need an unsigned order.
// See PARQUET-686.
//
// Reference:
// parquet-mr/parquet-hadoop/src/main/java/org/apache/parquet/
//                            format/converter/ParquetMetadataConverter.java
struct SortOrder {
  enum type {
    SIGNED = 0,
    UNSIGNED = 1,
    UNKNOWN = 2
  };
};
127 
namespace schema {

// Decimal parameters that accompany a legacy converted type.
// Defaults used elsewhere in this header are {false, -1, -1}.
struct DecimalMetadata {
  // NOTE(review): presumably true only when scale and precision are meaningful.
  bool isset;
  int32_t scale;
  int32_t precision;
};

}  // namespace schema
137 
/// \brief Implementation of parquet.thrift LogicalType types.
///
/// Instances are handed out as std::shared_ptr<const LogicalType> by the static
/// factory functions below. The class is not copyable and its default
/// constructor is protected.
class PARQUET_EXPORT LogicalType {
 public:
  /// \brief Enumerated kind of a logical type, as returned by type().
  struct Type {
    enum type {
      UNKNOWN = 0,
      STRING = 1,
      MAP,
      LIST,
      ENUM,
      DECIMAL,
      DATE,
      TIME,
      TIMESTAMP,
      INTERVAL,
      INT,
      NIL,  // Thrift NullType
      JSON,
      BSON,
      UUID,
      NONE
    };
  };

  /// \brief Resolution accepted by the Time() and Timestamp() factories.
  struct TimeUnit {
    enum unit { UNKNOWN = 0, MILLIS = 1, MICROS, NANOS };
  };

  /// \brief If possible, return a logical type equivalent to the given legacy
  /// converted type (and decimal metadata if applicable).
  static std::shared_ptr<const LogicalType> FromConvertedType(
      const parquet::ConvertedType::type converted_type,
      const parquet::schema::DecimalMetadata converted_decimal_metadata = {false, -1,
                                                                           -1});

  /// \brief Return the logical type represented by the Thrift intermediary object.
  static std::shared_ptr<const LogicalType> FromThrift(
      const parquet::format::LogicalType& thrift_logical_type);

  /// \brief Return the explicitly requested logical type.
  static std::shared_ptr<const LogicalType> String();
  static std::shared_ptr<const LogicalType> Map();
  static std::shared_ptr<const LogicalType> List();
  static std::shared_ptr<const LogicalType> Enum();
  static std::shared_ptr<const LogicalType> Decimal(int32_t precision, int32_t scale = 0);
  static std::shared_ptr<const LogicalType> Date();
  static std::shared_ptr<const LogicalType> Time(bool is_adjusted_to_utc,
                                                 LogicalType::TimeUnit::unit time_unit);

  /// \brief Create a Timestamp logical type
  /// \param[in] is_adjusted_to_utc set true if the data is UTC-normalized
  /// \param[in] time_unit the resolution of the timestamp
  /// \param[in] is_from_converted_type if true, the timestamp was generated
  /// by translating a legacy converted type of TIMESTAMP_MILLIS or
  /// TIMESTAMP_MICROS. Default is false.
  /// \param[in] force_set_converted_type if true, always set the
  /// legacy ConvertedType TIMESTAMP_MICROS and TIMESTAMP_MILLIS
  /// metadata. Default is false
  static std::shared_ptr<const LogicalType> Timestamp(
      bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit,
      bool is_from_converted_type = false, bool force_set_converted_type = false);

  static std::shared_ptr<const LogicalType> Interval();
  static std::shared_ptr<const LogicalType> Int(int bit_width, bool is_signed);
  static std::shared_ptr<const LogicalType> Null();
  static std::shared_ptr<const LogicalType> JSON();
  static std::shared_ptr<const LogicalType> BSON();
  static std::shared_ptr<const LogicalType> UUID();
  static std::shared_ptr<const LogicalType> None();
  static std::shared_ptr<const LogicalType> Unknown();

  /// \brief Return true if this logical type is consistent with the given underlying
  /// physical type.
  bool is_applicable(parquet::Type::type primitive_type,
                     int32_t primitive_length = -1) const;

  /// \brief Return true if this logical type is equivalent to the given legacy converted
  /// type (and decimal metadata if applicable).
  bool is_compatible(parquet::ConvertedType::type converted_type,
                     parquet::schema::DecimalMetadata converted_decimal_metadata = {
                         false, -1, -1}) const;

  /// \brief If possible, return the legacy converted type (and decimal metadata if
  /// applicable) equivalent to this logical type.
  parquet::ConvertedType::type ToConvertedType(
      parquet::schema::DecimalMetadata* out_decimal_metadata) const;

  /// \brief Return a printable representation of this logical type.
  std::string ToString() const;

  /// \brief Return a JSON representation of this logical type.
  std::string ToJSON() const;

  /// \brief Return a serializable Thrift object for this logical type.
  parquet::format::LogicalType ToThrift() const;

  /// \brief Return true if the given logical type is equivalent to this logical type.
  bool Equals(const LogicalType& other) const;

  /// \brief Return the enumerated type of this logical type.
  LogicalType::Type::type type() const;

  /// \brief Return the appropriate sort order for this logical type.
  SortOrder::type sort_order() const;

  // Type checks ...
  bool is_string() const;
  bool is_map() const;
  bool is_list() const;
  bool is_enum() const;
  bool is_decimal() const;
  bool is_date() const;
  bool is_time() const;
  bool is_timestamp() const;
  bool is_interval() const;
  bool is_int() const;
  bool is_null() const;
  bool is_JSON() const;
  bool is_BSON() const;
  bool is_UUID() const;
  bool is_none() const;
  /// \brief Return true if this logical type is of a known type.
  bool is_valid() const;
  // NOTE(review): presumably the negation of is_valid(); confirm in the implementation.
  bool is_invalid() const;
  /// \brief Return true if this logical type is suitable for a schema GroupNode.
  bool is_nested() const;
  // NOTE(review): presumably the negation of is_nested(); confirm in the implementation.
  bool is_nonnested() const;
  /// \brief Return true if this logical type is included in the Thrift output for its
  /// node.
  bool is_serialized() const;

  // Not copyable: copy construction and copy assignment are deleted.
  LogicalType(const LogicalType&) = delete;
  LogicalType& operator=(const LogicalType&) = delete;
  virtual ~LogicalType() noexcept;

 protected:
  // Only derived classes can default-construct.
  LogicalType();

  // Implementation class, declared here and defined out of this header.
  class Impl;
  std::unique_ptr<const Impl> impl_;
};
279 
/// \brief Allowed for physical type BYTE_ARRAY, must be encoded as UTF-8.
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT StringLogicalType : public LogicalType {
 public:
  /// \brief Return the type as a shared pointer to the const LogicalType base.
  static std::shared_ptr<const LogicalType> Make();

 private:
  StringLogicalType() = default;
};
288 
/// \brief Allowed for group nodes only.
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT MapLogicalType : public LogicalType {
 public:
  /// \brief Return the type as a shared pointer to the const LogicalType base.
  static std::shared_ptr<const LogicalType> Make();

 private:
  MapLogicalType() = default;
};
297 
/// \brief Allowed for group nodes only.
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT ListLogicalType : public LogicalType {
 public:
  /// \brief Return the type as a shared pointer to the const LogicalType base.
  static std::shared_ptr<const LogicalType> Make();

 private:
  ListLogicalType() = default;
};
306 
/// \brief Allowed for physical type BYTE_ARRAY, must be encoded as UTF-8.
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT EnumLogicalType : public LogicalType {
 public:
  /// \brief Return the type as a shared pointer to the const LogicalType base.
  static std::shared_ptr<const LogicalType> Make();

 private:
  EnumLogicalType() = default;
};
315 
/// \brief Allowed for physical type INT32, INT64, FIXED_LEN_BYTE_ARRAY, or BYTE_ARRAY,
/// depending on the precision.
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT DecimalLogicalType : public LogicalType {
 public:
  /// \brief Return a decimal type; scale defaults to 0 when omitted.
  static std::shared_ptr<const LogicalType> Make(int32_t precision, int32_t scale = 0);
  // NOTE(review): presumably the accessors return the values given to Make().
  int32_t precision() const;
  int32_t scale() const;

 private:
  DecimalLogicalType() = default;
};
327 
/// \brief Allowed for physical type INT32.
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT DateLogicalType : public LogicalType {
 public:
  /// \brief Return the type as a shared pointer to the const LogicalType base.
  static std::shared_ptr<const LogicalType> Make();

 private:
  DateLogicalType() = default;
};
336 
/// \brief Allowed for physical type INT32 (for MILLIS) or INT64 (for MICROS and NANOS).
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT TimeLogicalType : public LogicalType {
 public:
  /// \brief Return a time type with the given UTC flag and resolution.
  static std::shared_ptr<const LogicalType> Make(bool is_adjusted_to_utc,
                                                 LogicalType::TimeUnit::unit time_unit);
  // NOTE(review): presumably the accessors return the values given to Make().
  bool is_adjusted_to_utc() const;
  LogicalType::TimeUnit::unit time_unit() const;

 private:
  TimeLogicalType() = default;
};
348 
/// \brief Allowed for physical type INT64.
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT TimestampLogicalType : public LogicalType {
 public:
  /// \brief Return a timestamp type; both trailing flags default to false.
  static std::shared_ptr<const LogicalType> Make(bool is_adjusted_to_utc,
                                                 LogicalType::TimeUnit::unit time_unit,
                                                 bool is_from_converted_type = false,
                                                 bool force_set_converted_type = false);
  // NOTE(review): presumably the accessors return the values given to Make().
  bool is_adjusted_to_utc() const;
  LogicalType::TimeUnit::unit time_unit() const;

  /// \brief If true, will not set LogicalType in Thrift metadata
  bool is_from_converted_type() const;

  /// \brief If true, will set ConvertedType for micros and millis
  /// resolution in legacy ConvertedType Thrift metadata
  bool force_set_converted_type() const;

 private:
  TimestampLogicalType() = default;
};
369 
/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 12
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT IntervalLogicalType : public LogicalType {
 public:
  /// \brief Return the type as a shared pointer to the const LogicalType base.
  static std::shared_ptr<const LogicalType> Make();

 private:
  IntervalLogicalType() = default;
};
378 
/// \brief Allowed for physical type INT32 (for bit widths 8, 16, and 32) and INT64
/// (for bit width 64).
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT IntLogicalType : public LogicalType {
 public:
  /// \brief Return an integer type with the given bit width and signedness.
  static std::shared_ptr<const LogicalType> Make(int bit_width, bool is_signed);
  // NOTE(review): presumably the accessors return the values given to Make().
  int bit_width() const;
  bool is_signed() const;

 private:
  IntLogicalType() = default;
};
390 
/// \brief Allowed for any physical type.
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT NullLogicalType : public LogicalType {
 public:
  /// \brief Return the type as a shared pointer to the const LogicalType base.
  static std::shared_ptr<const LogicalType> Make();

 private:
  NullLogicalType() = default;
};
399 
/// \brief Allowed for physical type BYTE_ARRAY.
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT JSONLogicalType : public LogicalType {
 public:
  /// \brief Return the type as a shared pointer to the const LogicalType base.
  static std::shared_ptr<const LogicalType> Make();

 private:
  JSONLogicalType() = default;
};
408 
/// \brief Allowed for physical type BYTE_ARRAY.
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT BSONLogicalType : public LogicalType {
 public:
  /// \brief Return the type as a shared pointer to the const LogicalType base.
  static std::shared_ptr<const LogicalType> Make();

 private:
  BSONLogicalType() = default;
};
417 
/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 16,
/// must encode raw UUID bytes.
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT UUIDLogicalType : public LogicalType {
 public:
  /// \brief Return the type as a shared pointer to the const LogicalType base.
  static std::shared_ptr<const LogicalType> Make();

 private:
  UUIDLogicalType() = default;
};
427 
/// \brief Allowed for any physical type.
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT NoLogicalType : public LogicalType {
 public:
  /// \brief Return the type as a shared pointer to the const LogicalType base.
  static std::shared_ptr<const LogicalType> Make();

 private:
  NoLogicalType() = default;
};
436 
/// \brief Allowed for any type.
///
/// The constructor is private; Make() is the public factory declared here.
class PARQUET_EXPORT UnknownLogicalType : public LogicalType {
 public:
  /// \brief Return the type as a shared pointer to the const LogicalType base.
  static std::shared_ptr<const LogicalType> Make();

 private:
  UnknownLogicalType() = default;
};
445 
// Data encodings. Mirrors parquet::Encoding
//
// Note that the value 1 is not assigned to any enumerator, and that UNKNOWN
// (999) deliberately sits outside the contiguous range that ends at UNDEFINED.
struct Encoding {
  enum type {
    PLAIN = 0,
    PLAIN_DICTIONARY = 2,
    RLE = 3,
    BIT_PACKED = 4,
    DELTA_BINARY_PACKED = 5,
    DELTA_LENGTH_BYTE_ARRAY = 6,
    DELTA_BYTE_ARRAY = 7,
    RLE_DICTIONARY = 8,
    BYTE_STREAM_SPLIT = 9,
    // Should always be last element (except UNKNOWN)
    UNDEFINED = 10,
    UNKNOWN = 999
  };
};
463 
/// \brief Return true if Parquet supports indicated compression type
PARQUET_EXPORT
bool IsCodecSupported(Compression::type codec);

/// \brief Return a Codec for the indicated compression type.
// NOTE(review): behaviour for unsupported codecs is decided by the
// implementation and is not visible from this header.
PARQUET_EXPORT
std::unique_ptr<Codec> GetCodec(Compression::type codec);

/// \brief Overload of GetCodec that additionally takes a compression level.
PARQUET_EXPORT
std::unique_ptr<Codec> GetCodec(Compression::type codec, int compression_level);
473 
// Identifiers of the available encryption ciphers.
struct ParquetCipher {
  enum type {
    AES_GCM_V1 = 0,
    AES_GCM_CTR_V1 = 1
  };
};
477 
// AAD-related fields carried by EncryptionAlgorithm.
// NOTE(review): "AAD" presumably stands for additional authenticated data;
// the exact meaning of each field is defined by the encryption code, not here.
struct AadMetadata {
  std::string aad_prefix;
  std::string aad_file_unique;
  // Plain bool with no default initializer: callers must set it explicitly.
  bool supply_aad_prefix;
};
483 
// Pairs a cipher identifier with its AAD metadata.
struct EncryptionAlgorithm {
  ParquetCipher::type algorithm;
  AadMetadata aad;
};
488 
// Mirrors parquet::PageType
struct PageType {
  enum type {
    DATA_PAGE = 0,
    INDEX_PAGE = 1,
    DICTIONARY_PAGE = 2,
    DATA_PAGE_V2 = 3,
    // Should always be last element
    UNDEFINED = 4
  };
};
500 
/// \brief Thin wrapper around a ColumnOrder::type value.
class ColumnOrder {
 public:
  enum type { UNDEFINED, TYPE_DEFINED_ORDER };

  /// \brief Wrap the given order. Explicit, so a bare enum value does not
  /// silently convert to a ColumnOrder.
  explicit ColumnOrder(ColumnOrder::type column_order) : column_order_(column_order) {}

  // Default to Type Defined Order
  ColumnOrder() : column_order_(type::TYPE_DEFINED_ORDER) {}

  /// \brief Return the wrapped order.
  ///
  /// Const-qualified so that it can be called on const objects and through
  /// const references; it only reads the stored value.
  ColumnOrder::type get_order() const { return column_order_; }

  // Shared instances, declared here and defined out of this header.
  // NOTE(review): presumably they hold UNDEFINED and TYPE_DEFINED_ORDER.
  static ColumnOrder undefined_;
  static ColumnOrder type_defined_;

 private:
  ColumnOrder::type column_order_;
};
515 
516 // ----------------------------------------------------------------------
517 
518 struct ByteArray {
ByteArrayByteArray519   ByteArray() : len(0), ptr(NULLPTR) {}
ByteArrayByteArray520   ByteArray(uint32_t len, const uint8_t* ptr) : len(len), ptr(ptr) {}
521 
ByteArrayByteArray522   ByteArray(::arrow::util::string_view view)  // NOLINT implicit conversion
523       : ByteArray(static_cast<uint32_t>(view.size()),
524                   reinterpret_cast<const uint8_t*>(view.data())) {}
525   uint32_t len;
526   const uint8_t* ptr;
527 };
528 
529 inline bool operator==(const ByteArray& left, const ByteArray& right) {
530   return left.len == right.len &&
531          (left.len == 0 || std::memcmp(left.ptr, right.ptr, left.len) == 0);
532 }
533 
534 inline bool operator!=(const ByteArray& left, const ByteArray& right) {
535   return !(left == right);
536 }
537 
538 struct FixedLenByteArray {
FixedLenByteArrayFixedLenByteArray539   FixedLenByteArray() : ptr(NULLPTR) {}
FixedLenByteArrayFixedLenByteArray540   explicit FixedLenByteArray(const uint8_t* ptr) : ptr(ptr) {}
541   const uint8_t* ptr;
542 };
543 
544 using FLBA = FixedLenByteArray;
545 
// Julian day number (JDN) of the Unix epoch.
//
// The JDN is the integer assigned to a whole solar day in the Julian day
// count starting from noon Universal Time, with JDN 0 assigned to the day
// starting at noon on Monday, January 1, 4713 BC in the proleptic Julian
// calendar (November 24, 4714 BC in the proleptic Gregorian calendar).
constexpr int64_t kJulianToUnixEpochDays = INT64_C(2440588);

// Length of one day in successively finer units; each constant is derived
// from the previous one.
constexpr int64_t kSecondsPerDay = INT64_C(24) * 60 * 60;
constexpr int64_t kMillisecondsPerDay = INT64_C(1000) * kSecondsPerDay;
constexpr int64_t kMicrosecondsPerDay = INT64_C(1000) * kMillisecondsPerDay;
constexpr int64_t kNanosecondsPerDay = INT64_C(1000) * kMicrosecondsPerDay;
558 
// 96-bit value held as three 32-bit words. The helpers in this header copy a
// 64-bit nanosecond count through the first eight bytes and read value[2] as
// a Julian day number.
// NOTE(review): the macros presumably request 1-byte alignment and check that
// the struct is 12 bytes; confirm against their definitions.
MANUALLY_ALIGNED_STRUCT(1) Int96 { uint32_t value[3]; };
STRUCT_END(Int96, 12);
561 
562 inline bool operator==(const Int96& left, const Int96& right) {
563   return std::equal(left.value, left.value + 3, right.value);
564 }
565 
566 inline bool operator!=(const Int96& left, const Int96& right) { return !(left == right); }
567 
ByteArrayToString(const ByteArray & a)568 static inline std::string ByteArrayToString(const ByteArray& a) {
569   return std::string(reinterpret_cast<const char*>(a.ptr), a.len);
570 }
571 
Int96SetNanoSeconds(parquet::Int96 & i96,int64_t nanoseconds)572 static inline void Int96SetNanoSeconds(parquet::Int96& i96, int64_t nanoseconds) {
573   std::memcpy(&i96.value, &nanoseconds, sizeof(nanoseconds));
574 }
575 
Int96GetNanoSeconds(const parquet::Int96 & i96)576 static inline int64_t Int96GetNanoSeconds(const parquet::Int96& i96) {
577   // We do the computations in the unsigned domain to avoid unsigned behaviour
578   // on overflow.
579   uint64_t days_since_epoch =
580       i96.value[2] - static_cast<uint64_t>(kJulianToUnixEpochDays);
581   uint64_t nanoseconds = 0;
582 
583   memcpy(&nanoseconds, &i96.value, sizeof(uint64_t));
584   return static_cast<int64_t>(days_since_epoch * kNanosecondsPerDay + nanoseconds);
585 }
586 
Int96ToString(const Int96 & a)587 static inline std::string Int96ToString(const Int96& a) {
588   std::ostringstream result;
589   std::copy(a.value, a.value + 3, std::ostream_iterator<uint32_t>(result, " "));
590   return result.str();
591 }
592 
FixedLenByteArrayToString(const FixedLenByteArray & a,int len)593 static inline std::string FixedLenByteArrayToString(const FixedLenByteArray& a, int len) {
594   std::ostringstream result;
595   std::copy(a.ptr, a.ptr + len, std::ostream_iterator<uint32_t>(result, " "));
596   return result.str();
597 }
598 
// Compile-time properties of a physical type. The primary template is empty;
// the specializations that follow provide value_type, value_byte_size and
// printf_code.
template <Type::type TYPE>
struct type_traits {};
601 
// Traits for BOOLEAN: stored as bool, formatted with the integer code "d".
template <>
struct type_traits<Type::BOOLEAN> {
  using value_type = bool;

  static constexpr int value_byte_size = 1;
  static constexpr const char* printf_code = "d";
};
609 
// Traits for INT32: stored as int32_t, four bytes per value.
template <>
struct type_traits<Type::INT32> {
  using value_type = int32_t;

  static constexpr int value_byte_size = 4;
  static constexpr const char* printf_code = "d";
};
617 
618 template <>
619 struct type_traits<Type::INT64> {
620   using value_type = int64_t;
621 
622   static constexpr int value_byte_size = 8;
623   static constexpr const char* printf_code = "ld";
624 };
625 
// Traits for INT96: stored as Int96, twelve bytes per value.
// NOTE(review): the "s" code suggests values are turned into strings before
// printing (see Int96ToString); confirm at the call sites.
template <>
struct type_traits<Type::INT96> {
  using value_type = Int96;

  static constexpr int value_byte_size = 12;
  static constexpr const char* printf_code = "s";
};
633 
// Traits for FLOAT: stored as float, four bytes per value.
template <>
struct type_traits<Type::FLOAT> {
  using value_type = float;

  static constexpr int value_byte_size = 4;
  static constexpr const char* printf_code = "f";
};
641 
// Traits for DOUBLE: stored as double, eight bytes per value.
template <>
struct type_traits<Type::DOUBLE> {
  using value_type = double;

  static constexpr int value_byte_size = 8;
  static constexpr const char* printf_code = "lf";
};
649 
// Traits for BYTE_ARRAY: stored as ByteArray. value_byte_size is the size of
// the ByteArray struct itself, not of the bytes it points to.
template <>
struct type_traits<Type::BYTE_ARRAY> {
  using value_type = ByteArray;

  static constexpr int value_byte_size = sizeof(ByteArray);
  static constexpr const char* printf_code = "s";
};
657 
// Traits for FIXED_LEN_BYTE_ARRAY: stored as FixedLenByteArray.
// value_byte_size is the size of the FixedLenByteArray struct itself, not of
// the bytes it points to.
template <>
struct type_traits<Type::FIXED_LEN_BYTE_ARRAY> {
  using value_type = FixedLenByteArray;

  static constexpr int value_byte_size = sizeof(FixedLenByteArray);
  static constexpr const char* printf_code = "s";
};
665 
// Tag type that ties a Type::type enumerator to its C++ value type.
//   c_type   - the value_type taken from type_traits<TYPE>
//   type_num - the enumerator itself, usable as a compile-time constant
template <Type::type TYPE>
struct PhysicalType {
  using c_type = typename type_traits<TYPE>::value_type;
  static constexpr Type::type type_num = TYPE;
};
671 
// One PhysicalType alias per physical type.
using BooleanType = PhysicalType<Type::BOOLEAN>;
using Int32Type = PhysicalType<Type::INT32>;
using Int64Type = PhysicalType<Type::INT64>;
using Int96Type = PhysicalType<Type::INT96>;
using FloatType = PhysicalType<Type::FLOAT>;
using DoubleType = PhysicalType<Type::DOUBLE>;
using ByteArrayType = PhysicalType<Type::BYTE_ARRAY>;
using FLBAType = PhysicalType<Type::FIXED_LEN_BYTE_ARRAY>;
680 
681 template <typename Type>
682 inline std::string format_fwf(int width) {
683   std::stringstream ss;
684   ss << "%-" << width << type_traits<Type::type_num>::printf_code;
685   return ss.str();
686 }
687 
// Exported helpers; they are only declared in this header.

/// \brief Return a string for the given encoding.
PARQUET_EXPORT std::string EncodingToString(Encoding::type t);

/// \brief Return a string for the given converted type.
PARQUET_EXPORT std::string ConvertedTypeToString(ConvertedType::type t);

/// \brief Return a string for the given physical type.
PARQUET_EXPORT std::string TypeToString(Type::type t);

/// \brief Return a formatted string for a statistics value of the given type.
// NOTE(review): presumably `val` holds the raw bytes of the value; confirm in
// the implementation.
PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type,
                                           ::arrow::util::string_view val);

/// \brief Return a byte size for the given physical type.
PARQUET_EXPORT int GetTypeByteSize(Type::type t);

/// \brief Return the default sort order for the given physical type.
PARQUET_EXPORT SortOrder::type DefaultSortOrder(Type::type primitive);

/// \brief Return the sort order for a converted type and physical type pair.
PARQUET_EXPORT SortOrder::type GetSortOrder(ConvertedType::type converted,
                                            Type::type primitive);

/// \brief Return the sort order for a logical type and physical type pair.
PARQUET_EXPORT SortOrder::type GetSortOrder(
    const std::shared_ptr<const LogicalType>& logical_type, Type::type primitive);
706 
707 }  // namespace parquet
708