1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include "parquet/metadata.h"
19 
20 #include <gtest/gtest.h>
21 
22 #include "arrow/util/key_value_metadata.h"
23 #include "parquet/schema.h"
24 #include "parquet/statistics.h"
25 #include "parquet/thrift_internal.h"
26 #include "parquet/types.h"
27 
28 namespace parquet {
29 
30 namespace metadata {
31 
32 // Helper function for generating table metadata
GenerateTableMetaData(const parquet::SchemaDescriptor & schema,const std::shared_ptr<WriterProperties> & props,const int64_t & nrows,EncodedStatistics stats_int,EncodedStatistics stats_float)33 std::unique_ptr<parquet::FileMetaData> GenerateTableMetaData(
34     const parquet::SchemaDescriptor& schema,
35     const std::shared_ptr<WriterProperties>& props, const int64_t& nrows,
36     EncodedStatistics stats_int, EncodedStatistics stats_float) {
37   auto f_builder = FileMetaDataBuilder::Make(&schema, props);
38   auto rg1_builder = f_builder->AppendRowGroup();
39   // Write the metadata
40   // rowgroup1 metadata
41   auto col1_builder = rg1_builder->NextColumnChunk();
42   auto col2_builder = rg1_builder->NextColumnChunk();
43   // column metadata
44   std::map<Encoding::type, int32_t> dict_encoding_stats({{Encoding::RLE_DICTIONARY, 1}});
45   std::map<Encoding::type, int32_t> data_encoding_stats(
46       {{Encoding::PLAIN, 1}, {Encoding::RLE, 1}});
47   stats_int.set_is_signed(true);
48   col1_builder->SetStatistics(stats_int);
49   stats_float.set_is_signed(true);
50   col2_builder->SetStatistics(stats_float);
51   col1_builder->Finish(nrows / 2, 4, 0, 10, 512, 600, true, false, dict_encoding_stats,
52                        data_encoding_stats);
53   col2_builder->Finish(nrows / 2, 24, 0, 30, 512, 600, true, false, dict_encoding_stats,
54                        data_encoding_stats);
55 
56   rg1_builder->set_num_rows(nrows / 2);
57   rg1_builder->Finish(1024);
58 
59   // rowgroup2 metadata
60   auto rg2_builder = f_builder->AppendRowGroup();
61   col1_builder = rg2_builder->NextColumnChunk();
62   col2_builder = rg2_builder->NextColumnChunk();
63   // column metadata
64   col1_builder->SetStatistics(stats_int);
65   col2_builder->SetStatistics(stats_float);
66   dict_encoding_stats.clear();
67   col1_builder->Finish(nrows / 2, /*dictionary_page_offset=*/0, 0, 10, 512, 600,
68                        /*has_dictionary=*/false, false, dict_encoding_stats,
69                        data_encoding_stats);
70   col2_builder->Finish(nrows / 2, 16, 0, 26, 512, 600, true, false, dict_encoding_stats,
71                        data_encoding_stats);
72 
73   rg2_builder->set_num_rows(nrows / 2);
74   rg2_builder->Finish(1024);
75 
76   // Return the metadata accessor
77   return f_builder->Finish();
78 }
79 
TEST(Metadata,TestBuildAccess)80 TEST(Metadata, TestBuildAccess) {
81   parquet::schema::NodeVector fields;
82   parquet::schema::NodePtr root;
83   parquet::SchemaDescriptor schema;
84 
85   WriterProperties::Builder prop_builder;
86 
87   std::shared_ptr<WriterProperties> props =
88       prop_builder.version(ParquetVersion::PARQUET_2_6)->build();
89 
90   fields.push_back(parquet::schema::Int32("int_col", Repetition::REQUIRED));
91   fields.push_back(parquet::schema::Float("float_col", Repetition::REQUIRED));
92   root = parquet::schema::GroupNode::Make("schema", Repetition::REPEATED, fields);
93   schema.Init(root);
94 
95   int64_t nrows = 1000;
96   int32_t int_min = 100, int_max = 200;
97   EncodedStatistics stats_int;
98   stats_int.set_null_count(0)
99       .set_distinct_count(nrows)
100       .set_min(std::string(reinterpret_cast<const char*>(&int_min), 4))
101       .set_max(std::string(reinterpret_cast<const char*>(&int_max), 4));
102   EncodedStatistics stats_float;
103   float float_min = 100.100f, float_max = 200.200f;
104   stats_float.set_null_count(0)
105       .set_distinct_count(nrows)
106       .set_min(std::string(reinterpret_cast<const char*>(&float_min), 4))
107       .set_max(std::string(reinterpret_cast<const char*>(&float_max), 4));
108 
109   // Generate the metadata
110   auto f_accessor = GenerateTableMetaData(schema, props, nrows, stats_int, stats_float);
111 
112   std::string f_accessor_serialized_metadata = f_accessor->SerializeToString();
113   uint32_t expected_len = static_cast<uint32_t>(f_accessor_serialized_metadata.length());
114 
115   // decoded_len is an in-out parameter
116   uint32_t decoded_len = expected_len;
117   auto f_accessor_copy =
118       FileMetaData::Make(f_accessor_serialized_metadata.data(), &decoded_len);
119 
120   // Check that all of the serialized data is consumed
121   ASSERT_EQ(expected_len, decoded_len);
122 
123   // Run this block twice, one for f_accessor, one for f_accessor_copy.
124   // To make sure SerializedMetadata was deserialized correctly.
125   std::vector<FileMetaData*> f_accessors = {f_accessor.get(), f_accessor_copy.get()};
126   for (int loop_index = 0; loop_index < 2; loop_index++) {
127     // file metadata
128     ASSERT_EQ(nrows, f_accessors[loop_index]->num_rows());
129     ASSERT_LE(0, static_cast<int>(f_accessors[loop_index]->size()));
130     ASSERT_EQ(2, f_accessors[loop_index]->num_row_groups());
131     ASSERT_EQ(ParquetVersion::PARQUET_2_6, f_accessors[loop_index]->version());
132     ASSERT_EQ(DEFAULT_CREATED_BY, f_accessors[loop_index]->created_by());
133     ASSERT_EQ(3, f_accessors[loop_index]->num_schema_elements());
134 
135     // row group1 metadata
136     auto rg1_accessor = f_accessors[loop_index]->RowGroup(0);
137     ASSERT_EQ(2, rg1_accessor->num_columns());
138     ASSERT_EQ(nrows / 2, rg1_accessor->num_rows());
139     ASSERT_EQ(1024, rg1_accessor->total_byte_size());
140     ASSERT_EQ(1024, rg1_accessor->total_compressed_size());
141     EXPECT_EQ(rg1_accessor->file_offset(),
142               rg1_accessor->ColumnChunk(0)->dictionary_page_offset());
143 
144     auto rg1_column1 = rg1_accessor->ColumnChunk(0);
145     auto rg1_column2 = rg1_accessor->ColumnChunk(1);
146     ASSERT_EQ(true, rg1_column1->is_stats_set());
147     ASSERT_EQ(true, rg1_column2->is_stats_set());
148     ASSERT_EQ(stats_float.min(), rg1_column2->statistics()->EncodeMin());
149     ASSERT_EQ(stats_float.max(), rg1_column2->statistics()->EncodeMax());
150     ASSERT_EQ(stats_int.min(), rg1_column1->statistics()->EncodeMin());
151     ASSERT_EQ(stats_int.max(), rg1_column1->statistics()->EncodeMax());
152     ASSERT_EQ(0, rg1_column1->statistics()->null_count());
153     ASSERT_EQ(0, rg1_column2->statistics()->null_count());
154     ASSERT_EQ(nrows, rg1_column1->statistics()->distinct_count());
155     ASSERT_EQ(nrows, rg1_column2->statistics()->distinct_count());
156     ASSERT_EQ(DEFAULT_COMPRESSION_TYPE, rg1_column1->compression());
157     ASSERT_EQ(DEFAULT_COMPRESSION_TYPE, rg1_column2->compression());
158     ASSERT_EQ(nrows / 2, rg1_column1->num_values());
159     ASSERT_EQ(nrows / 2, rg1_column2->num_values());
160     ASSERT_EQ(3, rg1_column1->encodings().size());
161     ASSERT_EQ(3, rg1_column2->encodings().size());
162     ASSERT_EQ(512, rg1_column1->total_compressed_size());
163     ASSERT_EQ(512, rg1_column2->total_compressed_size());
164     ASSERT_EQ(600, rg1_column1->total_uncompressed_size());
165     ASSERT_EQ(600, rg1_column2->total_uncompressed_size());
166     ASSERT_EQ(4, rg1_column1->dictionary_page_offset());
167     ASSERT_EQ(24, rg1_column2->dictionary_page_offset());
168     ASSERT_EQ(10, rg1_column1->data_page_offset());
169     ASSERT_EQ(30, rg1_column2->data_page_offset());
170     ASSERT_EQ(3, rg1_column1->encoding_stats().size());
171     ASSERT_EQ(3, rg1_column2->encoding_stats().size());
172 
173     auto rg2_accessor = f_accessors[loop_index]->RowGroup(1);
174     ASSERT_EQ(2, rg2_accessor->num_columns());
175     ASSERT_EQ(nrows / 2, rg2_accessor->num_rows());
176     ASSERT_EQ(1024, rg2_accessor->total_byte_size());
177     ASSERT_EQ(1024, rg2_accessor->total_compressed_size());
178     EXPECT_EQ(rg2_accessor->file_offset(),
179               rg2_accessor->ColumnChunk(0)->data_page_offset());
180 
181     auto rg2_column1 = rg2_accessor->ColumnChunk(0);
182     auto rg2_column2 = rg2_accessor->ColumnChunk(1);
183     ASSERT_EQ(true, rg2_column1->is_stats_set());
184     ASSERT_EQ(true, rg2_column2->is_stats_set());
185     ASSERT_EQ(stats_float.min(), rg2_column2->statistics()->EncodeMin());
186     ASSERT_EQ(stats_float.max(), rg2_column2->statistics()->EncodeMax());
187     ASSERT_EQ(stats_int.min(), rg1_column1->statistics()->EncodeMin());
188     ASSERT_EQ(stats_int.max(), rg1_column1->statistics()->EncodeMax());
189     ASSERT_EQ(0, rg2_column1->statistics()->null_count());
190     ASSERT_EQ(0, rg2_column2->statistics()->null_count());
191     ASSERT_EQ(nrows, rg2_column1->statistics()->distinct_count());
192     ASSERT_EQ(nrows, rg2_column2->statistics()->distinct_count());
193     ASSERT_EQ(nrows / 2, rg2_column1->num_values());
194     ASSERT_EQ(nrows / 2, rg2_column2->num_values());
195     ASSERT_EQ(DEFAULT_COMPRESSION_TYPE, rg2_column1->compression());
196     ASSERT_EQ(DEFAULT_COMPRESSION_TYPE, rg2_column2->compression());
197     ASSERT_EQ(2, rg2_column1->encodings().size());
198     ASSERT_EQ(3, rg2_column2->encodings().size());
199     ASSERT_EQ(512, rg2_column1->total_compressed_size());
200     ASSERT_EQ(512, rg2_column2->total_compressed_size());
201     ASSERT_EQ(600, rg2_column1->total_uncompressed_size());
202     ASSERT_EQ(600, rg2_column2->total_uncompressed_size());
203     EXPECT_FALSE(rg2_column1->has_dictionary_page());
204     ASSERT_EQ(0, rg2_column1->dictionary_page_offset());
205     ASSERT_EQ(16, rg2_column2->dictionary_page_offset());
206     ASSERT_EQ(10, rg2_column1->data_page_offset());
207     ASSERT_EQ(26, rg2_column2->data_page_offset());
208     ASSERT_EQ(2, rg2_column1->encoding_stats().size());
209     ASSERT_EQ(2, rg2_column2->encoding_stats().size());
210 
211     // Test FileMetaData::set_file_path
212     ASSERT_TRUE(rg2_column1->file_path().empty());
213     f_accessors[loop_index]->set_file_path("/foo/bar/bar.parquet");
214     ASSERT_EQ("/foo/bar/bar.parquet", rg2_column1->file_path());
215   }
216 
217   // Test AppendRowGroups
218   auto f_accessor_2 = GenerateTableMetaData(schema, props, nrows, stats_int, stats_float);
219   f_accessor->AppendRowGroups(*f_accessor_2);
220   ASSERT_EQ(4, f_accessor->num_row_groups());
221   ASSERT_EQ(nrows * 2, f_accessor->num_rows());
222   ASSERT_LE(0, static_cast<int>(f_accessor->size()));
223   ASSERT_EQ(ParquetVersion::PARQUET_2_6, f_accessor->version());
224   ASSERT_EQ(DEFAULT_CREATED_BY, f_accessor->created_by());
225   ASSERT_EQ(3, f_accessor->num_schema_elements());
226 
227   // Test AppendRowGroups from self (ARROW-13654)
228   f_accessor->AppendRowGroups(*f_accessor);
229   ASSERT_EQ(8, f_accessor->num_row_groups());
230   ASSERT_EQ(nrows * 4, f_accessor->num_rows());
231   ASSERT_EQ(3, f_accessor->num_schema_elements());
232 
233   // Test Subset
234   auto f_accessor_1 = f_accessor->Subset({2, 3});
235   ASSERT_TRUE(f_accessor_1->Equals(*f_accessor_2));
236 
237   f_accessor_1 = f_accessor_2->Subset({0});
238   f_accessor_1->AppendRowGroups(*f_accessor->Subset({0}));
239   ASSERT_TRUE(f_accessor_1->Equals(*f_accessor->Subset({2, 0})));
240 }
241 
// PARQUET-839: metadata finished from a builder configured with
// ParquetVersion::PARQUET_1_0 must report that same version.
TEST(Metadata, TestV1Version) {
  // Writer properties pinned to format version 1.0.
  WriterProperties::Builder properties_builder;
  std::shared_ptr<WriterProperties> v1_props =
      properties_builder.version(ParquetVersion::PARQUET_1_0)->build();

  // Schema: a repeated group holding one required int32 and one required float.
  parquet::schema::NodeVector leaves;
  leaves.push_back(parquet::schema::Int32("int_col", Repetition::REQUIRED));
  leaves.push_back(parquet::schema::Float("float_col", Repetition::REQUIRED));
  parquet::schema::NodePtr schema_root =
      parquet::schema::GroupNode::Make("schema", Repetition::REPEATED, leaves);

  parquet::SchemaDescriptor descriptor;
  descriptor.Init(schema_root);

  // No row groups are appended; only the file-level version is inspected.
  auto metadata_builder = FileMetaDataBuilder::Make(&descriptor, v1_props);
  auto metadata = metadata_builder->Finish();

  ASSERT_EQ(ParquetVersion::PARQUET_1_0, metadata->version());
}
266 
// Key/value metadata handed to FileMetaDataBuilder::Make must be present on,
// and equal in, the finished FileMetaData.
TEST(Metadata, TestKeyValueMetadata) {
  WriterProperties::Builder properties_builder;
  std::shared_ptr<WriterProperties> v1_props =
      properties_builder.version(ParquetVersion::PARQUET_1_0)->build();

  // Schema: a repeated group holding one required int32 and one required float.
  parquet::schema::NodeVector leaves;
  leaves.push_back(parquet::schema::Int32("int_col", Repetition::REQUIRED));
  leaves.push_back(parquet::schema::Float("float_col", Repetition::REQUIRED));
  parquet::schema::NodePtr schema_root =
      parquet::schema::GroupNode::Make("schema", Repetition::REPEATED, leaves);

  parquet::SchemaDescriptor descriptor;
  descriptor.Init(schema_root);

  // A single entry is enough to tell "carried over" from "dropped".
  auto expected_kv = std::make_shared<KeyValueMetadata>();
  expected_kv->Append("test_key", "test_value");

  auto metadata_builder = FileMetaDataBuilder::Make(&descriptor, v1_props, expected_kv);
  auto metadata = metadata_builder->Finish();

  // Presence is a hard requirement (the next line dereferences the result);
  // equality is a soft check.
  ASSERT_TRUE(metadata->key_value_metadata());
  EXPECT_TRUE(metadata->key_value_metadata()->Equals(*expected_kv));
}
294 
TEST(ApplicationVersion,Basics)295 TEST(ApplicationVersion, Basics) {
296   ApplicationVersion version("parquet-mr version 1.7.9");
297   ApplicationVersion version1("parquet-mr version 1.8.0");
298   ApplicationVersion version2("parquet-cpp version 1.0.0");
299   ApplicationVersion version3("");
300   ApplicationVersion version4("parquet-mr version 1.5.0ab-cdh5.5.0+cd (build abcd)");
301   ApplicationVersion version5("parquet-mr");
302 
303   ASSERT_EQ("parquet-mr", version.application_);
304   ASSERT_EQ(1, version.version.major);
305   ASSERT_EQ(7, version.version.minor);
306   ASSERT_EQ(9, version.version.patch);
307 
308   ASSERT_EQ("parquet-cpp", version2.application_);
309   ASSERT_EQ(1, version2.version.major);
310   ASSERT_EQ(0, version2.version.minor);
311   ASSERT_EQ(0, version2.version.patch);
312 
313   ASSERT_EQ("parquet-mr", version4.application_);
314   ASSERT_EQ("abcd", version4.build_);
315   ASSERT_EQ(1, version4.version.major);
316   ASSERT_EQ(5, version4.version.minor);
317   ASSERT_EQ(0, version4.version.patch);
318   ASSERT_EQ("ab", version4.version.unknown);
319   ASSERT_EQ("cdh5.5.0", version4.version.pre_release);
320   ASSERT_EQ("cd", version4.version.build_info);
321 
322   ASSERT_EQ("parquet-mr", version5.application_);
323   ASSERT_EQ(0, version5.version.major);
324   ASSERT_EQ(0, version5.version.minor);
325   ASSERT_EQ(0, version5.version.patch);
326 
327   ASSERT_EQ(true, version.VersionLt(version1));
328 
329   EncodedStatistics stats;
330   ASSERT_FALSE(version1.HasCorrectStatistics(Type::INT96, stats, SortOrder::UNKNOWN));
331   ASSERT_TRUE(version.HasCorrectStatistics(Type::INT32, stats, SortOrder::SIGNED));
332   ASSERT_FALSE(version.HasCorrectStatistics(Type::BYTE_ARRAY, stats, SortOrder::SIGNED));
333   ASSERT_TRUE(version1.HasCorrectStatistics(Type::BYTE_ARRAY, stats, SortOrder::SIGNED));
334   ASSERT_FALSE(
335       version1.HasCorrectStatistics(Type::BYTE_ARRAY, stats, SortOrder::UNSIGNED));
336   ASSERT_TRUE(version3.HasCorrectStatistics(Type::FIXED_LEN_BYTE_ARRAY, stats,
337                                             SortOrder::SIGNED));
338 
339   // Check that the old stats are correct if min and max are the same
340   // regardless of sort order
341   EncodedStatistics stats_str;
342   stats_str.set_min("a").set_max("b");
343   ASSERT_FALSE(
344       version1.HasCorrectStatistics(Type::BYTE_ARRAY, stats_str, SortOrder::UNSIGNED));
345   stats_str.set_max("a");
346   ASSERT_TRUE(
347       version1.HasCorrectStatistics(Type::BYTE_ARRAY, stats_str, SortOrder::UNSIGNED));
348 
349   // Check that the same holds true for ints
350   int32_t int_min = 100, int_max = 200;
351   EncodedStatistics stats_int;
352   stats_int.set_min(std::string(reinterpret_cast<const char*>(&int_min), 4))
353       .set_max(std::string(reinterpret_cast<const char*>(&int_max), 4));
354   ASSERT_FALSE(
355       version1.HasCorrectStatistics(Type::BYTE_ARRAY, stats_int, SortOrder::UNSIGNED));
356   stats_int.set_max(std::string(reinterpret_cast<const char*>(&int_min), 4));
357   ASSERT_TRUE(
358       version1.HasCorrectStatistics(Type::BYTE_ARRAY, stats_int, SortOrder::UNSIGNED));
359 }
360 
// An empty created-by string yields empty text fields and version 0.0.0.
TEST(ApplicationVersion, Empty) {
  const ApplicationVersion parsed("");
  const auto& semver = parsed.version;

  ASSERT_EQ("", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(0, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
373 
// Without a "version" section the whole input, build clause included, is
// expected to land in application_, with build_ empty and version 0.0.0.
TEST(ApplicationVersion, NoVersion) {
  const ApplicationVersion parsed("parquet-mr (build abcd)");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr (build abcd)", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(0, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
386 
// A "version" keyword followed by nothing: the application name is still
// extracted and every version component stays at its zero / empty value.
TEST(ApplicationVersion, VersionEmpty) {
  const ApplicationVersion parsed("parquet-mr version ");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(0, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
399 
// A version string consisting of just "." has no major number; all version
// components are expected to stay at zero / empty.
TEST(ApplicationVersion, VersionNoMajor) {
  const ApplicationVersion parsed("parquet-mr version .");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(0, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
412 
// A major component that starts with a non-digit ("x1") is expected to leave
// all version components at zero / empty.
TEST(ApplicationVersion, VersionInvalidMajor) {
  const ApplicationVersion parsed("parquet-mr version x1");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(0, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
425 
// Only a major number is given: major is parsed, minor and patch stay 0.
TEST(ApplicationVersion, VersionMajorOnly) {
  const ApplicationVersion parsed("parquet-mr version 1");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
438 
// Major followed by a dangling dot ("1."): major is parsed, minor and patch
// stay 0.
TEST(ApplicationVersion, VersionNoMinor) {
  const ApplicationVersion parsed("parquet-mr version 1.");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
451 
// Major and minor are given without a patch ("1.7"): patch stays 0.
TEST(ApplicationVersion, VersionMajorMinorOnly) {
  const ApplicationVersion parsed("parquet-mr version 1.7");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
464 
// A minor component starting with a non-digit ("1.x7"): major is kept, minor
// and patch stay 0, and no trailing text fields are populated.
TEST(ApplicationVersion, VersionInvalidMinor) {
  const ApplicationVersion parsed("parquet-mr version 1.x7");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
477 
// Major and minor followed by a dangling dot ("1.7."): patch stays 0.
TEST(ApplicationVersion, VersionNoPatch) {
  const ApplicationVersion parsed("parquet-mr version 1.7.");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
490 
// A patch component starting with a non-digit ("1.7.x9"): major and minor are
// kept, patch stays 0, and no trailing text fields are populated.
TEST(ApplicationVersion, VersionInvalidPatch) {
  const ApplicationVersion parsed("parquet-mr version 1.7.x9");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
503 
// Patch is followed directly by "-<pre_release>+<build_info>": the unknown
// field stays empty while pre_release and build_info are both captured.
TEST(ApplicationVersion, VersionNoUnknown) {
  const ApplicationVersion parsed("parquet-mr version 1.7.9-cdh5.5.0+cd");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(9, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("cdh5.5.0", semver.pre_release);
  ASSERT_EQ("cd", semver.build_info);
}
516 
// Patch is followed by free text and "+<build_info>" with no "-" section:
// unknown and build_info are captured, pre_release stays empty.
TEST(ApplicationVersion, VersionNoPreRelease) {
  const ApplicationVersion parsed("parquet-mr version 1.7.9ab+cd");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(9, semver.patch);

  ASSERT_EQ("ab", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("cd", semver.build_info);
}
529 
// Patch is followed directly by "+<build_info>": only build_info is captured.
TEST(ApplicationVersion, VersionNoUnknownNoPreRelease) {
  const ApplicationVersion parsed("parquet-mr version 1.7.9+cd");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(9, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("cd", semver.build_info);
}
542 
// When "+" comes before "-", everything after the "+" (dash included) is
// expected in build_info and pre_release stays empty.
TEST(ApplicationVersion, VersionNoUnknownBuildInfoPreRelease) {
  const ApplicationVersion parsed("parquet-mr version 1.7.9+cd-cdh5.5.0");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(9, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("cd-cdh5.5.0", semver.build_info);
}
555 
// A fully populated created-by string padded with assorted whitespace
// (space, \t, \v, \r, \n, \f) must parse to the same fields as if the
// padding were absent.
TEST(ApplicationVersion, FullWithSpaces) {
  const ApplicationVersion parsed(
      " parquet-mr \t version \v 1.5.3ab-cdh5.5.0+cd \r (build \n abcd \f) ");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("abcd", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(5, semver.minor);
  ASSERT_EQ(3, semver.patch);

  ASSERT_EQ("ab", semver.unknown);
  ASSERT_EQ("cdh5.5.0", semver.pre_release);
  ASSERT_EQ("cd", semver.build_info);
}
569 
570 }  // namespace metadata
571 }  // namespace parquet
572