1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "parquet/metadata.h"
19
20 #include <gtest/gtest.h>
21
22 #include "arrow/util/key_value_metadata.h"
23 #include "parquet/schema.h"
24 #include "parquet/statistics.h"
25 #include "parquet/thrift_internal.h"
26 #include "parquet/types.h"
27
28 namespace parquet {
29
30 namespace metadata {
31
32 // Helper function for generating table metadata
GenerateTableMetaData(const parquet::SchemaDescriptor & schema,const std::shared_ptr<WriterProperties> & props,const int64_t & nrows,EncodedStatistics stats_int,EncodedStatistics stats_float)33 std::unique_ptr<parquet::FileMetaData> GenerateTableMetaData(
34 const parquet::SchemaDescriptor& schema,
35 const std::shared_ptr<WriterProperties>& props, const int64_t& nrows,
36 EncodedStatistics stats_int, EncodedStatistics stats_float) {
37 auto f_builder = FileMetaDataBuilder::Make(&schema, props);
38 auto rg1_builder = f_builder->AppendRowGroup();
39 // Write the metadata
40 // rowgroup1 metadata
41 auto col1_builder = rg1_builder->NextColumnChunk();
42 auto col2_builder = rg1_builder->NextColumnChunk();
43 // column metadata
44 std::map<Encoding::type, int32_t> dict_encoding_stats({{Encoding::RLE_DICTIONARY, 1}});
45 std::map<Encoding::type, int32_t> data_encoding_stats(
46 {{Encoding::PLAIN, 1}, {Encoding::RLE, 1}});
47 stats_int.set_is_signed(true);
48 col1_builder->SetStatistics(stats_int);
49 stats_float.set_is_signed(true);
50 col2_builder->SetStatistics(stats_float);
51 col1_builder->Finish(nrows / 2, 4, 0, 10, 512, 600, true, false, dict_encoding_stats,
52 data_encoding_stats);
53 col2_builder->Finish(nrows / 2, 24, 0, 30, 512, 600, true, false, dict_encoding_stats,
54 data_encoding_stats);
55
56 rg1_builder->set_num_rows(nrows / 2);
57 rg1_builder->Finish(1024);
58
59 // rowgroup2 metadata
60 auto rg2_builder = f_builder->AppendRowGroup();
61 col1_builder = rg2_builder->NextColumnChunk();
62 col2_builder = rg2_builder->NextColumnChunk();
63 // column metadata
64 col1_builder->SetStatistics(stats_int);
65 col2_builder->SetStatistics(stats_float);
66 dict_encoding_stats.clear();
67 col1_builder->Finish(nrows / 2, /*dictionary_page_offset=*/0, 0, 10, 512, 600,
68 /*has_dictionary=*/false, false, dict_encoding_stats,
69 data_encoding_stats);
70 col2_builder->Finish(nrows / 2, 16, 0, 26, 512, 600, true, false, dict_encoding_stats,
71 data_encoding_stats);
72
73 rg2_builder->set_num_rows(nrows / 2);
74 rg2_builder->Finish(1024);
75
76 // Return the metadata accessor
77 return f_builder->Finish();
78 }
79
// Builds metadata with two row groups via GenerateTableMetaData, round-trips
// it through SerializeToString() / FileMetaData::Make(), and verifies that the
// original and the deserialized copy both report the expected file, row group
// and column chunk values.  Afterwards exercises set_file_path,
// AppendRowGroups (including appending to itself, ARROW-13654) and Subset.
TEST(Metadata, TestBuildAccess) {
  parquet::schema::NodeVector fields;
  parquet::schema::NodePtr root;
  parquet::SchemaDescriptor schema;

  WriterProperties::Builder prop_builder;

  std::shared_ptr<WriterProperties> props =
      prop_builder.version(ParquetVersion::PARQUET_2_6)->build();

  fields.push_back(parquet::schema::Int32("int_col", Repetition::REQUIRED));
  fields.push_back(parquet::schema::Float("float_col", Repetition::REQUIRED));
  root = parquet::schema::GroupNode::Make("schema", Repetition::REPEATED, fields);
  schema.Init(root);

  int64_t nrows = 1000;
  int32_t int_min = 100, int_max = 200;
  EncodedStatistics stats_int;
  stats_int.set_null_count(0)
      .set_distinct_count(nrows)
      .set_min(std::string(reinterpret_cast<const char*>(&int_min), 4))
      .set_max(std::string(reinterpret_cast<const char*>(&int_max), 4));
  EncodedStatistics stats_float;
  float float_min = 100.100f, float_max = 200.200f;
  stats_float.set_null_count(0)
      .set_distinct_count(nrows)
      .set_min(std::string(reinterpret_cast<const char*>(&float_min), 4))
      .set_max(std::string(reinterpret_cast<const char*>(&float_max), 4));

  // Generate the metadata
  auto f_accessor = GenerateTableMetaData(schema, props, nrows, stats_int, stats_float);

  std::string f_accessor_serialized_metadata = f_accessor->SerializeToString();
  uint32_t expected_len = static_cast<uint32_t>(f_accessor_serialized_metadata.length());

  // decoded_len is an in-out parameter
  uint32_t decoded_len = expected_len;
  auto f_accessor_copy =
      FileMetaData::Make(f_accessor_serialized_metadata.data(), &decoded_len);

  // Check that all of the serialized data is consumed
  ASSERT_EQ(expected_len, decoded_len);

  // Run the checks below once for f_accessor and once for f_accessor_copy,
  // to make sure the serialized metadata was deserialized correctly.
  std::vector<FileMetaData*> f_accessors = {f_accessor.get(), f_accessor_copy.get()};
  for (FileMetaData* accessor : f_accessors) {
    // file metadata
    ASSERT_EQ(nrows, accessor->num_rows());
    ASSERT_LE(0, static_cast<int>(accessor->size()));
    ASSERT_EQ(2, accessor->num_row_groups());
    ASSERT_EQ(ParquetVersion::PARQUET_2_6, accessor->version());
    ASSERT_EQ(DEFAULT_CREATED_BY, accessor->created_by());
    ASSERT_EQ(3, accessor->num_schema_elements());

    // row group1 metadata
    auto rg1_accessor = accessor->RowGroup(0);
    ASSERT_EQ(2, rg1_accessor->num_columns());
    ASSERT_EQ(nrows / 2, rg1_accessor->num_rows());
    ASSERT_EQ(1024, rg1_accessor->total_byte_size());
    ASSERT_EQ(1024, rg1_accessor->total_compressed_size());
    // The first column of row group 1 has a dictionary page, so the row group
    // file offset is expected to match that page's offset.
    EXPECT_EQ(rg1_accessor->file_offset(),
              rg1_accessor->ColumnChunk(0)->dictionary_page_offset());

    auto rg1_column1 = rg1_accessor->ColumnChunk(0);
    auto rg1_column2 = rg1_accessor->ColumnChunk(1);
    ASSERT_TRUE(rg1_column1->is_stats_set());
    ASSERT_TRUE(rg1_column2->is_stats_set());
    ASSERT_EQ(stats_float.min(), rg1_column2->statistics()->EncodeMin());
    ASSERT_EQ(stats_float.max(), rg1_column2->statistics()->EncodeMax());
    ASSERT_EQ(stats_int.min(), rg1_column1->statistics()->EncodeMin());
    ASSERT_EQ(stats_int.max(), rg1_column1->statistics()->EncodeMax());
    ASSERT_EQ(0, rg1_column1->statistics()->null_count());
    ASSERT_EQ(0, rg1_column2->statistics()->null_count());
    ASSERT_EQ(nrows, rg1_column1->statistics()->distinct_count());
    ASSERT_EQ(nrows, rg1_column2->statistics()->distinct_count());
    ASSERT_EQ(DEFAULT_COMPRESSION_TYPE, rg1_column1->compression());
    ASSERT_EQ(DEFAULT_COMPRESSION_TYPE, rg1_column2->compression());
    ASSERT_EQ(nrows / 2, rg1_column1->num_values());
    ASSERT_EQ(nrows / 2, rg1_column2->num_values());
    ASSERT_EQ(3, rg1_column1->encodings().size());
    ASSERT_EQ(3, rg1_column2->encodings().size());
    ASSERT_EQ(512, rg1_column1->total_compressed_size());
    ASSERT_EQ(512, rg1_column2->total_compressed_size());
    ASSERT_EQ(600, rg1_column1->total_uncompressed_size());
    ASSERT_EQ(600, rg1_column2->total_uncompressed_size());
    ASSERT_EQ(4, rg1_column1->dictionary_page_offset());
    ASSERT_EQ(24, rg1_column2->dictionary_page_offset());
    ASSERT_EQ(10, rg1_column1->data_page_offset());
    ASSERT_EQ(30, rg1_column2->data_page_offset());
    ASSERT_EQ(3, rg1_column1->encoding_stats().size());
    ASSERT_EQ(3, rg1_column2->encoding_stats().size());

    // row group2 metadata
    auto rg2_accessor = accessor->RowGroup(1);
    ASSERT_EQ(2, rg2_accessor->num_columns());
    ASSERT_EQ(nrows / 2, rg2_accessor->num_rows());
    ASSERT_EQ(1024, rg2_accessor->total_byte_size());
    ASSERT_EQ(1024, rg2_accessor->total_compressed_size());
    // The first column of row group 2 has no dictionary page, so the row group
    // file offset is expected to match the data page offset instead.
    EXPECT_EQ(rg2_accessor->file_offset(),
              rg2_accessor->ColumnChunk(0)->data_page_offset());

    auto rg2_column1 = rg2_accessor->ColumnChunk(0);
    auto rg2_column2 = rg2_accessor->ColumnChunk(1);
    ASSERT_TRUE(rg2_column1->is_stats_set());
    ASSERT_TRUE(rg2_column2->is_stats_set());
    ASSERT_EQ(stats_float.min(), rg2_column2->statistics()->EncodeMin());
    ASSERT_EQ(stats_float.max(), rg2_column2->statistics()->EncodeMax());
    // Verify the int column of *this* row group (row group 1's int column is
    // already covered above).
    ASSERT_EQ(stats_int.min(), rg2_column1->statistics()->EncodeMin());
    ASSERT_EQ(stats_int.max(), rg2_column1->statistics()->EncodeMax());
    ASSERT_EQ(0, rg2_column1->statistics()->null_count());
    ASSERT_EQ(0, rg2_column2->statistics()->null_count());
    ASSERT_EQ(nrows, rg2_column1->statistics()->distinct_count());
    ASSERT_EQ(nrows, rg2_column2->statistics()->distinct_count());
    ASSERT_EQ(nrows / 2, rg2_column1->num_values());
    ASSERT_EQ(nrows / 2, rg2_column2->num_values());
    ASSERT_EQ(DEFAULT_COMPRESSION_TYPE, rg2_column1->compression());
    ASSERT_EQ(DEFAULT_COMPRESSION_TYPE, rg2_column2->compression());
    ASSERT_EQ(2, rg2_column1->encodings().size());
    ASSERT_EQ(3, rg2_column2->encodings().size());
    ASSERT_EQ(512, rg2_column1->total_compressed_size());
    ASSERT_EQ(512, rg2_column2->total_compressed_size());
    ASSERT_EQ(600, rg2_column1->total_uncompressed_size());
    ASSERT_EQ(600, rg2_column2->total_uncompressed_size());
    EXPECT_FALSE(rg2_column1->has_dictionary_page());
    ASSERT_EQ(0, rg2_column1->dictionary_page_offset());
    ASSERT_EQ(16, rg2_column2->dictionary_page_offset());
    ASSERT_EQ(10, rg2_column1->data_page_offset());
    ASSERT_EQ(26, rg2_column2->data_page_offset());
    ASSERT_EQ(2, rg2_column1->encoding_stats().size());
    ASSERT_EQ(2, rg2_column2->encoding_stats().size());

    // Test FileMetaData::set_file_path: the path set on the file metadata
    // must be visible through an already-obtained column chunk accessor.
    ASSERT_TRUE(rg2_column1->file_path().empty());
    accessor->set_file_path("/foo/bar/bar.parquet");
    ASSERT_EQ("/foo/bar/bar.parquet", rg2_column1->file_path());
  }

  // Test AppendRowGroups
  auto f_accessor_2 = GenerateTableMetaData(schema, props, nrows, stats_int, stats_float);
  f_accessor->AppendRowGroups(*f_accessor_2);
  ASSERT_EQ(4, f_accessor->num_row_groups());
  ASSERT_EQ(nrows * 2, f_accessor->num_rows());
  ASSERT_LE(0, static_cast<int>(f_accessor->size()));
  ASSERT_EQ(ParquetVersion::PARQUET_2_6, f_accessor->version());
  ASSERT_EQ(DEFAULT_CREATED_BY, f_accessor->created_by());
  ASSERT_EQ(3, f_accessor->num_schema_elements());

  // Test AppendRowGroups from self (ARROW-13654)
  f_accessor->AppendRowGroups(*f_accessor);
  ASSERT_EQ(8, f_accessor->num_row_groups());
  ASSERT_EQ(nrows * 4, f_accessor->num_rows());
  ASSERT_EQ(3, f_accessor->num_schema_elements());

  // Test Subset: row groups 2 and 3 are the ones appended from f_accessor_2
  auto f_accessor_1 = f_accessor->Subset({2, 3});
  ASSERT_TRUE(f_accessor_1->Equals(*f_accessor_2));

  f_accessor_1 = f_accessor_2->Subset({0});
  f_accessor_1->AppendRowGroups(*f_accessor->Subset({0}));
  ASSERT_TRUE(f_accessor_1->Equals(*f_accessor->Subset({2, 0})));
}
241
// PARQUET-839: metadata built from writer properties that request
// PARQUET_1_0 must report PARQUET_1_0 as its version.
TEST(Metadata, TestV1Version) {
  // Two required leaf columns under a repeated group root
  parquet::schema::NodeVector columns;
  columns.push_back(parquet::schema::Int32("int_col", Repetition::REQUIRED));
  columns.push_back(parquet::schema::Float("float_col", Repetition::REQUIRED));
  parquet::schema::NodePtr schema_root =
      parquet::schema::GroupNode::Make("schema", Repetition::REPEATED, columns);

  parquet::SchemaDescriptor schema;
  schema.Init(schema_root);

  WriterProperties::Builder builder;
  std::shared_ptr<WriterProperties> props =
      builder.version(ParquetVersion::PARQUET_1_0)->build();

  // Finish the builder straight away; no row groups are appended
  auto metadata = FileMetaDataBuilder::Make(&schema, props)->Finish();

  // file metadata
  ASSERT_EQ(ParquetVersion::PARQUET_1_0, metadata->version());
}
266
// Key/value metadata handed to FileMetaDataBuilder::Make must be present in,
// and equal to, the key/value metadata of the finished file metadata.
TEST(Metadata, TestKeyValueMetadata) {
  // Two required leaf columns under a repeated group root
  parquet::schema::NodeVector columns;
  columns.push_back(parquet::schema::Int32("int_col", Repetition::REQUIRED));
  columns.push_back(parquet::schema::Float("float_col", Repetition::REQUIRED));
  parquet::schema::NodePtr schema_root =
      parquet::schema::GroupNode::Make("schema", Repetition::REPEATED, columns);

  parquet::SchemaDescriptor schema;
  schema.Init(schema_root);

  WriterProperties::Builder builder;
  std::shared_ptr<WriterProperties> props =
      builder.version(ParquetVersion::PARQUET_1_0)->build();

  auto expected_kv = std::make_shared<KeyValueMetadata>();
  expected_kv->Append("test_key", "test_value");

  // Finish the builder straight away; no row groups are appended
  auto metadata = FileMetaDataBuilder::Make(&schema, props, expected_kv)->Finish();

  // Key value metadata
  ASSERT_TRUE(metadata->key_value_metadata());
  EXPECT_TRUE(metadata->key_value_metadata()->Equals(*expected_kv));
}
294
// Covers parsing of several version strings, the VersionLt ordering, and the
// HasCorrectStatistics answers for a range of physical types, sort orders and
// min/max contents.
TEST(ApplicationVersion, Basics) {
  ApplicationVersion mr_1_7_9("parquet-mr version 1.7.9");
  ApplicationVersion mr_1_8_0("parquet-mr version 1.8.0");
  ApplicationVersion cpp_1_0_0("parquet-cpp version 1.0.0");
  ApplicationVersion blank("");
  ApplicationVersion mr_cdh("parquet-mr version 1.5.0ab-cdh5.5.0+cd (build abcd)");
  ApplicationVersion mr_unversioned("parquet-mr");

  // Plain "application version x.y.z" strings
  ASSERT_EQ("parquet-mr", mr_1_7_9.application_);
  ASSERT_EQ(1, mr_1_7_9.version.major);
  ASSERT_EQ(7, mr_1_7_9.version.minor);
  ASSERT_EQ(9, mr_1_7_9.version.patch);

  ASSERT_EQ("parquet-cpp", cpp_1_0_0.application_);
  ASSERT_EQ(1, cpp_1_0_0.version.major);
  ASSERT_EQ(0, cpp_1_0_0.version.minor);
  ASSERT_EQ(0, cpp_1_0_0.version.patch);

  // Version with unknown suffix, pre-release, build info and a build name
  ASSERT_EQ("parquet-mr", mr_cdh.application_);
  ASSERT_EQ("abcd", mr_cdh.build_);
  ASSERT_EQ(1, mr_cdh.version.major);
  ASSERT_EQ(5, mr_cdh.version.minor);
  ASSERT_EQ(0, mr_cdh.version.patch);
  ASSERT_EQ("ab", mr_cdh.version.unknown);
  ASSERT_EQ("cdh5.5.0", mr_cdh.version.pre_release);
  ASSERT_EQ("cd", mr_cdh.version.build_info);

  // Application name only: the numeric version is all zeros
  ASSERT_EQ("parquet-mr", mr_unversioned.application_);
  ASSERT_EQ(0, mr_unversioned.version.major);
  ASSERT_EQ(0, mr_unversioned.version.minor);
  ASSERT_EQ(0, mr_unversioned.version.patch);

  // 1.7.9 orders before 1.8.0
  ASSERT_TRUE(mr_1_7_9.VersionLt(mr_1_8_0));

  // Statistics without min/max set
  EncodedStatistics empty_stats;
  ASSERT_FALSE(mr_1_8_0.HasCorrectStatistics(Type::INT96, empty_stats, SortOrder::UNKNOWN));
  ASSERT_TRUE(mr_1_7_9.HasCorrectStatistics(Type::INT32, empty_stats, SortOrder::SIGNED));
  ASSERT_FALSE(
      mr_1_7_9.HasCorrectStatistics(Type::BYTE_ARRAY, empty_stats, SortOrder::SIGNED));
  ASSERT_TRUE(
      mr_1_8_0.HasCorrectStatistics(Type::BYTE_ARRAY, empty_stats, SortOrder::SIGNED));
  ASSERT_FALSE(
      mr_1_8_0.HasCorrectStatistics(Type::BYTE_ARRAY, empty_stats, SortOrder::UNSIGNED));
  ASSERT_TRUE(blank.HasCorrectStatistics(Type::FIXED_LEN_BYTE_ARRAY, empty_stats,
                                         SortOrder::SIGNED));

  // Check that the old stats are correct if min and max are the same
  // regardless of sort order
  EncodedStatistics str_stats;
  str_stats.set_min("a");
  str_stats.set_max("b");
  ASSERT_FALSE(
      mr_1_8_0.HasCorrectStatistics(Type::BYTE_ARRAY, str_stats, SortOrder::UNSIGNED));
  str_stats.set_max("a");
  ASSERT_TRUE(
      mr_1_8_0.HasCorrectStatistics(Type::BYTE_ARRAY, str_stats, SortOrder::UNSIGNED));

  // Check that the same holds true for ints
  const int32_t low = 100;
  const int32_t high = 200;
  // Raw 4-byte in-memory representation of an int32 as a string
  const auto raw_bytes = [](const int32_t& value) {
    return std::string(reinterpret_cast<const char*>(&value), 4);
  };
  EncodedStatistics int_stats;
  int_stats.set_min(raw_bytes(low));
  int_stats.set_max(raw_bytes(high));
  ASSERT_FALSE(
      mr_1_8_0.HasCorrectStatistics(Type::BYTE_ARRAY, int_stats, SortOrder::UNSIGNED));
  int_stats.set_max(raw_bytes(low));
  ASSERT_TRUE(
      mr_1_8_0.HasCorrectStatistics(Type::BYTE_ARRAY, int_stats, SortOrder::UNSIGNED));
}
360
// An empty input string yields empty application and build names and a
// version whose numeric parts are zero and whose text parts are empty.
TEST(ApplicationVersion, Empty) {
  const ApplicationVersion parsed("");
  const auto& semver = parsed.version;

  ASSERT_EQ("", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(0, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
373
// Without a "version" keyword the whole input, including the "(build ...)"
// part, ends up as the application name; build and version stay empty/zero.
TEST(ApplicationVersion, NoVersion) {
  const ApplicationVersion parsed("parquet-mr (build abcd)");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr (build abcd)", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(0, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
386
// A "version" keyword followed by nothing still yields the application name,
// with an all-zero / all-empty version.
TEST(ApplicationVersion, VersionEmpty) {
  const ApplicationVersion parsed("parquet-mr version ");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(0, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
399
// A version consisting only of "." has no major number: every version field
// stays at its zero/empty value.
TEST(ApplicationVersion, VersionNoMajor) {
  const ApplicationVersion parsed("parquet-mr version .");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(0, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
412
// A major component that starts with a non-digit ("x1") is not accepted:
// every version field stays at its zero/empty value.
TEST(ApplicationVersion, VersionInvalidMajor) {
  const ApplicationVersion parsed("parquet-mr version x1");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(0, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
425
// A version of just "1" sets the major number and leaves everything else at
// its zero/empty value.
TEST(ApplicationVersion, VersionMajorOnly) {
  const ApplicationVersion parsed("parquet-mr version 1");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
438
// "1." provides a major number but nothing after the dot: major is kept,
// the remaining fields stay at their zero/empty values.
TEST(ApplicationVersion, VersionNoMinor) {
  const ApplicationVersion parsed("parquet-mr version 1.");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
451
// "1.7" sets major and minor; patch and the textual fields stay zero/empty.
TEST(ApplicationVersion, VersionMajorMinorOnly) {
  const ApplicationVersion parsed("parquet-mr version 1.7");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
464
// A minor component that starts with a non-digit ("x7") is not accepted:
// major is kept, the remaining fields stay at their zero/empty values.
TEST(ApplicationVersion, VersionInvalidMinor) {
  const ApplicationVersion parsed("parquet-mr version 1.x7");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(0, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
477
// "1.7." provides major and minor but nothing after the second dot: patch
// and the textual fields stay at their zero/empty values.
TEST(ApplicationVersion, VersionNoPatch) {
  const ApplicationVersion parsed("parquet-mr version 1.7.");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
490
// A patch component that starts with a non-digit ("x9") is not accepted:
// major and minor are kept, patch and the textual fields stay zero/empty.
TEST(ApplicationVersion, VersionInvalidPatch) {
  const ApplicationVersion parsed("parquet-mr version 1.7.x9");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(0, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("", semver.build_info);
}
503
// "1.7.9-cdh5.5.0+cd": no unknown suffix after the patch number, but both a
// pre-release ("cdh5.5.0") and build info ("cd") are present.
TEST(ApplicationVersion, VersionNoUnknown) {
  const ApplicationVersion parsed("parquet-mr version 1.7.9-cdh5.5.0+cd");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(9, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("cdh5.5.0", semver.pre_release);
  ASSERT_EQ("cd", semver.build_info);
}
516
// "1.7.9ab+cd": an unknown suffix ("ab") and build info ("cd") are present,
// but there is no pre-release part.
TEST(ApplicationVersion, VersionNoPreRelease) {
  const ApplicationVersion parsed("parquet-mr version 1.7.9ab+cd");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(9, semver.patch);

  ASSERT_EQ("ab", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("cd", semver.build_info);
}
529
// "1.7.9+cd": only build info ("cd") follows the patch number; the unknown
// suffix and pre-release parts are empty.
TEST(ApplicationVersion, VersionNoUnknownNoPreRelease) {
  const ApplicationVersion parsed("parquet-mr version 1.7.9+cd");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(9, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("cd", semver.build_info);
}
542
// "1.7.9+cd-cdh5.5.0": a '-' that appears after the '+' does not start a
// pre-release; the whole "cd-cdh5.5.0" remainder is reported as build info.
TEST(ApplicationVersion, VersionNoUnknownBuildInfoPreRelease) {
  const ApplicationVersion parsed("parquet-mr version 1.7.9+cd-cdh5.5.0");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(7, semver.minor);
  ASSERT_EQ(9, semver.patch);

  ASSERT_EQ("", semver.unknown);
  ASSERT_EQ("", semver.pre_release);
  ASSERT_EQ("cd-cdh5.5.0", semver.build_info);
}
555
// A fully populated version string padded with assorted whitespace
// characters (space, \t, \v, \r, \n, \f) parses to the same fields as its
// compact form would: the whitespace does not leak into any field.
TEST(ApplicationVersion, FullWithSpaces) {
  const ApplicationVersion parsed(
      " parquet-mr \t version \v 1.5.3ab-cdh5.5.0+cd \r (build \n abcd \f) ");
  const auto& semver = parsed.version;

  ASSERT_EQ("parquet-mr", parsed.application_);
  ASSERT_EQ("abcd", parsed.build_);

  ASSERT_EQ(1, semver.major);
  ASSERT_EQ(5, semver.minor);
  ASSERT_EQ(3, semver.patch);

  ASSERT_EQ("ab", semver.unknown);
  ASSERT_EQ("cdh5.5.0", semver.pre_release);
  ASSERT_EQ("cd", semver.build_info);
}
569
570 } // namespace metadata
571 } // namespace parquet
572