1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
// This module declares shared constants and helpers for the Parquet encryption
// tests: fixed keys and field names, key-map builders, and the FileEncryptor /
// FileDecryptor classes used to write and read back encrypted files.
21
22 #pragma once
23
24 #include <memory>
25 #include <string>
26 #include <unordered_map>
27
28 #include <gtest/gtest.h>
29
30 #include "arrow/util/io_util.h"
31
32 #include "parquet/encryption/encryption.h"
33 #include "parquet/test_util.h"
34
35 namespace parquet {
36 class ParquetFileReader;
37 namespace encryption {
38 namespace test {
39
40 using ::arrow::internal::TemporaryDir;
41
// Fixed length shared by the tests (presumably the width of the
// fixed-length byte array column -- confirm against the implementation).
constexpr int kFixedLength = 10;

// Explicit encryption keys; each literal is 16 characters, i.e. 128 bits.
constexpr char kFooterEncryptionKey[] = "0123456789012345";
constexpr char kColumnEncryptionKey1[] = "1234567890123450";
constexpr char kColumnEncryptionKey2[] = "1234567890123451";

// Name string used by the tests.
constexpr char kFileName[] = "tester";
48
// Get the path of a file inside the parquet test data directory.
//
// `file` names the file to look up (presumably relative to that directory --
// confirm against the implementation). The resulting path is returned as a
// std::string.
std::string data_file(const char* file);
51
// A temporary directory that contains the encrypted files generated in the tests.
// This is only an extern declaration; the definition must be provided by
// another source file.
extern std::unique_ptr<TemporaryDir> temp_dir;
54
temp_data_dir()55 inline ::arrow::Result<std::unique_ptr<TemporaryDir>> temp_data_dir() {
56 return TemporaryDir::Make("parquet-encryption-test-");
57 }
58
// Field names, one per value type (presumably the column names of the test
// schema -- confirm against the implementation).
constexpr char kDoubleFieldName[] = "double_field";
constexpr char kFloatFieldName[] = "float_field";
constexpr char kBooleanFieldName[] = "boolean_field";
constexpr char kInt32FieldName[] = "int32_field";
constexpr char kInt64FieldName[] = "int64_field";
constexpr char kInt96FieldName[] = "int96_field";
constexpr char kByteArrayFieldName[] = "ba_field";
constexpr char kFixedLenByteArrayFieldName[] = "flba_field";

// Footer master key and its identifier.
constexpr char kFooterMasterKey[] = "0123456789112345";
constexpr char kFooterMasterKeyId[] = "kf";

// Six column master keys and six identifiers, listed in the same order.
constexpr const char* kColumnMasterKeys[] = {
    "1234567890123450", "1234567890123451", "1234567890123452",
    "1234567890123453", "1234567890123454", "1234567890123455"};
constexpr const char* kColumnMasterKeyIds[] = {"kc1", "kc2", "kc3",
                                               "kc4", "kc5", "kc6"};
74
// Builds a string-to-string key map from column and footer key material.
// The result of this function is meant to be set on
// TestOnlyInMemoryKmsClientFactory as the key mapping to look keys up in.
//
// `column_ids` / `column_keys` are arrays of C strings and `footer_id` /
// `footer_key` are single C strings.
// NOTE(review): no array length is passed; presumably the arrays must have
// the same number of entries as kColumnMasterKeyIds / kColumnMasterKeys --
// confirm against the implementation.
std::unordered_map<std::string, std::string> BuildKeyMap(const char* const* column_ids,
                                                         const char* const* column_keys,
                                                         const char* footer_id,
                                                         const char* footer_key);
81
// The result of this function is meant to be set on EncryptionConfiguration
// as its column keys. The mapping is returned as a single std::string; its
// exact format is determined by the implementation, not by this header.
std::string BuildColumnKeyMapping();
85
// FileEncryptor and FileDecryptor are helper classes that write and read an encrypted
// parquet file for a given pair of FileEncryptionProperties/FileDecryptionProperties.
// FileEncryptor writes the file with fixed data values, and FileDecryptor reads the
// file back and verifies the correctness of those data values.
// Test helper that writes an encrypted parquet file with fixed data values.
class FileEncryptor {
 public:
  FileEncryptor();

  // Writes an encrypted file using the given encryption properties.
  // `file` identifies the output file (presumably a path -- confirm against
  // the implementation); `encryption_configurations` holds the
  // FileEncryptionProperties to apply.
  void EncryptFile(
      std::string file,
      std::shared_ptr<parquet::FileEncryptionProperties> encryption_configurations);

 private:
  // Returns a schema group node (presumably the schema used for the files
  // written by this helper and stored in schema_ -- confirm).
  std::shared_ptr<schema::GroupNode> SetupEncryptionSchema();

  // Defaults to 5 (presumably the number of row groups written per file).
  int num_rowgroups_ = 5;
  // Defaults to 50 (presumably the number of rows in each row group).
  int rows_per_rowgroup_ = 50;
  // Schema group node held by this helper.
  std::shared_ptr<schema::GroupNode> schema_;
};
105
// Test helper that reads an encrypted parquet file and verifies the
// correctness of its data values.
class FileDecryptor {
 public:
  // Reads `file_name` using the given decryption properties.
  // `file_name` identifies the input file (presumably a path -- confirm
  // against the implementation).
  void DecryptFile(std::string file_name,
                   std::shared_ptr<FileDecryptionProperties> file_decryption_properties);

 private:
  // Internal helper operating on an already opened reader and the
  // decryption properties (presumably where the data values are verified --
  // confirm against the implementation). Both arguments are raw,
  // non-owning pointers.
  void CheckFile(parquet::ParquetFileReader* file_reader,
                 FileDecryptionProperties* file_decryption_properties);
};
115
116 } // namespace test
117 } // namespace encryption
118 } // namespace parquet
119