1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
// This header declares helpers shared by the Parquet encryption tests: fixed
// encryption keys and column names, key-map builders, and the FileEncryptor /
// FileDecryptor classes used to write and then verify encrypted Parquet files.
21 
22 #pragma once
23 
24 #include <memory>
25 #include <string>
26 #include <unordered_map>
27 
28 #include <gtest/gtest.h>
29 
30 #include "arrow/util/io_util.h"
31 
32 #include "parquet/encryption/encryption.h"
33 #include "parquet/test_util.h"
34 
35 namespace parquet {
36 class ParquetFileReader;
37 namespace encryption {
38 namespace test {
39 
40 using ::arrow::internal::TemporaryDir;
41 
// NOTE(review): presumably the byte width of the fixed-length byte array column
// written by the tests -- the usage is not visible in this header; confirm.
constexpr int kFixedLength = 10;

// Raw encryption keys used by the tests. Each key is 16 characters long,
// i.e. 128 bits.
constexpr char kFooterEncryptionKey[] = "0123456789012345";  // 128bit/16
constexpr char kColumnEncryptionKey1[] = "1234567890123450";
constexpr char kColumnEncryptionKey2[] = "1234567890123451";

// NOTE(review): presumably the base name of the files generated by the tests;
// confirm against the test sources.
constexpr char kFileName[] = "tester";
48 
// Returns the path of `file` inside the parquet test data directory.
// Only declared here; the definition lives in another translation unit.
std::string data_file(const char* file);
51 
52 // A temporary directory that contains the encrypted files generated in the tests.
53 extern std::unique_ptr<TemporaryDir> temp_dir;
54 
temp_data_dir()55 inline ::arrow::Result<std::unique_ptr<TemporaryDir>> temp_data_dir() {
56   return TemporaryDir::Make("parquet-encryption-test-");
57 }
58 
// Names of the columns in the test schema, one per physical type.
constexpr char kDoubleFieldName[] = "double_field";
constexpr char kFloatFieldName[] = "float_field";
constexpr char kBooleanFieldName[] = "boolean_field";
constexpr char kInt32FieldName[] = "int32_field";
constexpr char kInt64FieldName[] = "int64_field";
constexpr char kInt96FieldName[] = "int96_field";
constexpr char kByteArrayFieldName[] = "ba_field";
constexpr char kFixedLenByteArrayFieldName[] = "flba_field";

// Footer master key and its identifier.
constexpr char kFooterMasterKey[] = "0123456789112345";
constexpr char kFooterMasterKeyId[] = "kf";

// Column master keys and their identifiers. Both arrays have six entries;
// NOTE(review): presumably kColumnMasterKeyIds[i] names kColumnMasterKeys[i] --
// confirm against BuildKeyMap's definition.
constexpr const char* kColumnMasterKeys[] = {"1234567890123450", "1234567890123451",
                                             "1234567890123452", "1234567890123453",
                                             "1234567890123454", "1234567890123455"};
constexpr const char* kColumnMasterKeyIds[] = {"kc1", "kc2", "kc3",
                                               "kc4", "kc5", "kc6"};
74 
// Builds the key map that tests hand to TestOnlyInMemoryKmsClientFactory as the
// mapping it looks keys up in.
//
// column_ids, column_keys: arrays of column key identifiers and key values
//   (presumably parallel arrays such as kColumnMasterKeyIds / kColumnMasterKeys).
//   No element count is passed, so how many entries are read is decided by the
//   definition -- NOTE(review): confirm before passing arrays of another size.
// footer_id, footer_key: identifier and value of the footer key.
std::unordered_map<std::string, std::string> BuildKeyMap(const char* const* column_ids,
                                                         const char* const* column_keys,
                                                         const char* footer_id,
                                                         const char* footer_key);
81 
// Builds the string that tests set into EncryptionConfiguration as the column
// keys. Only declared here; the format of the string is determined by the
// definition in another translation unit.
std::string BuildColumnKeyMapping();
85 
86 // FileEncryptor and FileDecryptor are helper classes to write/read an encrypted parquet
87 // file corresponding to each pair of FileEncryptionProperties/FileDecryptionProperties.
88 // FileEncryptor writes the file with fixed data values and FileDecryptor reads the file
89 // and verify the correctness of data values.
90 class FileEncryptor {
91  public:
92   FileEncryptor();
93 
94   void EncryptFile(
95       std::string file,
96       std::shared_ptr<parquet::FileEncryptionProperties> encryption_configurations);
97 
98  private:
99   std::shared_ptr<schema::GroupNode> SetupEncryptionSchema();
100 
101   int num_rowgroups_ = 5;
102   int rows_per_rowgroup_ = 50;
103   std::shared_ptr<schema::GroupNode> schema_;
104 };
105 
106 class FileDecryptor {
107  public:
108   void DecryptFile(std::string file_name,
109                    std::shared_ptr<FileDecryptionProperties> file_decryption_properties);
110 
111  private:
112   void CheckFile(parquet::ParquetFileReader* file_reader,
113                  FileDecryptionProperties* file_decryption_properties);
114 };
115 
116 }  // namespace test
117 }  // namespace encryption
118 }  // namespace parquet
119