1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <gtest/gtest.h>
19 #include <stdio.h>
20
21 #include <fstream>
22
23 #include "arrow/io/file.h"
24 #include "arrow/testing/gtest_compat.h"
25
26 #include "parquet/column_reader.h"
27 #include "parquet/column_writer.h"
28 #include "parquet/encryption/test_encryption_util.h"
29 #include "parquet/file_reader.h"
30 #include "parquet/test_util.h"
31
32 /*
33 * This file contains a unit-test for reading encrypted Parquet files with
34 * different decryption configurations.
35 *
 * The unit-test is called multiple times, each time decrypting parquet files with a
 * different decryption configuration, as described below.
38 * In each call two encrypted files are read: one temporary file that was generated using
39 * encryption-write-configurations-test.cc test and will be deleted upon
40 * reading it, while the second resides in
41 * parquet-testing/data repository. Those two encrypted files were encrypted using the
42 * same encryption configuration.
 * The encrypted parquet file names are passed as parameters to the unit-test.
44 *
45 * A detailed description of the Parquet Modular Encryption specification can be found
46 * here:
47 * https://github.com/apache/parquet-format/blob/encryption/Encryption.md
48 *
49 * The following decryption configurations are used to decrypt each parquet file:
50 *
51 * - Decryption configuration 1: Decrypt using key retriever that holds the keys of
52 * two encrypted columns and the footer key.
53 * - Decryption configuration 2: Decrypt using key retriever that holds the keys of
54 * two encrypted columns and the footer key. Supplies
55 * aad_prefix to verify file identity.
56 * - Decryption configuration 3: Decrypt using explicit column and footer keys
57 * (instead of key retrieval callback).
58 * - Decryption Configuration 4: PlainText Footer mode - test legacy reads,
59 * read the footer + all non-encrypted columns.
60 * (pairs with encryption configuration 3)
61 *
 * Each encrypted parquet file that is read was encrypted using one of the
 * configurations below:
64 *
65 * - Encryption configuration 1: Encrypt all columns and the footer with the same key.
66 * (uniform encryption)
67 * - Encryption configuration 2: Encrypt two columns and the footer, with different
68 * keys.
69 * - Encryption configuration 3: Encrypt two columns, with different keys.
 * Don't encrypt footer (to enable legacy readers)
71 * - plaintext footer mode.
72 * - Encryption configuration 4: Encrypt two columns and the footer, with different
73 * keys. Supply aad_prefix for file identity
74 * verification.
75 * - Encryption configuration 5: Encrypt two columns and the footer, with different
76 * keys. Supply aad_prefix, and call
77 * disable_aad_prefix_storage to prevent file
78 * identity storage in file metadata.
79 * - Encryption configuration 6: Encrypt two columns and the footer, with different
80 * keys. Use the alternative (AES_GCM_CTR_V1) algorithm.
81
82 */
83
84 namespace parquet {
85 namespace encryption {
86 namespace test {
87
88 using parquet::test::ParquetTestException;
89
90 class TestDecryptionConfiguration
91 : public testing::TestWithParam<std::tuple<int, const char*>> {
92 public:
SetUp()93 void SetUp() { CreateDecryptionConfigurations(); }
94
95 protected:
96 FileDecryptor decryptor_;
97 std::string path_to_double_field_ = kDoubleFieldName;
98 std::string path_to_float_field_ = kFloatFieldName;
99 // This vector will hold various decryption configurations.
100 std::vector<std::shared_ptr<parquet::FileDecryptionProperties>>
101 vector_of_decryption_configurations_;
102 std::string kFooterEncryptionKey_ = std::string(kFooterEncryptionKey);
103 std::string kColumnEncryptionKey1_ = std::string(kColumnEncryptionKey1);
104 std::string kColumnEncryptionKey2_ = std::string(kColumnEncryptionKey2);
105 std::string kFileName_ = std::string(kFileName);
106
CreateDecryptionConfigurations()107 void CreateDecryptionConfigurations() {
108 /**********************************************************************************
109 Creating a number of Decryption configurations
110 **********************************************************************************/
111
112 // Decryption configuration 1: Decrypt using key retriever callback that holds the
113 // keys of two encrypted columns and the footer key.
114 std::shared_ptr<parquet::StringKeyIdRetriever> string_kr1 =
115 std::make_shared<parquet::StringKeyIdRetriever>();
116 string_kr1->PutKey("kf", kFooterEncryptionKey_);
117 string_kr1->PutKey("kc1", kColumnEncryptionKey1_);
118 string_kr1->PutKey("kc2", kColumnEncryptionKey2_);
119 std::shared_ptr<parquet::DecryptionKeyRetriever> kr1 =
120 std::static_pointer_cast<parquet::StringKeyIdRetriever>(string_kr1);
121
122 parquet::FileDecryptionProperties::Builder file_decryption_builder_1;
123 vector_of_decryption_configurations_.push_back(
124 file_decryption_builder_1.key_retriever(kr1)->build());
125
126 // Decryption configuration 2: Decrypt using key retriever callback that holds the
127 // keys of two encrypted columns and the footer key. Supply aad_prefix.
128 std::shared_ptr<parquet::StringKeyIdRetriever> string_kr2 =
129 std::make_shared<parquet::StringKeyIdRetriever>();
130 string_kr2->PutKey("kf", kFooterEncryptionKey_);
131 string_kr2->PutKey("kc1", kColumnEncryptionKey1_);
132 string_kr2->PutKey("kc2", kColumnEncryptionKey2_);
133 std::shared_ptr<parquet::DecryptionKeyRetriever> kr2 =
134 std::static_pointer_cast<parquet::StringKeyIdRetriever>(string_kr2);
135
136 parquet::FileDecryptionProperties::Builder file_decryption_builder_2;
137 vector_of_decryption_configurations_.push_back(
138 file_decryption_builder_2.key_retriever(kr2)->aad_prefix(kFileName_)->build());
139
140 // Decryption configuration 3: Decrypt using explicit column and footer keys. Supply
141 // aad_prefix.
142 std::string path_float_ptr = kFloatFieldName;
143 std::string path_double_ptr = kDoubleFieldName;
144 std::map<std::string, std::shared_ptr<parquet::ColumnDecryptionProperties>>
145 decryption_cols;
146 parquet::ColumnDecryptionProperties::Builder decryption_col_builder31(
147 path_double_ptr);
148 parquet::ColumnDecryptionProperties::Builder decryption_col_builder32(path_float_ptr);
149
150 decryption_cols[path_double_ptr] =
151 decryption_col_builder31.key(kColumnEncryptionKey1_)->build();
152 decryption_cols[path_float_ptr] =
153 decryption_col_builder32.key(kColumnEncryptionKey2_)->build();
154
155 parquet::FileDecryptionProperties::Builder file_decryption_builder_3;
156 vector_of_decryption_configurations_.push_back(
157 file_decryption_builder_3.footer_key(kFooterEncryptionKey_)
158 ->column_keys(decryption_cols)
159 ->build());
160
161 // Decryption Configuration 4: use plaintext footer mode, read only footer + plaintext
162 // columns.
163 vector_of_decryption_configurations_.push_back(NULL);
164 }
165
DecryptFile(std::string file,int decryption_config_num)166 void DecryptFile(std::string file, int decryption_config_num) {
167 std::string exception_msg;
168 std::shared_ptr<FileDecryptionProperties> file_decryption_properties;
169 // if we get decryption_config_num = x then it means the actual number is x+1
170 // and since we want decryption_config_num=4 we set the condition to 3
171 if (decryption_config_num != 3) {
172 file_decryption_properties =
173 vector_of_decryption_configurations_[decryption_config_num]->DeepClone();
174 }
175
176 decryptor_.DecryptFile(file, file_decryption_properties);
177 }
178
179 // Check that the decryption result is as expected.
CheckResults(const std::string file_name,unsigned decryption_config_num,unsigned encryption_config_num)180 void CheckResults(const std::string file_name, unsigned decryption_config_num,
181 unsigned encryption_config_num) {
182 // Encryption_configuration number five contains aad_prefix and
183 // disable_aad_prefix_storage.
184 // An exception is expected to be thrown if the file is not decrypted with aad_prefix.
185 if (encryption_config_num == 5) {
186 if (decryption_config_num == 1 || decryption_config_num == 3) {
187 EXPECT_THROW(DecryptFile(file_name, decryption_config_num - 1), ParquetException);
188 return;
189 }
190 }
191 // Decryption configuration number two contains aad_prefix. An exception is expected
192 // to be thrown if the file was not encrypted with the same aad_prefix.
193 if (decryption_config_num == 2) {
194 if (encryption_config_num != 5 && encryption_config_num != 4) {
195 EXPECT_THROW(DecryptFile(file_name, decryption_config_num - 1), ParquetException);
196 return;
197 }
198 }
199
200 // decryption config 4 can only work when the encryption configuration is 3
201 if (decryption_config_num == 4 && encryption_config_num != 3) {
202 return;
203 }
204 EXPECT_NO_THROW(DecryptFile(file_name, decryption_config_num - 1));
205 }
206
207 // Returns true if file exists. Otherwise returns false.
fexists(const std::string & filename)208 bool fexists(const std::string& filename) {
209 std::ifstream ifile(filename.c_str());
210 return ifile.good();
211 }
212 };
213
214 // Read encrypted parquet file.
215 // The test reads two parquet files that were encrypted using the same encryption
216 // configuration:
217 // one was generated in encryption-write-configurations-test.cc tests and is deleted
218 // once the file is read and the second exists in parquet-testing/data folder.
// The names of the files are passed as parameters to the unit-test.
TEST_P(TestDecryptionConfiguration, TestDecryption) {
  const auto& params = GetParam();
  const int encryption_config_num = std::get<0>(params);
  const char* const param_file_name = std::get<1>(params);

  // Runs every decryption configuration, in order, against the encrypted parquet file
  // at `path`. Configuration numbers are one-based.
  const auto check_all_configurations = [&](const std::string& path) {
    for (unsigned decryption_config_num = 1;
         decryption_config_num <= vector_of_decryption_configurations_.size();
         ++decryption_config_num) {
      CheckResults(path, decryption_config_num, encryption_config_num);
    }
  };

  // First file: the temporary one generated by the
  // encryption-write-configurations-test.cc test.
  const std::string temp_file_path =
      temp_dir->path().ToString() + ("tmp_" + std::string(param_file_name));
  if (!fexists(temp_file_path)) {
    throw ParquetTestException("File " + temp_file_path +
                               " is missing from temporary dir.");
  }
  check_all_configurations(temp_file_path);
  // The temporary file is deleted once it has been read.
  ASSERT_EQ(std::remove(temp_file_path.c_str()), 0);

  // Second file: the one that resides in the parquet-testing/data directory.
  const std::string repo_file_path = data_file(param_file_name);
  if (!fexists(repo_file_path)) {
    throw ParquetTestException("File " + repo_file_path +
                               " is missing from parquet-testing repo.");
  }
  check_all_configurations(repo_file_path);
}
258
259 INSTANTIATE_TEST_SUITE_P(
260 DecryptionTests, TestDecryptionConfiguration,
261 ::testing::Values(
262 std::make_tuple(1, "uniform_encryption.parquet.encrypted"),
263 std::make_tuple(2, "encrypt_columns_and_footer.parquet.encrypted"),
264 std::make_tuple(3, "encrypt_columns_plaintext_footer.parquet.encrypted"),
265 std::make_tuple(4, "encrypt_columns_and_footer_aad.parquet.encrypted"),
266 std::make_tuple(
267 5, "encrypt_columns_and_footer_disable_aad_storage.parquet.encrypted"),
268 std::make_tuple(6, "encrypt_columns_and_footer_ctr.parquet.encrypted")));
269
270 } // namespace test
271 } // namespace encryption
272 } // namespace parquet
273