1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include <gtest/gtest.h>
19 #include <stdio.h>
20 
21 #include <fstream>
22 
23 #include "arrow/io/file.h"
24 #include "arrow/testing/gtest_compat.h"
25 
26 #include "parquet/column_reader.h"
27 #include "parquet/column_writer.h"
28 #include "parquet/encryption/test_encryption_util.h"
29 #include "parquet/file_reader.h"
30 #include "parquet/test_util.h"
31 
32 /*
33  * This file contains a unit-test for reading encrypted Parquet files with
34  * different decryption configurations.
35  *
36  * The unit-test is called multiple times, each time to decrypt parquet files using
37  * different decryption configuration as described below.
38  * In each call two encrypted files are read: one temporary file that was generated using
39  * encryption-write-configurations-test.cc test and will be deleted upon
40  * reading it, while the second resides in
41  * parquet-testing/data repository. Those two encrypted files were encrypted using the
42  * same encryption configuration.
43  * The encrypted parquet file names are passed as parameter to the unit-test.
44  *
45  * A detailed description of the Parquet Modular Encryption specification can be found
46  * here:
47  * https://github.com/apache/parquet-format/blob/encryption/Encryption.md
48  *
49  * The following decryption configurations are used to decrypt each parquet file:
50  *
51  *  - Decryption configuration 1:   Decrypt using key retriever that holds the keys of
52  *                                  two encrypted columns and the footer key.
53  *  - Decryption configuration 2:   Decrypt using key retriever that holds the keys of
54  *                                  two encrypted columns and the footer key. Supplies
55  *                                  aad_prefix to verify file identity.
56  *  - Decryption configuration 3:   Decrypt using explicit column and footer keys
57  *                                  (instead of key retrieval callback).
58  *  - Decryption Configuration 4:   PlainText Footer mode - test legacy reads,
59  *                                  read the footer + all non-encrypted columns.
60  *                                  (pairs with encryption configuration 3)
61  *
 * Each encrypted parquet file that is read was encrypted using one of the
 * configurations below:
64  *
65  *  - Encryption configuration 1:   Encrypt all columns and the footer with the same key.
66  *                                  (uniform encryption)
67  *  - Encryption configuration 2:   Encrypt two columns and the footer, with different
68  *                                  keys.
69  *  - Encryption configuration 3:   Encrypt two columns, with different keys.
 *                                  Don't encrypt footer (to enable legacy readers)
71  *                                  - plaintext footer mode.
72  *  - Encryption configuration 4:   Encrypt two columns and the footer, with different
73  *                                  keys. Supply aad_prefix for file identity
74  *                                  verification.
75  *  - Encryption configuration 5:   Encrypt two columns and the footer, with different
76  *                                  keys. Supply aad_prefix, and call
77  *                                  disable_aad_prefix_storage to prevent file
78  *                                  identity storage in file metadata.
79  *  - Encryption configuration 6:   Encrypt two columns and the footer, with different
80  *                                  keys. Use the alternative (AES_GCM_CTR_V1) algorithm.
81 
82  */
83 
84 namespace parquet {
85 namespace encryption {
86 namespace test {
87 
88 using parquet::test::ParquetTestException;
89 
90 class TestDecryptionConfiguration
91     : public testing::TestWithParam<std::tuple<int, const char*>> {
92  public:
SetUp()93   void SetUp() { CreateDecryptionConfigurations(); }
94 
95  protected:
96   FileDecryptor decryptor_;
97   std::string path_to_double_field_ = kDoubleFieldName;
98   std::string path_to_float_field_ = kFloatFieldName;
99   // This vector will hold various decryption configurations.
100   std::vector<std::shared_ptr<parquet::FileDecryptionProperties>>
101       vector_of_decryption_configurations_;
102   std::string kFooterEncryptionKey_ = std::string(kFooterEncryptionKey);
103   std::string kColumnEncryptionKey1_ = std::string(kColumnEncryptionKey1);
104   std::string kColumnEncryptionKey2_ = std::string(kColumnEncryptionKey2);
105   std::string kFileName_ = std::string(kFileName);
106 
CreateDecryptionConfigurations()107   void CreateDecryptionConfigurations() {
108     /**********************************************************************************
109                            Creating a number of Decryption configurations
110      **********************************************************************************/
111 
112     // Decryption configuration 1: Decrypt using key retriever callback that holds the
113     // keys of two encrypted columns and the footer key.
114     std::shared_ptr<parquet::StringKeyIdRetriever> string_kr1 =
115         std::make_shared<parquet::StringKeyIdRetriever>();
116     string_kr1->PutKey("kf", kFooterEncryptionKey_);
117     string_kr1->PutKey("kc1", kColumnEncryptionKey1_);
118     string_kr1->PutKey("kc2", kColumnEncryptionKey2_);
119     std::shared_ptr<parquet::DecryptionKeyRetriever> kr1 =
120         std::static_pointer_cast<parquet::StringKeyIdRetriever>(string_kr1);
121 
122     parquet::FileDecryptionProperties::Builder file_decryption_builder_1;
123     vector_of_decryption_configurations_.push_back(
124         file_decryption_builder_1.key_retriever(kr1)->build());
125 
126     // Decryption configuration 2: Decrypt using key retriever callback that holds the
127     // keys of two encrypted columns and the footer key. Supply aad_prefix.
128     std::shared_ptr<parquet::StringKeyIdRetriever> string_kr2 =
129         std::make_shared<parquet::StringKeyIdRetriever>();
130     string_kr2->PutKey("kf", kFooterEncryptionKey_);
131     string_kr2->PutKey("kc1", kColumnEncryptionKey1_);
132     string_kr2->PutKey("kc2", kColumnEncryptionKey2_);
133     std::shared_ptr<parquet::DecryptionKeyRetriever> kr2 =
134         std::static_pointer_cast<parquet::StringKeyIdRetriever>(string_kr2);
135 
136     parquet::FileDecryptionProperties::Builder file_decryption_builder_2;
137     vector_of_decryption_configurations_.push_back(
138         file_decryption_builder_2.key_retriever(kr2)->aad_prefix(kFileName_)->build());
139 
140     // Decryption configuration 3: Decrypt using explicit column and footer keys. Supply
141     // aad_prefix.
142     std::string path_float_ptr = kFloatFieldName;
143     std::string path_double_ptr = kDoubleFieldName;
144     std::map<std::string, std::shared_ptr<parquet::ColumnDecryptionProperties>>
145         decryption_cols;
146     parquet::ColumnDecryptionProperties::Builder decryption_col_builder31(
147         path_double_ptr);
148     parquet::ColumnDecryptionProperties::Builder decryption_col_builder32(path_float_ptr);
149 
150     decryption_cols[path_double_ptr] =
151         decryption_col_builder31.key(kColumnEncryptionKey1_)->build();
152     decryption_cols[path_float_ptr] =
153         decryption_col_builder32.key(kColumnEncryptionKey2_)->build();
154 
155     parquet::FileDecryptionProperties::Builder file_decryption_builder_3;
156     vector_of_decryption_configurations_.push_back(
157         file_decryption_builder_3.footer_key(kFooterEncryptionKey_)
158             ->column_keys(decryption_cols)
159             ->build());
160 
161     // Decryption Configuration 4: use plaintext footer mode, read only footer + plaintext
162     // columns.
163     vector_of_decryption_configurations_.push_back(NULL);
164   }
165 
DecryptFile(std::string file,int decryption_config_num)166   void DecryptFile(std::string file, int decryption_config_num) {
167     std::string exception_msg;
168     std::shared_ptr<FileDecryptionProperties> file_decryption_properties;
169     // if we get decryption_config_num = x then it means the actual number is x+1
170     // and since we want decryption_config_num=4 we set the condition to 3
171     if (decryption_config_num != 3) {
172       file_decryption_properties =
173           vector_of_decryption_configurations_[decryption_config_num]->DeepClone();
174     }
175 
176     decryptor_.DecryptFile(file, file_decryption_properties);
177   }
178 
179   // Check that the decryption result is as expected.
CheckResults(const std::string file_name,unsigned decryption_config_num,unsigned encryption_config_num)180   void CheckResults(const std::string file_name, unsigned decryption_config_num,
181                     unsigned encryption_config_num) {
182     // Encryption_configuration number five contains aad_prefix and
183     // disable_aad_prefix_storage.
184     // An exception is expected to be thrown if the file is not decrypted with aad_prefix.
185     if (encryption_config_num == 5) {
186       if (decryption_config_num == 1 || decryption_config_num == 3) {
187         EXPECT_THROW(DecryptFile(file_name, decryption_config_num - 1), ParquetException);
188         return;
189       }
190     }
191     // Decryption configuration number two contains aad_prefix. An exception is expected
192     // to be thrown if the file was not encrypted with the same aad_prefix.
193     if (decryption_config_num == 2) {
194       if (encryption_config_num != 5 && encryption_config_num != 4) {
195         EXPECT_THROW(DecryptFile(file_name, decryption_config_num - 1), ParquetException);
196         return;
197       }
198     }
199 
200     // decryption config 4 can only work when the encryption configuration is 3
201     if (decryption_config_num == 4 && encryption_config_num != 3) {
202       return;
203     }
204     EXPECT_NO_THROW(DecryptFile(file_name, decryption_config_num - 1));
205   }
206 
207   // Returns true if file exists. Otherwise returns false.
fexists(const std::string & filename)208   bool fexists(const std::string& filename) {
209     std::ifstream ifile(filename.c_str());
210     return ifile.good();
211   }
212 };
213 
214 // Read encrypted parquet file.
215 // The test reads two parquet files that were encrypted using the same encryption
216 // configuration:
217 // one was generated in encryption-write-configurations-test.cc tests and is deleted
218 // once the file is read and the second exists in parquet-testing/data folder.
219 // The name of the files are passed as parameters to the unit-test.
TEST_P(TestDecryptionConfiguration,TestDecryption)220 TEST_P(TestDecryptionConfiguration, TestDecryption) {
221   int encryption_config_num = std::get<0>(GetParam());
222   const char* param_file_name = std::get<1>(GetParam());
223   // Decrypt parquet file that was generated in encryption-write-configurations-test.cc
224   // test.
225   std::string tmp_file_name = "tmp_" + std::string(param_file_name);
226   std::string file_name = temp_dir->path().ToString() + tmp_file_name;
227   if (!fexists(file_name)) {
228     std::stringstream ss;
229     ss << "File " << file_name << " is missing from temporary dir.";
230     throw ParquetTestException(ss.str());
231   }
232 
233   // Iterate over the decryption configurations and use each one to read the encrypted
234   // parqeut file.
235   for (unsigned index = 0; index < vector_of_decryption_configurations_.size(); ++index) {
236     unsigned decryption_config_num = index + 1;
237     CheckResults(file_name, decryption_config_num, encryption_config_num);
238   }
239   // Delete temporary test file.
240   ASSERT_EQ(std::remove(file_name.c_str()), 0);
241 
242   // Decrypt parquet file that resides in parquet-testing/data directory.
243   file_name = data_file(param_file_name);
244 
245   if (!fexists(file_name)) {
246     std::stringstream ss;
247     ss << "File " << file_name << " is missing from parquet-testing repo.";
248     throw ParquetTestException(ss.str());
249   }
250 
251   // Iterate over the decryption configurations and use each one to read the encrypted
252   // parqeut file.
253   for (unsigned index = 0; index < vector_of_decryption_configurations_.size(); ++index) {
254     unsigned decryption_config_num = index + 1;
255     CheckResults(file_name, decryption_config_num, encryption_config_num);
256   }
257 }
258 
259 INSTANTIATE_TEST_SUITE_P(
260     DecryptionTests, TestDecryptionConfiguration,
261     ::testing::Values(
262         std::make_tuple(1, "uniform_encryption.parquet.encrypted"),
263         std::make_tuple(2, "encrypt_columns_and_footer.parquet.encrypted"),
264         std::make_tuple(3, "encrypt_columns_plaintext_footer.parquet.encrypted"),
265         std::make_tuple(4, "encrypt_columns_and_footer_aad.parquet.encrypted"),
266         std::make_tuple(
267             5, "encrypt_columns_and_footer_disable_aad_storage.parquet.encrypted"),
268         std::make_tuple(6, "encrypt_columns_and_footer_ctr.parquet.encrypted")));
269 
270 }  // namespace test
271 }  // namespace encryption
272 }  // namespace parquet
273