1 /**
2  * @file   unit-azure.cc
3  *
4  * @section LICENSE
5  *
6  * The MIT License
7  *
8  * @copyright Copyright (c) 2017-2021 TileDB, Inc.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  *
28  * @section DESCRIPTION
29  *
30  * Tests for AZURE API filesystem functions.
31  */
32 
33 #ifdef HAVE_AZURE
34 
35 #include "catch.hpp"
36 #include "tiledb/common/thread_pool.h"
37 #include "tiledb/sm/config/config.h"
38 #include "tiledb/sm/filesystem/azure.h"
39 #include "tiledb/sm/global_state/unit_test_config.h"
40 #include "tiledb/sm/misc/utils.h"
41 
42 #include <fstream>
43 #include <thread>
44 
45 using namespace tiledb::common;
46 using namespace tiledb::sm;
47 
/** One set of configuration key/value pairs describing an Azure connection. */
using ConfMap = std::map<std::string, std::string>;
/** The collection of connection configurations the test cases iterate over. */
using ConfList = std::vector<ConfMap>;

/**
 * Connection configurations that every test case in this file is generated
 * over. Each entry is applied on top of the per-test `Config` before the
 * Azure backend is initialized.
 */
static ConfList test_settings = {
    // Account name + account key authentication against a blob endpoint on
    // the local host (127.0.0.1:10000).
    ConfMap{
        {"vfs.azure.blob_endpoint", "127.0.0.1:10000/devstoreaccount1"},
        {"vfs.azure.storage_account_key",
         "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/"
         "K1SZFPTOtr/KBHBeksoGMGw=="},
        {"vfs.azure.storage_account_name", "devstoreaccount1"},
    },
    // SAS token authentication. Currently disabled because it does not work
    // with the Azurite emulator. The SAS path was manually tested against the
    // Azure Blob Service.
    //{{"vfs.azure.storage_account_name", "devstoreaccount2"},
    // {"vfs.azure.storage_sas_token", ""},
    // {"vfs.azure.blob_endpoint", "127.0.0.1:10000/devstoreaccount2"}}
};
63 
64 struct AzureFx {
65   const std::string AZURE_PREFIX = "azure://";
66   const tiledb::sm::URI AZURE_CONTAINER =
67       tiledb::sm::URI(AZURE_PREFIX + random_container_name("tiledb") + "/");
68   const std::string TEST_DIR = AZURE_CONTAINER.to_string() + "tiledb_test_dir/";
69 
70   tiledb::sm::Azure azure_;
71   ThreadPool thread_pool_;
72 
73   AzureFx() = default;
74   ~AzureFx();
75 
76   void init_azure(Config&& config, ConfMap);
77 
78   static std::string random_container_name(const std::string& prefix);
79 };
80 
~AzureFx()81 AzureFx::~AzureFx() {
82   // Empty container
83   bool is_empty;
84   REQUIRE(azure_.is_empty_container(AZURE_CONTAINER, &is_empty).ok());
85   if (!is_empty) {
86     REQUIRE(azure_.empty_container(AZURE_CONTAINER).ok());
87     REQUIRE(azure_.is_empty_container(AZURE_CONTAINER, &is_empty).ok());
88     REQUIRE(is_empty);
89   }
90 
91   // Delete container
92   REQUIRE(azure_.remove_container(AZURE_CONTAINER).ok());
93 }
94 
init_azure(Config && config,ConfMap settings)95 void AzureFx::init_azure(Config&& config, ConfMap settings) {
96   auto set_conf = [&](auto iter) {
97     std::string key = iter.first;
98     std::string val = iter.second;
99     REQUIRE(config.set(key, val).ok());
100   };
101 
102   // Set provided config settings for connection
103   std::for_each(settings.begin(), settings.end(), set_conf);
104   REQUIRE(config.set("vfs.azure.use_https", "false").ok());
105   REQUIRE(thread_pool_.init(2).ok());
106   REQUIRE(azure_.init(config, &thread_pool_).ok());
107 
108   // Create container
109   bool is_container;
110   REQUIRE(azure_.is_container(AZURE_CONTAINER, &is_container).ok());
111   if (is_container) {
112     REQUIRE(azure_.remove_container(AZURE_CONTAINER).ok());
113   }
114 
115   REQUIRE(azure_.is_container(AZURE_CONTAINER, &is_container).ok());
116   REQUIRE(!is_container);
117   REQUIRE(azure_.create_container(AZURE_CONTAINER).ok());
118 
119   // Check if container is empty
120   bool is_empty;
121   REQUIRE(azure_.is_empty_container(AZURE_CONTAINER, &is_empty).ok());
122   REQUIRE(is_empty);
123 }
124 
random_container_name(const std::string & prefix)125 std::string AzureFx::random_container_name(const std::string& prefix) {
126   std::stringstream ss;
127   ss << prefix << "-" << std::this_thread::get_id() << "-"
128      << tiledb::sm::utils::time::timestamp_now_ms();
129   return ss.str();
130 }
131 
132 TEST_CASE_METHOD(AzureFx, "Test Azure filesystem, file management", "[azure]") {
133   Config config;
134   config.set("vfs.azure.use_block_list_upload", "true");
135 
136   auto settings =
137       GENERATE(from_range(test_settings.begin(), test_settings.end()));
138   init_azure(std::move(config), settings);
139 
140   /* Create the following file hierarchy:
141    *
142    * TEST_DIR/dir/subdir/file1
143    * TEST_DIR/dir/subdir/file2
144    * TEST_DIR/dir/file3
145    * TEST_DIR/file4
146    * TEST_DIR/file5
147    */
148   auto dir = TEST_DIR + "dir/";
149   auto dir2 = TEST_DIR + "dir2/";
150   auto subdir = dir + "subdir/";
151   auto file1 = subdir + "file1";
152   auto file2 = subdir + "file2";
153   auto file3 = dir + "file3";
154   auto file4 = TEST_DIR + "file4";
155   auto file5 = TEST_DIR + "file5";
156   auto file6 = TEST_DIR + "file6";
157 
158   // Check that container is empty
159   bool is_empty;
160   REQUIRE(azure_.is_empty_container(AZURE_CONTAINER, &is_empty).ok());
161   REQUIRE(is_empty);
162 
163   // Continue building the hierarchy
164   bool is_blob = false;
165   REQUIRE(azure_.touch(URI(file1)).ok());
166   REQUIRE(azure_.is_blob(URI(file1), &is_blob).ok());
167   REQUIRE(is_blob);
168   REQUIRE(azure_.touch(URI(file2)).ok());
169   REQUIRE(azure_.is_blob(URI(file2), &is_blob).ok());
170   REQUIRE(is_blob);
171   REQUIRE(azure_.touch(URI(file3)).ok());
172   REQUIRE(azure_.is_blob(URI(file3), &is_blob).ok());
173   REQUIRE(is_blob);
174   REQUIRE(azure_.touch(URI(file4)).ok());
175   REQUIRE(azure_.is_blob(URI(file4), &is_blob).ok());
176   REQUIRE(is_blob);
177   REQUIRE(azure_.touch(URI(file5)).ok());
178   REQUIRE(azure_.is_blob(URI(file5), &is_blob).ok());
179   REQUIRE(is_blob);
180 
181   // Check that container is not empty
182   REQUIRE(azure_.is_empty_container(AZURE_CONTAINER, &is_empty).ok());
183   REQUIRE(!is_empty);
184 
185   // Check invalid file
186   REQUIRE(azure_.is_blob(URI(TEST_DIR + "foo"), &is_blob).ok());
187   REQUIRE(!is_blob);
188 
189   // List with prefix
190   std::vector<std::string> paths;
191   REQUIRE(azure_.ls(URI(TEST_DIR), &paths).ok());
192   REQUIRE(paths.size() == 3);
193   paths.clear();
194   REQUIRE(azure_.ls(URI(dir), &paths).ok());
195   REQUIRE(paths.size() == 2);
196   paths.clear();
197   REQUIRE(azure_.ls(URI(subdir), &paths).ok());
198   REQUIRE(paths.size() == 2);
199   paths.clear();
200   REQUIRE(azure_.ls(AZURE_CONTAINER, &paths, "").ok());  // No delimiter
201   REQUIRE(paths.size() == 5);
202 
203   // Check if a directory exists
204   bool is_dir = false;
205   REQUIRE(azure_.is_dir(URI(file1), &is_dir).ok());
206   REQUIRE(!is_dir);  // Not a dir
207   REQUIRE(azure_.is_dir(URI(file4), &is_dir).ok());
208   REQUIRE(!is_dir);  // Not a dir
209   REQUIRE(azure_.is_dir(URI(dir), &is_dir).ok());
210   REQUIRE(is_dir);  // This is viewed as a dir
211   REQUIRE(azure_.is_dir(URI(TEST_DIR + "dir"), &is_dir).ok());
212   REQUIRE(is_dir);  // This is viewed as a dir
213 
214   // Move file
215   REQUIRE(azure_.move_object(URI(file5), URI(file6)).ok());
216   REQUIRE(azure_.is_blob(URI(file5), &is_blob).ok());
217   REQUIRE(!is_blob);
218   REQUIRE(azure_.is_blob(URI(file6), &is_blob).ok());
219   REQUIRE(is_blob);
220   paths.clear();
221   REQUIRE(azure_.ls(AZURE_CONTAINER, &paths, "").ok());  // No delimiter
222   REQUIRE(paths.size() == 5);
223 
224   // Move directory
225   REQUIRE(azure_.move_dir(URI(dir), URI(dir2)).ok());
226   REQUIRE(azure_.is_dir(URI(dir), &is_dir).ok());
227   REQUIRE(!is_dir);
228   REQUIRE(azure_.is_dir(URI(dir2), &is_dir).ok());
229   REQUIRE(is_dir);
230   paths.clear();
231   REQUIRE(azure_.ls(AZURE_CONTAINER, &paths, "").ok());  // No delimiter
232   REQUIRE(paths.size() == 5);
233 
234   // Remove files
235   REQUIRE(azure_.remove_blob(URI(file4)).ok());
236   REQUIRE(azure_.is_blob(URI(file4), &is_blob).ok());
237   REQUIRE(!is_blob);
238 
239   // Remove directories
240   REQUIRE(azure_.remove_dir(URI(dir2)).ok());
241   REQUIRE(azure_.is_blob(URI(file1), &is_blob).ok());
242   REQUIRE(!is_blob);
243   REQUIRE(azure_.is_blob(URI(file2), &is_blob).ok());
244   REQUIRE(!is_blob);
245   REQUIRE(azure_.is_blob(URI(file3), &is_blob).ok());
246   REQUIRE(!is_blob);
247 }
248 
249 TEST_CASE_METHOD(
250     AzureFx, "Test Azure filesystem, file I/O", "[azure][multipart]") {
251   Config config;
252   const uint64_t max_parallel_ops = 2;
253   const uint64_t block_list_block_size = 4 * 1024 * 1024;
254   config.set("vfs.azure.use_block_list_upload", "true");
255   config.set("vfs.azure.max_parallel_ops", std::to_string(max_parallel_ops));
256   config.set(
257       "vfs.azure.block_list_block_size", std::to_string(block_list_block_size));
258 
259   auto settings =
260       GENERATE(from_range(test_settings.begin(), test_settings.end()));
261   init_azure(std::move(config), settings);
262 
263   const uint64_t write_cache_max_size =
264       max_parallel_ops * block_list_block_size;
265 
266   // Prepare buffers
267   uint64_t buffer_size = write_cache_max_size * 5;
268   auto write_buffer = new char[buffer_size];
269   for (uint64_t i = 0; i < buffer_size; i++)
270     write_buffer[i] = (char)('a' + (i % 26));
271   uint64_t buffer_size_small = 1024 * 1024;
272   auto write_buffer_small = new char[buffer_size_small];
273   for (uint64_t i = 0; i < buffer_size_small; i++)
274     write_buffer_small[i] = (char)('a' + (i % 26));
275 
276   // Write to two files
277   auto largefile = TEST_DIR + "largefile";
278   REQUIRE(azure_.write(URI(largefile), write_buffer, buffer_size).ok());
279   REQUIRE(
280       azure_.write(URI(largefile), write_buffer_small, buffer_size_small).ok());
281   auto smallfile = TEST_DIR + "smallfile";
282   REQUIRE(
283       azure_.write(URI(smallfile), write_buffer_small, buffer_size_small).ok());
284 
285   // Before flushing, the files do not exist
286   bool is_blob = false;
287   REQUIRE(azure_.is_blob(URI(largefile), &is_blob).ok());
288   REQUIRE(!is_blob);
289   REQUIRE(azure_.is_blob(URI(smallfile), &is_blob).ok());
290   REQUIRE(!is_blob);
291 
292   // Flush the files
293   REQUIRE(azure_.flush_blob(URI(largefile)).ok());
294   REQUIRE(azure_.flush_blob(URI(smallfile)).ok());
295 
296   // After flushing, the files exist
297   REQUIRE(azure_.is_blob(URI(largefile), &is_blob).ok());
298   REQUIRE(is_blob);
299   REQUIRE(azure_.is_blob(URI(smallfile), &is_blob).ok());
300   REQUIRE(is_blob);
301 
302   // Get file sizes
303   uint64_t nbytes = 0;
304   REQUIRE(azure_.blob_size(URI(largefile), &nbytes).ok());
305   REQUIRE(nbytes == (buffer_size + buffer_size_small));
306   REQUIRE(azure_.blob_size(URI(smallfile), &nbytes).ok());
307   REQUIRE(nbytes == buffer_size_small);
308 
309   // Read from the beginning
310   auto read_buffer = new char[26];
311   uint64_t bytes_read;
312   REQUIRE(azure_.read(URI(largefile), 0, read_buffer, 26, 0, &bytes_read).ok());
313   CHECK(26 == bytes_read);
314   bool allok = true;
315   for (int i = 0; i < 26; i++) {
316     if (read_buffer[i] != static_cast<char>('a' + i)) {
317       allok = false;
318       break;
319     }
320   }
321   REQUIRE(allok);
322 
323   // Read from a different offset
324   REQUIRE(
325       azure_.read(URI(largefile), 11, read_buffer, 26, 0, &bytes_read).ok());
326   CHECK(26 == bytes_read);
327   allok = true;
328   for (int i = 0; i < 26; i++) {
329     if (read_buffer[i] != static_cast<char>('a' + (i + 11) % 26)) {
330       allok = false;
331       break;
332     }
333   }
334   REQUIRE(allok);
335 }
336 
337 TEST_CASE_METHOD(
338     AzureFx,
339     "Test Azure filesystem, file I/O, no multipart",
340     "[azure][no_multipart]") {
341   Config config;
342   const uint64_t max_parallel_ops = 2;
343   const uint64_t block_list_block_size = 4 * 1024 * 1024;
344   config.set("vfs.azure.use_block_list_upload", "false");
345   config.set("vfs.azure.max_parallel_ops", std::to_string(max_parallel_ops));
346   config.set(
347       "vfs.azure.block_list_block_size", std::to_string(block_list_block_size));
348 
349   auto settings =
350       GENERATE(from_range(test_settings.begin(), test_settings.end()));
351   init_azure(std::move(config), settings);
352 
353   const uint64_t write_cache_max_size =
354       max_parallel_ops * block_list_block_size;
355 
356   // Prepare a large buffer that can fit in the write cache.
357   uint64_t large_buffer_size = write_cache_max_size;
358   auto large_write_buffer = new char[large_buffer_size];
359   for (uint64_t i = 0; i < large_buffer_size; i++)
360     large_write_buffer[i] = (char)('a' + (i % 26));
361 
362   // Prepare a small buffer that can fit in the write cache.
363   uint64_t small_buffer_size = write_cache_max_size / 1024;
364   auto small_write_buffer = new char[small_buffer_size];
365   for (uint64_t i = 0; i < small_buffer_size; i++)
366     small_write_buffer[i] = (char)('a' + (i % 26));
367 
368   // Prepare a buffer too large to fit in the write cache.
369   uint64_t oob_buffer_size = write_cache_max_size + 1;
370   auto oob_write_buffer = new char[oob_buffer_size];
371   for (uint64_t i = 0; i < oob_buffer_size; i++)
372     oob_write_buffer[i] = (char)('a' + (i % 26));
373 
374   auto large_file = TEST_DIR + "largefile";
375   REQUIRE(azure_.write(URI(large_file), large_write_buffer, large_buffer_size)
376               .ok());
377 
378   auto small_file_1 = TEST_DIR + "smallfile1";
379   REQUIRE(azure_.write(URI(small_file_1), small_write_buffer, small_buffer_size)
380               .ok());
381 
382   auto small_file_2 = TEST_DIR + "smallfile2";
383   REQUIRE(azure_.write(URI(small_file_2), small_write_buffer, small_buffer_size)
384               .ok());
385   REQUIRE(azure_.write(URI(small_file_2), small_write_buffer, small_buffer_size)
386               .ok());
387 
388   auto oob_file = TEST_DIR + "oobfile";
389   REQUIRE(!azure_.write(URI(oob_file), oob_write_buffer, oob_buffer_size).ok());
390 
391   // Before flushing, the files do not exist
392   bool is_blob = false;
393   REQUIRE(azure_.is_blob(URI(large_file), &is_blob).ok());
394   REQUIRE(!is_blob);
395   REQUIRE(azure_.is_blob(URI(small_file_1), &is_blob).ok());
396   REQUIRE(!is_blob);
397   REQUIRE(azure_.is_blob(URI(small_file_2), &is_blob).ok());
398   REQUIRE(!is_blob);
399   REQUIRE(azure_.is_blob(URI(oob_file), &is_blob).ok());
400   REQUIRE(!is_blob);
401 
402   // Flush the files
403   REQUIRE(azure_.flush_blob(URI(small_file_1)).ok());
404   REQUIRE(azure_.flush_blob(URI(small_file_2)).ok());
405   REQUIRE(azure_.flush_blob(URI(large_file)).ok());
406 
407   // After flushing, the files exist
408   REQUIRE(azure_.is_blob(URI(large_file), &is_blob).ok());
409   REQUIRE(is_blob);
410   REQUIRE(azure_.is_blob(URI(small_file_1), &is_blob).ok());
411   REQUIRE(is_blob);
412   REQUIRE(azure_.is_blob(URI(small_file_2), &is_blob).ok());
413   REQUIRE(is_blob);
414 
415   // Get file sizes
416   uint64_t nbytes = 0;
417   REQUIRE(azure_.blob_size(URI(large_file), &nbytes).ok());
418   CHECK(nbytes == large_buffer_size);
419   REQUIRE(azure_.blob_size(URI(small_file_1), &nbytes).ok());
420   CHECK(nbytes == small_buffer_size);
421   REQUIRE(azure_.blob_size(URI(small_file_2), &nbytes).ok());
422   CHECK(nbytes == (small_buffer_size + small_buffer_size));
423 
424   // Read from the beginning
425   auto read_buffer = new char[26];
426   uint64_t bytes_read;
427   REQUIRE(
428       azure_.read(URI(large_file), 0, read_buffer, 26, 0, &bytes_read).ok());
429   CHECK(26 == bytes_read);
430   bool allok = true;
431   for (int i = 0; i < 26; i++) {
432     if (read_buffer[i] != static_cast<char>('a' + i)) {
433       allok = false;
434       break;
435     }
436   }
437   REQUIRE(allok);
438 
439   // Read from a different offset
440   REQUIRE(
441       azure_.read(URI(large_file), 11, read_buffer, 26, 0, &bytes_read).ok());
442   CHECK(26 == bytes_read);
443   allok = true;
444   for (int i = 0; i < 26; i++) {
445     if (read_buffer[i] != static_cast<char>('a' + (i + 11) % 26)) {
446       allok = false;
447       break;
448     }
449   }
450   REQUIRE(allok);
451 }
452 
453 #endif