1 /**
2 * @file unit-azure.cc
3 *
4 * @section LICENSE
5 *
6 * The MIT License
7 *
8 * @copyright Copyright (c) 2017-2021 TileDB, Inc.
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 *
28 * @section DESCRIPTION
29 *
30 * Tests for AZURE API filesystem functions.
31 */
32
33 #ifdef HAVE_AZURE
34
#include "catch.hpp"
#include "tiledb/common/thread_pool.h"
#include "tiledb/sm/config/config.h"
#include "tiledb/sm/filesystem/azure.h"
#include "tiledb/sm/global_state/unit_test_config.h"
#include "tiledb/sm/misc/utils.h"

#include <algorithm>
#include <fstream>
#include <map>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
44
45 using namespace tiledb::common;
46 using namespace tiledb::sm;
47
/** A single set of configuration key/value pairs. */
using ConfMap = std::map<std::string, std::string>;

/** A list of configurations; each test case is generated once per entry. */
using ConfList = std::vector<ConfMap>;

/**
 * Connection settings the test cases iterate over. The only active entry
 * uses a shared-key account against a blob endpoint on 127.0.0.1:10000.
 */
static ConfList test_settings = {
    // Shared-key authentication.
    ConfMap{
        {"vfs.azure.storage_account_name", "devstoreaccount1"},
        {"vfs.azure.storage_account_key",
         "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/"
         "K1SZFPTOtr/KBHBeksoGMGw=="},
        {"vfs.azure.blob_endpoint", "127.0.0.1:10000/devstoreaccount1"},
    },
    // Currently disabled because it does not work with the Azurite emulator.
    // The SAS path was manually tested against the Azure Blob Service.
    // ConfMap{
    //     {"vfs.azure.storage_account_name", "devstoreaccount2"},
    //     {"vfs.azure.storage_sas_token", ""},
    //     {"vfs.azure.blob_endpoint", "127.0.0.1:10000/devstoreaccount2"},
    // },
};
63
64 struct AzureFx {
65 const std::string AZURE_PREFIX = "azure://";
66 const tiledb::sm::URI AZURE_CONTAINER =
67 tiledb::sm::URI(AZURE_PREFIX + random_container_name("tiledb") + "/");
68 const std::string TEST_DIR = AZURE_CONTAINER.to_string() + "tiledb_test_dir/";
69
70 tiledb::sm::Azure azure_;
71 ThreadPool thread_pool_;
72
73 AzureFx() = default;
74 ~AzureFx();
75
76 void init_azure(Config&& config, ConfMap);
77
78 static std::string random_container_name(const std::string& prefix);
79 };
80
~AzureFx()81 AzureFx::~AzureFx() {
82 // Empty container
83 bool is_empty;
84 REQUIRE(azure_.is_empty_container(AZURE_CONTAINER, &is_empty).ok());
85 if (!is_empty) {
86 REQUIRE(azure_.empty_container(AZURE_CONTAINER).ok());
87 REQUIRE(azure_.is_empty_container(AZURE_CONTAINER, &is_empty).ok());
88 REQUIRE(is_empty);
89 }
90
91 // Delete container
92 REQUIRE(azure_.remove_container(AZURE_CONTAINER).ok());
93 }
94
init_azure(Config && config,ConfMap settings)95 void AzureFx::init_azure(Config&& config, ConfMap settings) {
96 auto set_conf = [&](auto iter) {
97 std::string key = iter.first;
98 std::string val = iter.second;
99 REQUIRE(config.set(key, val).ok());
100 };
101
102 // Set provided config settings for connection
103 std::for_each(settings.begin(), settings.end(), set_conf);
104 REQUIRE(config.set("vfs.azure.use_https", "false").ok());
105 REQUIRE(thread_pool_.init(2).ok());
106 REQUIRE(azure_.init(config, &thread_pool_).ok());
107
108 // Create container
109 bool is_container;
110 REQUIRE(azure_.is_container(AZURE_CONTAINER, &is_container).ok());
111 if (is_container) {
112 REQUIRE(azure_.remove_container(AZURE_CONTAINER).ok());
113 }
114
115 REQUIRE(azure_.is_container(AZURE_CONTAINER, &is_container).ok());
116 REQUIRE(!is_container);
117 REQUIRE(azure_.create_container(AZURE_CONTAINER).ok());
118
119 // Check if container is empty
120 bool is_empty;
121 REQUIRE(azure_.is_empty_container(AZURE_CONTAINER, &is_empty).ok());
122 REQUIRE(is_empty);
123 }
124
random_container_name(const std::string & prefix)125 std::string AzureFx::random_container_name(const std::string& prefix) {
126 std::stringstream ss;
127 ss << prefix << "-" << std::this_thread::get_id() << "-"
128 << tiledb::sm::utils::time::timestamp_now_ms();
129 return ss.str();
130 }
131
132 TEST_CASE_METHOD(AzureFx, "Test Azure filesystem, file management", "[azure]") {
133 Config config;
134 config.set("vfs.azure.use_block_list_upload", "true");
135
136 auto settings =
137 GENERATE(from_range(test_settings.begin(), test_settings.end()));
138 init_azure(std::move(config), settings);
139
140 /* Create the following file hierarchy:
141 *
142 * TEST_DIR/dir/subdir/file1
143 * TEST_DIR/dir/subdir/file2
144 * TEST_DIR/dir/file3
145 * TEST_DIR/file4
146 * TEST_DIR/file5
147 */
148 auto dir = TEST_DIR + "dir/";
149 auto dir2 = TEST_DIR + "dir2/";
150 auto subdir = dir + "subdir/";
151 auto file1 = subdir + "file1";
152 auto file2 = subdir + "file2";
153 auto file3 = dir + "file3";
154 auto file4 = TEST_DIR + "file4";
155 auto file5 = TEST_DIR + "file5";
156 auto file6 = TEST_DIR + "file6";
157
158 // Check that container is empty
159 bool is_empty;
160 REQUIRE(azure_.is_empty_container(AZURE_CONTAINER, &is_empty).ok());
161 REQUIRE(is_empty);
162
163 // Continue building the hierarchy
164 bool is_blob = false;
165 REQUIRE(azure_.touch(URI(file1)).ok());
166 REQUIRE(azure_.is_blob(URI(file1), &is_blob).ok());
167 REQUIRE(is_blob);
168 REQUIRE(azure_.touch(URI(file2)).ok());
169 REQUIRE(azure_.is_blob(URI(file2), &is_blob).ok());
170 REQUIRE(is_blob);
171 REQUIRE(azure_.touch(URI(file3)).ok());
172 REQUIRE(azure_.is_blob(URI(file3), &is_blob).ok());
173 REQUIRE(is_blob);
174 REQUIRE(azure_.touch(URI(file4)).ok());
175 REQUIRE(azure_.is_blob(URI(file4), &is_blob).ok());
176 REQUIRE(is_blob);
177 REQUIRE(azure_.touch(URI(file5)).ok());
178 REQUIRE(azure_.is_blob(URI(file5), &is_blob).ok());
179 REQUIRE(is_blob);
180
181 // Check that container is not empty
182 REQUIRE(azure_.is_empty_container(AZURE_CONTAINER, &is_empty).ok());
183 REQUIRE(!is_empty);
184
185 // Check invalid file
186 REQUIRE(azure_.is_blob(URI(TEST_DIR + "foo"), &is_blob).ok());
187 REQUIRE(!is_blob);
188
189 // List with prefix
190 std::vector<std::string> paths;
191 REQUIRE(azure_.ls(URI(TEST_DIR), &paths).ok());
192 REQUIRE(paths.size() == 3);
193 paths.clear();
194 REQUIRE(azure_.ls(URI(dir), &paths).ok());
195 REQUIRE(paths.size() == 2);
196 paths.clear();
197 REQUIRE(azure_.ls(URI(subdir), &paths).ok());
198 REQUIRE(paths.size() == 2);
199 paths.clear();
200 REQUIRE(azure_.ls(AZURE_CONTAINER, &paths, "").ok()); // No delimiter
201 REQUIRE(paths.size() == 5);
202
203 // Check if a directory exists
204 bool is_dir = false;
205 REQUIRE(azure_.is_dir(URI(file1), &is_dir).ok());
206 REQUIRE(!is_dir); // Not a dir
207 REQUIRE(azure_.is_dir(URI(file4), &is_dir).ok());
208 REQUIRE(!is_dir); // Not a dir
209 REQUIRE(azure_.is_dir(URI(dir), &is_dir).ok());
210 REQUIRE(is_dir); // This is viewed as a dir
211 REQUIRE(azure_.is_dir(URI(TEST_DIR + "dir"), &is_dir).ok());
212 REQUIRE(is_dir); // This is viewed as a dir
213
214 // Move file
215 REQUIRE(azure_.move_object(URI(file5), URI(file6)).ok());
216 REQUIRE(azure_.is_blob(URI(file5), &is_blob).ok());
217 REQUIRE(!is_blob);
218 REQUIRE(azure_.is_blob(URI(file6), &is_blob).ok());
219 REQUIRE(is_blob);
220 paths.clear();
221 REQUIRE(azure_.ls(AZURE_CONTAINER, &paths, "").ok()); // No delimiter
222 REQUIRE(paths.size() == 5);
223
224 // Move directory
225 REQUIRE(azure_.move_dir(URI(dir), URI(dir2)).ok());
226 REQUIRE(azure_.is_dir(URI(dir), &is_dir).ok());
227 REQUIRE(!is_dir);
228 REQUIRE(azure_.is_dir(URI(dir2), &is_dir).ok());
229 REQUIRE(is_dir);
230 paths.clear();
231 REQUIRE(azure_.ls(AZURE_CONTAINER, &paths, "").ok()); // No delimiter
232 REQUIRE(paths.size() == 5);
233
234 // Remove files
235 REQUIRE(azure_.remove_blob(URI(file4)).ok());
236 REQUIRE(azure_.is_blob(URI(file4), &is_blob).ok());
237 REQUIRE(!is_blob);
238
239 // Remove directories
240 REQUIRE(azure_.remove_dir(URI(dir2)).ok());
241 REQUIRE(azure_.is_blob(URI(file1), &is_blob).ok());
242 REQUIRE(!is_blob);
243 REQUIRE(azure_.is_blob(URI(file2), &is_blob).ok());
244 REQUIRE(!is_blob);
245 REQUIRE(azure_.is_blob(URI(file3), &is_blob).ok());
246 REQUIRE(!is_blob);
247 }
248
249 TEST_CASE_METHOD(
250 AzureFx, "Test Azure filesystem, file I/O", "[azure][multipart]") {
251 Config config;
252 const uint64_t max_parallel_ops = 2;
253 const uint64_t block_list_block_size = 4 * 1024 * 1024;
254 config.set("vfs.azure.use_block_list_upload", "true");
255 config.set("vfs.azure.max_parallel_ops", std::to_string(max_parallel_ops));
256 config.set(
257 "vfs.azure.block_list_block_size", std::to_string(block_list_block_size));
258
259 auto settings =
260 GENERATE(from_range(test_settings.begin(), test_settings.end()));
261 init_azure(std::move(config), settings);
262
263 const uint64_t write_cache_max_size =
264 max_parallel_ops * block_list_block_size;
265
266 // Prepare buffers
267 uint64_t buffer_size = write_cache_max_size * 5;
268 auto write_buffer = new char[buffer_size];
269 for (uint64_t i = 0; i < buffer_size; i++)
270 write_buffer[i] = (char)('a' + (i % 26));
271 uint64_t buffer_size_small = 1024 * 1024;
272 auto write_buffer_small = new char[buffer_size_small];
273 for (uint64_t i = 0; i < buffer_size_small; i++)
274 write_buffer_small[i] = (char)('a' + (i % 26));
275
276 // Write to two files
277 auto largefile = TEST_DIR + "largefile";
278 REQUIRE(azure_.write(URI(largefile), write_buffer, buffer_size).ok());
279 REQUIRE(
280 azure_.write(URI(largefile), write_buffer_small, buffer_size_small).ok());
281 auto smallfile = TEST_DIR + "smallfile";
282 REQUIRE(
283 azure_.write(URI(smallfile), write_buffer_small, buffer_size_small).ok());
284
285 // Before flushing, the files do not exist
286 bool is_blob = false;
287 REQUIRE(azure_.is_blob(URI(largefile), &is_blob).ok());
288 REQUIRE(!is_blob);
289 REQUIRE(azure_.is_blob(URI(smallfile), &is_blob).ok());
290 REQUIRE(!is_blob);
291
292 // Flush the files
293 REQUIRE(azure_.flush_blob(URI(largefile)).ok());
294 REQUIRE(azure_.flush_blob(URI(smallfile)).ok());
295
296 // After flushing, the files exist
297 REQUIRE(azure_.is_blob(URI(largefile), &is_blob).ok());
298 REQUIRE(is_blob);
299 REQUIRE(azure_.is_blob(URI(smallfile), &is_blob).ok());
300 REQUIRE(is_blob);
301
302 // Get file sizes
303 uint64_t nbytes = 0;
304 REQUIRE(azure_.blob_size(URI(largefile), &nbytes).ok());
305 REQUIRE(nbytes == (buffer_size + buffer_size_small));
306 REQUIRE(azure_.blob_size(URI(smallfile), &nbytes).ok());
307 REQUIRE(nbytes == buffer_size_small);
308
309 // Read from the beginning
310 auto read_buffer = new char[26];
311 uint64_t bytes_read;
312 REQUIRE(azure_.read(URI(largefile), 0, read_buffer, 26, 0, &bytes_read).ok());
313 CHECK(26 == bytes_read);
314 bool allok = true;
315 for (int i = 0; i < 26; i++) {
316 if (read_buffer[i] != static_cast<char>('a' + i)) {
317 allok = false;
318 break;
319 }
320 }
321 REQUIRE(allok);
322
323 // Read from a different offset
324 REQUIRE(
325 azure_.read(URI(largefile), 11, read_buffer, 26, 0, &bytes_read).ok());
326 CHECK(26 == bytes_read);
327 allok = true;
328 for (int i = 0; i < 26; i++) {
329 if (read_buffer[i] != static_cast<char>('a' + (i + 11) % 26)) {
330 allok = false;
331 break;
332 }
333 }
334 REQUIRE(allok);
335 }
336
337 TEST_CASE_METHOD(
338 AzureFx,
339 "Test Azure filesystem, file I/O, no multipart",
340 "[azure][no_multipart]") {
341 Config config;
342 const uint64_t max_parallel_ops = 2;
343 const uint64_t block_list_block_size = 4 * 1024 * 1024;
344 config.set("vfs.azure.use_block_list_upload", "false");
345 config.set("vfs.azure.max_parallel_ops", std::to_string(max_parallel_ops));
346 config.set(
347 "vfs.azure.block_list_block_size", std::to_string(block_list_block_size));
348
349 auto settings =
350 GENERATE(from_range(test_settings.begin(), test_settings.end()));
351 init_azure(std::move(config), settings);
352
353 const uint64_t write_cache_max_size =
354 max_parallel_ops * block_list_block_size;
355
356 // Prepare a large buffer that can fit in the write cache.
357 uint64_t large_buffer_size = write_cache_max_size;
358 auto large_write_buffer = new char[large_buffer_size];
359 for (uint64_t i = 0; i < large_buffer_size; i++)
360 large_write_buffer[i] = (char)('a' + (i % 26));
361
362 // Prepare a small buffer that can fit in the write cache.
363 uint64_t small_buffer_size = write_cache_max_size / 1024;
364 auto small_write_buffer = new char[small_buffer_size];
365 for (uint64_t i = 0; i < small_buffer_size; i++)
366 small_write_buffer[i] = (char)('a' + (i % 26));
367
368 // Prepare a buffer too large to fit in the write cache.
369 uint64_t oob_buffer_size = write_cache_max_size + 1;
370 auto oob_write_buffer = new char[oob_buffer_size];
371 for (uint64_t i = 0; i < oob_buffer_size; i++)
372 oob_write_buffer[i] = (char)('a' + (i % 26));
373
374 auto large_file = TEST_DIR + "largefile";
375 REQUIRE(azure_.write(URI(large_file), large_write_buffer, large_buffer_size)
376 .ok());
377
378 auto small_file_1 = TEST_DIR + "smallfile1";
379 REQUIRE(azure_.write(URI(small_file_1), small_write_buffer, small_buffer_size)
380 .ok());
381
382 auto small_file_2 = TEST_DIR + "smallfile2";
383 REQUIRE(azure_.write(URI(small_file_2), small_write_buffer, small_buffer_size)
384 .ok());
385 REQUIRE(azure_.write(URI(small_file_2), small_write_buffer, small_buffer_size)
386 .ok());
387
388 auto oob_file = TEST_DIR + "oobfile";
389 REQUIRE(!azure_.write(URI(oob_file), oob_write_buffer, oob_buffer_size).ok());
390
391 // Before flushing, the files do not exist
392 bool is_blob = false;
393 REQUIRE(azure_.is_blob(URI(large_file), &is_blob).ok());
394 REQUIRE(!is_blob);
395 REQUIRE(azure_.is_blob(URI(small_file_1), &is_blob).ok());
396 REQUIRE(!is_blob);
397 REQUIRE(azure_.is_blob(URI(small_file_2), &is_blob).ok());
398 REQUIRE(!is_blob);
399 REQUIRE(azure_.is_blob(URI(oob_file), &is_blob).ok());
400 REQUIRE(!is_blob);
401
402 // Flush the files
403 REQUIRE(azure_.flush_blob(URI(small_file_1)).ok());
404 REQUIRE(azure_.flush_blob(URI(small_file_2)).ok());
405 REQUIRE(azure_.flush_blob(URI(large_file)).ok());
406
407 // After flushing, the files exist
408 REQUIRE(azure_.is_blob(URI(large_file), &is_blob).ok());
409 REQUIRE(is_blob);
410 REQUIRE(azure_.is_blob(URI(small_file_1), &is_blob).ok());
411 REQUIRE(is_blob);
412 REQUIRE(azure_.is_blob(URI(small_file_2), &is_blob).ok());
413 REQUIRE(is_blob);
414
415 // Get file sizes
416 uint64_t nbytes = 0;
417 REQUIRE(azure_.blob_size(URI(large_file), &nbytes).ok());
418 CHECK(nbytes == large_buffer_size);
419 REQUIRE(azure_.blob_size(URI(small_file_1), &nbytes).ok());
420 CHECK(nbytes == small_buffer_size);
421 REQUIRE(azure_.blob_size(URI(small_file_2), &nbytes).ok());
422 CHECK(nbytes == (small_buffer_size + small_buffer_size));
423
424 // Read from the beginning
425 auto read_buffer = new char[26];
426 uint64_t bytes_read;
427 REQUIRE(
428 azure_.read(URI(large_file), 0, read_buffer, 26, 0, &bytes_read).ok());
429 CHECK(26 == bytes_read);
430 bool allok = true;
431 for (int i = 0; i < 26; i++) {
432 if (read_buffer[i] != static_cast<char>('a' + i)) {
433 allok = false;
434 break;
435 }
436 }
437 REQUIRE(allok);
438
439 // Read from a different offset
440 REQUIRE(
441 azure_.read(URI(large_file), 11, read_buffer, 26, 0, &bytes_read).ok());
442 CHECK(26 == bytes_read);
443 allok = true;
444 for (int i = 0; i < 26; i++) {
445 if (read_buffer[i] != static_cast<char>('a' + (i + 11) % 26)) {
446 allok = false;
447 break;
448 }
449 }
450 REQUIRE(allok);
451 }
452
453 #endif