1 /** 2 * @file rest_client.h 3 * 4 * @section LICENSE 5 * 6 * The MIT License 7 * 8 * @copyright Copyright (c) 2018-2021 TileDB, Inc. 9 * 10 * Permission is hereby granted, free of charge, to any person obtaining a copy 11 * of this software and associated documentation files (the "Software"), to deal 12 * in the Software without restriction, including without limitation the rights 13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 * copies of the Software, and to permit persons to whom the Software is 15 * furnished to do so, subject to the following conditions: 16 * 17 * The above copyright notice and this permission notice shall be included in 18 * all copies or substantial portions of the Software. 19 * 20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 * THE SOFTWARE. 27 * 28 * @section DESCRIPTION 29 * 30 * This file defines a REST client class. 31 */ 32 33 #ifndef TILEDB_REST_CLIENT_H 34 #define TILEDB_REST_CLIENT_H 35 36 #include <string> 37 #include <unordered_map> 38 39 #include "tiledb/common/status.h" 40 #include "tiledb/common/thread_pool.h" 41 #include "tiledb/sm/serialization/query.h" 42 #include "tiledb/sm/stats/stats.h" 43 44 using namespace tiledb::common; 45 46 namespace tiledb { 47 namespace sm { 48 49 class ArraySchema; 50 class Config; 51 class Query; 52 53 enum class SerializationType : uint8_t; 54 55 class RestClient { 56 public: 57 /** Constructor. */ 58 RestClient(); 59 60 /** Initialize the REST client with the given config. */ 61 Status init( 62 stats::Stats* parent_stats, const Config* config, ThreadPool* compute_tp); 63 64 /** Sets a header that will be attached to all requests. */ 65 Status set_header(const std::string& name, const std::string& value); 66 67 /** 68 * Get a data encoded array schema from rest server 69 * 70 * @param uri of array being loaded 71 * @param array_schema array schema to send to server 72 * @return Status Ok() on success Error() on failures 73 */ 74 Status get_array_schema_from_rest(const URI& uri, ArraySchema** array_schema); 75 76 /** 77 * Post a data array schema to rest server 78 * 79 * @param uri of array being created 80 * @param array_schema array schema to load into 81 * @return Status Ok() on success Error() on failures 82 */ 83 Status post_array_schema_to_rest(const URI& uri, ArraySchema* array_schema); 84 85 /** 86 * Deregisters an array at the given URI from the REST server. 87 * 88 * @param uri Array URI to deregister 89 * @return Status 90 */ 91 Status deregister_array_from_rest(const URI& uri); 92 93 /** 94 * Get array's non_empty domain from rest server 95 * 96 * @param array Array model to fetch and set non empty domain on 97 * @param timestamp_start Inclusive starting timestamp at which to open array 98 * @param timestamp_end Inclusive ending timestamp at which to open array 99 * @return Status Ok() on success Error() on failures 100 */ 101 Status get_array_non_empty_domain( 102 Array* array, uint64_t timestamp_start, uint64_t timestamp_end); 103 104 /** 105 * Get array's max buffer sizes from rest server. 106 * 107 * @param uri URI of array 108 * @param schema Array schema of array 109 * @param subarray Subrray to get max buffer sizes for 110 * @param buffer_sizes Will be populated with max buffer sizes 111 * @return Status 112 */ 113 Status get_array_max_buffer_sizes( 114 const URI& uri, 115 const ArraySchema* schema, 116 const void* subarray, 117 std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>* 118 buffer_sizes); 119 120 /** 121 * Gets the array's metadata from the REST server (and updates the in-memory 122 * Metadata of the array to match the returned values). 123 * 124 * @param uri Array URI 125 * @param timestamp_start Inclusive starting timestamp at which to open array 126 * @param timestamp_end Inclusive ending timestamp at which to open array 127 * @param array Array to fetch metadata for 128 * @return Status 129 */ 130 Status get_array_metadata_from_rest( 131 const URI& uri, 132 uint64_t timestamp_start, 133 uint64_t timestamp_end, 134 Array* array); 135 136 /** 137 * Posts the array's metadata to the REST server. 138 * 139 * @param uri Array URI 140 * @param timestamp_start Inclusive starting timestamp at which to open array 141 * @param timestamp_end Inclusive ending timestamp at which to open array 142 * @param array Array to update/post metadata for. 143 * @return Status 144 */ 145 Status post_array_metadata_to_rest( 146 const URI& uri, 147 uint64_t timestamp_start, 148 uint64_t timestamp_end, 149 Array* array); 150 151 /** 152 * Post a data query to rest server 153 * 154 * @param uri of array being queried 155 * @param query to send to server and store results in, this qill be modified 156 * @return Status Ok() on success Error() on failures 157 */ 158 Status submit_query_to_rest(const URI& uri, Query* query); 159 160 /** 161 * Post a data query to rest server 162 * 163 * @param uri of array being queried 164 * @param query to send to server and store results in, this will be modified 165 * @return Status Ok() on success Error() on failures 166 */ 167 Status finalize_query_to_rest(const URI& uri, Query* query); 168 169 /** 170 * Get array's non_empty domain from rest server 171 * 172 * @param array Array model to fetch and set non empty domain on 173 * @return Status Ok() on success Error() on failures 174 */ 175 Status get_query_est_result_sizes(const URI& uri, Query* query); 176 177 /** 178 * Post array schema evolution to rest server 179 * 180 * @param uri of array being queried 181 * @param array_schema_evolution to send to server 182 * @return Status Ok() on success Error() on failures 183 */ 184 Status post_array_schema_evolution_to_rest( 185 const URI& uri, ArraySchemaEvolution* array_schema_evolution); 186 187 private: 188 /* ********************************* */ 189 /* PRIVATE ATTRIBUTES */ 190 /* ********************************* */ 191 192 /** The class stats. */ 193 stats::Stats* stats_; 194 195 /** The TileDB config options (contains server and auth info). */ 196 const Config* config_; 197 198 /** The thread pool for compute-bound tasks. */ 199 ThreadPool* compute_tp_; 200 201 /** Rest server config param. */ 202 std::string rest_server_; 203 204 /** Serialization type. */ 205 SerializationType serialization_type_; 206 207 /** 208 * If true (the default), automatically resubmit incomplete queries on the 209 * server-side. This guarantees that the user only receive a complete query 210 * result from the server. 211 * 212 * When this is turned on, it is currently an error if the user buffers on the 213 * client are too small to receive all data received from the server 214 * (regardless of how many times the query is resubmitted). 215 */ 216 bool resubmit_incomplete_; 217 218 /** Collection of extra headers that are attached to REST requests. */ 219 std::unordered_map<std::string, std::string> extra_headers_; 220 221 /** Array URI to redirected server mapping. */ 222 std::unordered_map<std::string, std::string> redirect_meta_; 223 224 /** Mutex for thread-safety. */ 225 mutable std::mutex redirect_mtx_; 226 227 /* ********************************* */ 228 /* PRIVATE METHODS */ 229 /* ********************************* */ 230 231 /** 232 * POSTs a query submit request to the REST server and deserializes the 233 * response into the same query object. 234 * 235 * For read queries, this also updates the given copy state with the number of 236 * bytes copied for each attribute, which allows for automatic resubmission of 237 * incomplete queries while concatenating to the user buffers. 238 * 239 * @param uri URI of array being queried 240 * @param query Query to send to server and store results in, this will be 241 * modified. 242 * @param copy_state Map of copy state per attribute. As attribute data is 243 * copied into user buffers on reads, the state of each attribute in this 244 * map is updated accordingly. 245 * @return 246 */ 247 Status post_query_submit( 248 const URI& uri, Query* query, serialization::CopyState* copy_state); 249 250 /** 251 * Callback to invoke as partial, buffered response data is received from 252 * posting a query. 253 * 254 * This is not thread-safe. It expects the response data to be ordered. The 255 * response must contain serialized query objects, prefixed by an 8-byte 256 * unsigned integer that contains the byte-size of the serialized query object 257 * it is prefixing. The scratch space must be empty before the first 258 * invocation, and must not change until the last invocation has completed. 259 * 260 * @param reset True if the callback must wipe the in-memory state 261 * @param contents the partial response data 262 * @param content_nbytes the size of the response data in 'contents' 263 * @param skip_retries Output argument that can be set to true to 264 * prevent the curl layer from retrying this request. 265 * @scratch scratch space to use between invocations of this callback 266 * @query the query object used for deserializing the serialized query 267 * objects in the response data. 268 * @param copy_state Map of copy state per attribute. As attribute data is 269 * copied into user buffers on reads, the state of each attribute in this 270 * map is updated accordingly. 271 * @return Number of acknowledged bytes 272 */ 273 size_t query_post_call_back( 274 const bool reset, 275 void* constcontents, 276 const size_t content_nbytes, 277 bool* constskip_retries, 278 tdb_shared_ptr<Buffer> scratch, 279 Query* query, 280 serialization::CopyState* copy_state); 281 282 /** 283 * Returns a string representation of the given subarray. The format is: 284 * 285 * "dim0min,dim0max,dim1min,dim1max,..." 286 * 287 * @param schema Array schema to use for domain information 288 * @param subarray Subarray to convert to string 289 * @param subarray_str Will be set to the CSV string 290 * @return Status 291 */ 292 static Status subarray_to_str( 293 const ArraySchema* schema, 294 const void* subarray, 295 std::string* subarray_str); 296 297 /** 298 * Sets the buffer sizes on the given query using the given state mapping (per 299 * attribute). Applicable only when deserializing read queries on the client. 300 * 301 * @param copy_state State map of attribute to buffer sizes. 302 * @param query Query to update buffers for 303 * @return Status 304 */ 305 Status update_attribute_buffer_sizes( 306 const serialization::CopyState& copy_state, Query* query) const; 307 308 /** 309 * Helper function encapsulating the functionality of looking up for cached 310 * redirected rest server addresses to avoid the redirection overhead 311 * 312 * @param array_ns Array namespace 313 * @param array_uri Array URI 314 * @return Returns the redirection URI if exists and empty string otherwise 315 */ 316 std::string redirect_uri(const std::string& cache_key); 317 }; 318 319 } // namespace sm 320 } // namespace tiledb 321 322 #endif // TILEDB_REST_CLIENT_H 323