1 /**
2  * @file   rest_client.h
3  *
4  * @section LICENSE
5  *
6  * The MIT License
7  *
8  * @copyright Copyright (c) 2018-2021 TileDB, Inc.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  *
28  * @section DESCRIPTION
29  *
30  * This file defines a REST client class.
31  */
32 
33 #ifndef TILEDB_REST_CLIENT_H
34 #define TILEDB_REST_CLIENT_H
35 
36 #include <string>
37 #include <unordered_map>
38 
39 #include "tiledb/common/status.h"
40 #include "tiledb/common/thread_pool.h"
41 #include "tiledb/sm/serialization/query.h"
42 #include "tiledb/sm/stats/stats.h"
43 
44 using namespace tiledb::common;
45 
46 namespace tiledb {
47 namespace sm {
48 
49 class ArraySchema;
50 class Config;
51 class Query;
52 
53 enum class SerializationType : uint8_t;
54 
55 class RestClient {
56  public:
57   /** Constructor. */
58   RestClient();
59 
60   /** Initialize the REST client with the given config. */
61   Status init(
62       stats::Stats* parent_stats, const Config* config, ThreadPool* compute_tp);
63 
64   /** Sets a header that will be attached to all requests. */
65   Status set_header(const std::string& name, const std::string& value);
66 
67   /**
68    * Get a data encoded array schema from rest server
69    *
70    * @param uri of array being loaded
71    * @param array_schema array schema to send to server
72    * @return Status Ok() on success Error() on failures
73    */
74   Status get_array_schema_from_rest(const URI& uri, ArraySchema** array_schema);
75 
76   /**
77    * Post a data array schema to rest server
78    *
79    * @param uri of array being created
80    * @param array_schema array schema to load into
81    * @return Status Ok() on success Error() on failures
82    */
83   Status post_array_schema_to_rest(const URI& uri, ArraySchema* array_schema);
84 
85   /**
86    * Deregisters an array at the given URI from the REST server.
87    *
88    * @param uri Array URI to deregister
89    * @return Status
90    */
91   Status deregister_array_from_rest(const URI& uri);
92 
93   /**
94    * Get array's non_empty domain from rest server
95    *
96    * @param array Array model to fetch and set non empty domain on
97    * @param timestamp_start Inclusive starting timestamp at which to open array
98    * @param timestamp_end Inclusive ending timestamp at which to open array
99    * @return Status Ok() on success Error() on failures
100    */
101   Status get_array_non_empty_domain(
102       Array* array, uint64_t timestamp_start, uint64_t timestamp_end);
103 
104   /**
105    * Get array's max buffer sizes from rest server.
106    *
107    * @param uri URI of array
108    * @param schema Array schema of array
109    * @param subarray Subrray to get max buffer sizes for
110    * @param buffer_sizes Will be populated with max buffer sizes
111    * @return Status
112    */
113   Status get_array_max_buffer_sizes(
114       const URI& uri,
115       const ArraySchema* schema,
116       const void* subarray,
117       std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>*
118           buffer_sizes);
119 
120   /**
121    * Gets the array's metadata from the REST server (and updates the in-memory
122    * Metadata of the array to match the returned values).
123    *
124    * @param uri Array URI
125    * @param timestamp_start Inclusive starting timestamp at which to open array
126    * @param timestamp_end Inclusive ending timestamp at which to open array
127    * @param array Array to fetch metadata for
128    * @return Status
129    */
130   Status get_array_metadata_from_rest(
131       const URI& uri,
132       uint64_t timestamp_start,
133       uint64_t timestamp_end,
134       Array* array);
135 
136   /**
137    * Posts the array's metadata to the REST server.
138    *
139    * @param uri Array URI
140    * @param timestamp_start Inclusive starting timestamp at which to open array
141    * @param timestamp_end Inclusive ending timestamp at which to open array
142    * @param array Array to update/post metadata for.
143    * @return Status
144    */
145   Status post_array_metadata_to_rest(
146       const URI& uri,
147       uint64_t timestamp_start,
148       uint64_t timestamp_end,
149       Array* array);
150 
151   /**
152    * Post a data query to rest server
153    *
154    * @param uri of array being queried
155    * @param query to send to server and store results in, this qill be modified
156    * @return Status Ok() on success Error() on failures
157    */
158   Status submit_query_to_rest(const URI& uri, Query* query);
159 
160   /**
161    * Post a data query to rest server
162    *
163    * @param uri of array being queried
164    * @param query to send to server and store results in, this will be modified
165    * @return Status Ok() on success Error() on failures
166    */
167   Status finalize_query_to_rest(const URI& uri, Query* query);
168 
169   /**
170    * Get array's non_empty domain from rest server
171    *
172    * @param array Array model to fetch and set non empty domain on
173    * @return Status Ok() on success Error() on failures
174    */
175   Status get_query_est_result_sizes(const URI& uri, Query* query);
176 
177   /**
178    * Post array schema evolution to rest server
179    *
180    * @param uri of array being queried
181    * @param array_schema_evolution to send to server
182    * @return Status Ok() on success Error() on failures
183    */
184   Status post_array_schema_evolution_to_rest(
185       const URI& uri, ArraySchemaEvolution* array_schema_evolution);
186 
187  private:
188   /* ********************************* */
189   /*        PRIVATE ATTRIBUTES         */
190   /* ********************************* */
191 
192   /** The class stats. */
193   stats::Stats* stats_;
194 
195   /** The TileDB config options (contains server and auth info). */
196   const Config* config_;
197 
198   /** The thread pool for compute-bound tasks. */
199   ThreadPool* compute_tp_;
200 
201   /** Rest server config param. */
202   std::string rest_server_;
203 
204   /** Serialization type. */
205   SerializationType serialization_type_;
206 
207   /**
208    * If true (the default), automatically resubmit incomplete queries on the
209    * server-side. This guarantees that the user only receive a complete query
210    * result from the server.
211    *
212    * When this is turned on, it is currently an error if the user buffers on the
213    * client are too small to receive all data received from the server
214    * (regardless of how many times the query is resubmitted).
215    */
216   bool resubmit_incomplete_;
217 
218   /** Collection of extra headers that are attached to REST requests. */
219   std::unordered_map<std::string, std::string> extra_headers_;
220 
221   /** Array URI to redirected server mapping. */
222   std::unordered_map<std::string, std::string> redirect_meta_;
223 
224   /** Mutex for thread-safety. */
225   mutable std::mutex redirect_mtx_;
226 
227   /* ********************************* */
228   /*         PRIVATE METHODS           */
229   /* ********************************* */
230 
231   /**
232    * POSTs a query submit request to the REST server and deserializes the
233    * response into the same query object.
234    *
235    * For read queries, this also updates the given copy state with the number of
236    * bytes copied for each attribute, which allows for automatic resubmission of
237    * incomplete queries while concatenating to the user buffers.
238    *
239    * @param uri URI of array being queried
240    * @param query Query to send to server and store results in, this will be
241    *    modified.
242    * @param copy_state Map of copy state per attribute. As attribute data is
243    *    copied into user buffers on reads, the state of each attribute in this
244    *    map is updated accordingly.
245    * @return
246    */
247   Status post_query_submit(
248       const URI& uri, Query* query, serialization::CopyState* copy_state);
249 
250   /**
251    * Callback to invoke as partial, buffered response data is received from
252    * posting a query.
253    *
254    * This is not thread-safe. It expects the response data to be ordered. The
255    * response must contain serialized query objects, prefixed by an 8-byte
256    * unsigned integer that contains the byte-size of the serialized query object
257    * it is prefixing. The scratch space must be empty before the first
258    * invocation, and must not change until the last invocation has completed.
259    *
260    * @param reset True if the callback must wipe the in-memory state
261    * @param contents the partial response data
262    * @param content_nbytes the size of the response data in 'contents'
263    * @param skip_retries Output argument that can be set to true to
264    *    prevent the curl layer from retrying this request.
265    * @scratch scratch space to use between invocations of this callback
266    * @query the query object used for deserializing the serialized query
267    *    objects in the response data.
268    * @param copy_state Map of copy state per attribute. As attribute data is
269    *    copied into user buffers on reads, the state of each attribute in this
270    *    map is updated accordingly.
271    * @return Number of acknowledged bytes
272    */
273   size_t query_post_call_back(
274       const bool reset,
275       void* constcontents,
276       const size_t content_nbytes,
277       bool* constskip_retries,
278       tdb_shared_ptr<Buffer> scratch,
279       Query* query,
280       serialization::CopyState* copy_state);
281 
282   /**
283    * Returns a string representation of the given subarray. The format is:
284    *
285    *   "dim0min,dim0max,dim1min,dim1max,..."
286    *
287    * @param schema Array schema to use for domain information
288    * @param subarray Subarray to convert to string
289    * @param subarray_str Will be set to the CSV string
290    * @return Status
291    */
292   static Status subarray_to_str(
293       const ArraySchema* schema,
294       const void* subarray,
295       std::string* subarray_str);
296 
297   /**
298    * Sets the buffer sizes on the given query using the given state mapping (per
299    * attribute). Applicable only when deserializing read queries on the client.
300    *
301    * @param copy_state State map of attribute to buffer sizes.
302    * @param query Query to update buffers for
303    * @return Status
304    */
305   Status update_attribute_buffer_sizes(
306       const serialization::CopyState& copy_state, Query* query) const;
307 
308   /**
309    * Helper function encapsulating the functionality of looking up for cached
310    * redirected rest server addresses to avoid the redirection overhead
311    *
312    * @param array_ns Array namespace
313    * @param array_uri Array URI
314    * @return Returns the redirection URI if exists and empty string otherwise
315    */
316   std::string redirect_uri(const std::string& cache_key);
317 };
318 
319 }  // namespace sm
320 }  // namespace tiledb
321 
322 #endif  // TILEDB_REST_CLIENT_H
323