1 /*
2    Copyright (c) 2018, Facebook, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
16 
/* MyRocks global type definitions go here */
18 
19 #pragma once
20 
21 /* C++ standard header files */
22 #include <limits>
23 #include <string>
24 #include <vector>
25 
26 /* MySQL header files */
27 #include "handler.h" /* handler */
28 #include "ib_ut0counter.h"
29 #include "my_global.h" /* ulonglong */
30 #include "sql_string.h"
31 
32 namespace myrocks {
/*
 * Plain data record describing one transaction, used for exporting
 * transaction information through information_schema.rocksdb_trx.
 * rdb_get_all_trx_info() returns a vector of these records.
 *
 * NOTE(review): the meaning of each field is inferred from its name only;
 * confirm against the code that fills this struct before relying on it.
 */
struct Rdb_trx_info {
  std::string name;
  ulonglong trx_id;
  ulonglong write_count;
  ulonglong lock_count;
  int timeout_sec; /* presumably a timeout expressed in seconds -- confirm */
  std::string state;
  std::string waiting_key;
  ulonglong waiting_cf_id;
  /* The is_*, skip_*, read_only and deadlock_detect ints look like 0/1 flags
     -- TODO confirm. */
  int is_replication;
  int skip_trx_api;
  int read_only;
  int deadlock_detect;
  int num_ongoing_bulk_load;
  ulong thread_id;
  std::string query_str;
};
54 
/*
 * Returns a vector of Rdb_trx_info records by value (presumably one record
 * per live transaction -- confirm in the definition, which is not in this
 * header).
 */
std::vector<Rdb_trx_info> rdb_get_all_trx_info();
56 
/*
 * Plain data record describing one deadlock, used for exporting deadlock
 * transaction information through information_schema.rocksdb_deadlock.
 * rdb_get_deadlock_info() returns a vector of these records.
 */
struct Rdb_deadlock_info {
  /*
   * One entry of the deadlock path.
   * NOTE(review): field meanings are inferred from their names -- confirm
   * against the code that fills this struct.
   */
  struct Rdb_dl_trx_info {
    ulonglong trx_id;
    std::string cf_name;
    std::string waiting_key;
    bool exclusive_lock;
    std::string index_name;
    std::string table_name;
  };
  /* Sequence of Rdb_dl_trx_info entries making up this deadlock record. */
  std::vector<Rdb_dl_trx_info> path;
  /* NOTE(review): unit and epoch of this timestamp are not visible here. */
  int64_t deadlock_time;
  /* Presumably the trx_id of the transaction chosen as victim -- confirm. */
  ulonglong victim_trx_id;
};
74 
/*
 * Returns a vector of Rdb_deadlock_info records by value. The definition is
 * not in this header.
 */
std::vector<Rdb_deadlock_info> rdb_get_deadlock_info();
76 
/*
  Name of the default Column Family. It serves two purposes:
  - indexes that do not explicitly specify a CF are stored in it;
  - it is the name used to set the default column family parameter for
    per-CF arguments.
*/
extern const std::string DEFAULT_CF_NAME;

/*
  Name of the Column Family that stores the data dictionary.
*/
extern const std::string DEFAULT_SYSTEM_CF_NAME;

/*
  Name of the hidden primary key of tables that have no PK.
  (constexpr already makes the array const.)
*/
constexpr char HIDDEN_PK_NAME[] = "HIDDEN_PK_ID";

/*
  DEPRECATED: Column Family name which means "put this index into its own
  column family".
*/
extern const std::string PER_INDEX_CF_NAME;
101 
/*
  Names of the MyRocks background threads.

  All four are declared the same way, as constexpr character arrays, so the
  name and its length are compile-time constants. An array still converts
  implicitly to const char * wherever a pointer is expected.
*/

/*
  Name for the background thread.
*/
constexpr char BG_THREAD_NAME[] = "myrocks-bg";

/*
  Name for the drop index thread.
*/
constexpr char INDEX_THREAD_NAME[] = "myrocks-index";

/*
  Name for the index stats calculation thread.
*/
constexpr char INDEX_STATS_THREAD_NAME[] = "myrocks-is";

/*
  Name for the manual compaction thread.
*/
constexpr char MANUAL_COMPACTION_THREAD_NAME[] = "myrocks-mc";
121 
/*
  Character placed between a partition name and the qualifier name, as in:

  - p0_cfname=foo
  - p3_ttl_col=bar
*/
constexpr char RDB_PER_PARTITION_QUALIFIER_NAME_SEP = '_';

/*
  Character placed between a qualifier name and its value, as in:

  - p0_cfname=foo
  - p3_ttl_col=bar
*/
constexpr char RDB_QUALIFIER_VALUE_SEP = '=';

/*
  Character placed between consecutive qualifier assignments, as in:

  - p0_cfname=foo;p1_cfname=bar;p2_cfname=baz
*/
constexpr char RDB_QUALIFIER_SEP = ';';

/*
  Qualifier name selecting a custom per partition column family.
*/
constexpr char RDB_CF_NAME_QUALIFIER[] = "cfname";

/*
  Qualifier name selecting a custom per partition ttl duration.
*/
constexpr char RDB_TTL_DURATION_QUALIFIER[] = "ttl_duration";

/*
  Qualifier name selecting a custom per partition ttl column.
*/
constexpr char RDB_TTL_COL_QUALIFIER[] = "ttl_col";
159 
/*
  Default, minimum valid, and maximum valid sampling rate values (percent)
  used when collecting statistics about a table.
*/
#define RDB_DEFAULT_TBL_STATS_SAMPLE_PCT 10
#define RDB_TBL_STATS_SAMPLE_PCT_MIN 1
#define RDB_TBL_STATS_SAMPLE_PCT_MAX 100

/*
  Upper bound for the table stats recalculation threshold.
  NOTE(review): presumably a percentage, per the _PCT_ in the name -- confirm
  at the use site.
*/
#define RDB_TBL_STATS_RECALC_THRESHOLD_PCT_MAX 100

/* Minimum time interval between stats recalc for a given table */
#define RDB_MIN_RECALC_INTERVAL 10 /* seconds */
172 
/*
  Lowest and highest accepted thread priority values.
  NOTE(review): the range equals the usual nice(2) range -- confirm at the
  use site that this is what the values are compared against.

  The negative literal is parenthesized so that the macro always expands as a
  single operand, whatever expression it is placed in.
*/
#define THREAD_PRIO_MIN (-20)
#define THREAD_PRIO_MAX 19
/*
  Default and maximum values for rocksdb-compaction-sequential-deletes and
  rocksdb-compaction-sequential-deletes-window. They exist to add basic
  boundary checking for those two options.
*/
#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES 0
#define MAX_COMPACTION_SEQUENTIAL_DELETES 2000000

#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES_WINDOW 0
#define MAX_COMPACTION_SEQUENTIAL_DELETES_WINDOW 2000000
184 
/*
  Defaults and upper limits for compaction and flushing related options.

  The numbers come from the hardware currently in use and from internal
  benchmarks, which indicate that parallelization speeds up compactions.

  Deriving them heuristically from the number of CPUs would be the ideal
  approach, but that brings its own set of problems, so simplicity wins for
  now.
*/
constexpr int MAX_BACKGROUND_JOBS = 64;

constexpr int DEFAULT_SUBCOMPACTIONS = 1;
constexpr int MAX_SUBCOMPACTIONS = 64;
199 
/*
  Default for rocksdb_sst_mgr_rate_bytes_per_sec; 0 means disabled.
*/
constexpr uint64_t DEFAULT_SST_MGR_RATE_BYTES_PER_SEC = 0;
204 
/*
  Field sizes (in bytes) used when serializing an XID object to its string
  representation.

  String byte format, written as [field_size: field_value, ...]:
  [
    8: XID.formatID,
    1: XID.gtrid_length,
    1: XID.bqual_length,
    XID.gtrid_length + XID.bqual_length: XID.data
  ]

  RDB_XIDHDR_LEN is the sum of the three fixed-size fields, i.e. everything
  that precedes XID.data.
*/
#define RDB_FORMATID_SZ 8
#define RDB_GTRID_SZ 1
#define RDB_BQUAL_SZ 1
#define RDB_XIDHDR_LEN (RDB_FORMATID_SZ + RDB_GTRID_SZ + RDB_BQUAL_SZ)
219 
/*
  Upper bound for the rate limiter's bytes-per-second value.

  The bound is the largest signed 64-bit value rather than the largest
  unsigned one. Java has no unsigned integer support, so when the value is
  cast to JNI's (signed) jlong the unsigned maximum turns into -1 and causes
  an unhandled exception.

  std::numeric_limits from <limits> (already included by this header) is used
  instead of LLONG_MAX, so the macro does not rely on <climits> being pulled
  in indirectly. Value and type (uint64_t) are unchanged.
*/
#define MAX_RATE_LIMITER_BYTES_PER_SEC \
  (static_cast<uint64_t>(std::numeric_limits<long long>::max()))
228 
/*
  The hidden PK column (for tables with no primary key) is a longlong (aka
  8 bytes). A static_assert() elsewhere in the code validates this assumption.
*/
#define ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN sizeof(longlong)

/*
  Number of bytes used to store the TTL at the beginning of every record of a
  table with TTL enabled.
*/
#define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong)

/*
  Size of a longlong.
  NOTE(review): presumably the number of bytes used to store an
  auto-increment value -- confirm at the use site.
*/
#define ROCKSDB_SIZEOF_AUTOINC_VALUE sizeof(longlong)
242 
243 /*
244   Maximum index prefix length in bytes.
245 */
246 const constexpr uint MAX_INDEX_COL_LEN_LARGE = 3072;
247 const constexpr uint MAX_INDEX_COL_LEN_SMALL = 767;
248 
/*
  MyRocks specific error codes. NB! When adding a new one, update
  HA_ERR_ROCKSDB_LAST as well, and add the matching string to
  rdb_error_messages.

  NOTE: HA_ERR_LAST keeps being bumped up upstream, so the MyRocks codes do
  not start right after HA_ERR_LAST. They start from 500 instead, and the
  static_assert below checks at compile time that 500 is still larger than
  HA_ERR_LAST.
*/
#define HA_ERR_ROCKSDB_FIRST (500)
static_assert(HA_ERR_ROCKSDB_FIRST > HA_ERR_LAST,
              "ROCKSDB err need to be larger than HA_ERR_LAST");
#define HA_ERR_ROCKSDB_PK_REQUIRED (HA_ERR_ROCKSDB_FIRST + 0)
#define HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED \
  (HA_ERR_ROCKSDB_FIRST + 1)
#define HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED \
  (HA_ERR_ROCKSDB_FIRST + 2)
#define HA_ERR_ROCKSDB_COMMIT_FAILED (HA_ERR_ROCKSDB_FIRST + 3)
#define HA_ERR_ROCKSDB_BULK_LOAD (HA_ERR_ROCKSDB_FIRST + 4)
#define HA_ERR_ROCKSDB_CORRUPT_DATA (HA_ERR_ROCKSDB_FIRST + 5)
#define HA_ERR_ROCKSDB_CHECKSUM_MISMATCH (HA_ERR_ROCKSDB_FIRST + 6)
#define HA_ERR_ROCKSDB_INVALID_TABLE (HA_ERR_ROCKSDB_FIRST + 7)
#define HA_ERR_ROCKSDB_PROPERTIES (HA_ERR_ROCKSDB_FIRST + 8)
#define HA_ERR_ROCKSDB_MERGE_FILE_ERR (HA_ERR_ROCKSDB_FIRST + 9)
/*
  Each error code below maps to a RocksDB status code found in:
  rocksdb/include/rocksdb/status.h
*/
#define HA_ERR_ROCKSDB_STATUS_NOT_FOUND (HA_ERR_ROCKSDB_FIRST + 10)
#define HA_ERR_ROCKSDB_STATUS_CORRUPTION (HA_ERR_ROCKSDB_FIRST + 11)
#define HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED (HA_ERR_ROCKSDB_FIRST + 12)
#define HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT (HA_ERR_ROCKSDB_FIRST + 13)
#define HA_ERR_ROCKSDB_STATUS_IO_ERROR (HA_ERR_ROCKSDB_FIRST + 14)
#define HA_ERR_ROCKSDB_STATUS_NO_SPACE (HA_ERR_ROCKSDB_FIRST + 15)
#define HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS (HA_ERR_ROCKSDB_FIRST + 16)
#define HA_ERR_ROCKSDB_STATUS_INCOMPLETE (HA_ERR_ROCKSDB_FIRST + 17)
#define HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS (HA_ERR_ROCKSDB_FIRST + 18)
#define HA_ERR_ROCKSDB_STATUS_TIMED_OUT (HA_ERR_ROCKSDB_FIRST + 19)
#define HA_ERR_ROCKSDB_STATUS_ABORTED (HA_ERR_ROCKSDB_FIRST + 20)
#define HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT (HA_ERR_ROCKSDB_FIRST + 21)
#define HA_ERR_ROCKSDB_STATUS_BUSY (HA_ERR_ROCKSDB_FIRST + 22)
#define HA_ERR_ROCKSDB_STATUS_DEADLOCK (HA_ERR_ROCKSDB_FIRST + 23)
#define HA_ERR_ROCKSDB_STATUS_EXPIRED (HA_ERR_ROCKSDB_FIRST + 24)
#define HA_ERR_ROCKSDB_STATUS_TRY_AGAIN (HA_ERR_ROCKSDB_FIRST + 25)
/* Alias of the highest code above; must follow any newly added code. */
#define HA_ERR_ROCKSDB_LAST HA_ERR_ROCKSDB_STATUS_TRY_AGAIN
294 
/*
  NOTE(review): presumably the storage engine (handlerton) name, per the
  identifier -- confirm at the registration site.
*/
constexpr char rocksdb_hton_name[] = "ROCKSDB";
296 
/*
  Pair (cf_id, index_id) with value comparison.

  Ordering is lexicographic: cf_id is compared first and index_id only breaks
  ties. Equality and less-than are spelled out; the remaining four operators
  are derived from them.
*/
typedef struct _gl_index_id_s {
  uint32_t cf_id;
  uint32_t index_id;

  bool operator==(const _gl_index_id_s &rhs) const {
    return index_id == rhs.index_id && cf_id == rhs.cf_id;
  }
  bool operator!=(const _gl_index_id_s &rhs) const { return !(*this == rhs); }

  bool operator<(const _gl_index_id_s &rhs) const {
    if (cf_id != rhs.cf_id) {
      return cf_id < rhs.cf_id;
    }
    return index_id < rhs.index_id;
  }
  bool operator<=(const _gl_index_id_s &rhs) const { return !(rhs < *this); }
  bool operator>(const _gl_index_id_s &rhs) const { return rhs < *this; }
  bool operator>=(const _gl_index_id_s &rhs) const { return !(*this < rhs); }
} GL_INDEX_ID;
323 
/*
  Kinds of row operations that have a counter. ROWS_MAX is not an operation:
  it is the number of kinds, and it sizes the st_global_stats::rows and
  st_global_stats::system_rows arrays. Keep it last.
*/
enum operation_type : int {
  ROWS_DELETED = 0,         // 0
  ROWS_INSERTED,            // 1
  ROWS_READ,                // 2
  ROWS_UPDATED,             // 3
  ROWS_DELETED_BLIND,       // 4
  ROWS_EXPIRED,             // 5
  ROWS_FILTERED,            // 6
  ROWS_HIDDEN_NO_SNAPSHOT,  // 7
  ROWS_MAX                  // 8, number of enumerators above
};
335 
/*
  Kinds of queries that have a counter. QUERIES_MAX is the number of kinds
  and sizes the st_global_stats::queries array. Keep it last.
*/
enum query_type : int {
  QUERIES_POINT = 0,  // 0
  QUERIES_RANGE,      // 1
  QUERIES_MAX         // 2, number of enumerators above
};
337 
/*
  Outcomes of a table index stats calculation that have a counter.
  TABLE_INDEX_STATS_RESULT_MAX is the number of outcomes and sizes the
  st_global_stats::table_index_stats_result array. Keep it last.
*/
enum table_index_stats_result_type : int {
  TABLE_INDEX_STATS_SUCCESS = 0,  // 0
  TABLE_INDEX_STATS_FAILURE,      // 1
  TABLE_INDEX_STATS_RESULT_MAX    // 2, number of enumerators above
};
343 
/*
  Indexer type passed as the third template argument of the ib_counter_t
  counters in st_global_stats: get_sched_indexer_t when the build defines
  HAVE_SCHED_GETCPU, thread_id_indexer_t otherwise.
*/
#if defined(HAVE_SCHED_GETCPU)
#define RDB_INDEXER get_sched_indexer_t
#else
#define RDB_INDEXER thread_id_indexer_t
#endif
349 
/*
  Global statistics struct used inside MyRocks.

  Every member is an ib_counter_t<ulonglong, 64, RDB_INDEXER>, or an array of
  them sized by the *_MAX enumerator of the matching enum.
  NOTE(review): the template argument 64 is presumably the number of counter
  slots -- confirm in ib_ut0counter.h.
*/
struct st_global_stats {
  // One counter per operation_type value.
  ib_counter_t<ulonglong, 64, RDB_INDEXER> rows[ROWS_MAX];

  // system_rows_ stats are only for system
  // tables. They are not counted in rows_* stats.
  ib_counter_t<ulonglong, 64, RDB_INDEXER> system_rows[ROWS_MAX];

  // One counter per query_type value.
  ib_counter_t<ulonglong, 64, RDB_INDEXER> queries[QUERIES_MAX];

  // One counter per table_index_stats_result_type value.
  ib_counter_t<ulonglong, 64, RDB_INDEXER>
      table_index_stats_result[TABLE_INDEX_STATS_RESULT_MAX];

  ib_counter_t<ulonglong, 64, RDB_INDEXER> covered_secondary_key_lookups;
};
365 
/*
  Struct used for exporting status to MySQL: a flat set of ulonglong values.

  NOTE(review): the field names mirror the operation_type, query_type and
  table_index_stats_result_type enumerators, so the values are presumably
  copied from st_global_stats -- confirm in the code that fills this struct.
*/
struct st_export_stats {
  /* One field per operation_type enumerator name. */
  ulonglong rows_deleted;
  ulonglong rows_inserted;
  ulonglong rows_read;
  ulonglong rows_updated;
  ulonglong rows_deleted_blind;
  ulonglong rows_expired;
  ulonglong rows_filtered;
  ulonglong rows_hidden_no_snapshot;

  /* Only four of the operation kinds have a system_rows_* field here. */
  ulonglong system_rows_deleted;
  ulonglong system_rows_inserted;
  ulonglong system_rows_read;
  ulonglong system_rows_updated;

  ulonglong queries_point;
  ulonglong queries_range;

  ulonglong table_index_stats_success;
  ulonglong table_index_stats_failure;
  /* Has no counterpart among the st_global_stats members. */
  ulonglong table_index_stats_req_queue_length;

  ulonglong covered_secondary_key_lookups;
};
391 
/*
  Struct used for exporting RocksDB memory status.
  NOTE(review): the unit of both values (presumably bytes) is not visible in
  this header -- confirm where the struct is filled.
*/
struct st_memory_stats {
  ulonglong memtable_total;
  ulonglong memtable_unflushed;
};
397 
398 /* Struct used for exporting RocksDB IO stalls stats */
399 struct st_io_stall_stats {
400   ulonglong level0_slowdown;
401   ulonglong level0_slowdown_with_compaction;
402   ulonglong level0_numfiles;
403   ulonglong level0_numfiles_with_compaction;
404   ulonglong stop_for_pending_compaction_bytes;
405   ulonglong slowdown_for_pending_compaction_bytes;
406   ulonglong memtable_compaction;
407   ulonglong memtable_slowdown;
408   ulonglong total_stop;
409   ulonglong total_slowdown;
410 
st_io_stall_statsst_io_stall_stats411   st_io_stall_stats()
412       : level0_slowdown(0),
413         level0_slowdown_with_compaction(0),
414         level0_numfiles(0),
415         level0_numfiles_with_compaction(0),
416         stop_for_pending_compaction_bytes(0),
417         slowdown_for_pending_compaction_bytes(0),
418         memtable_compaction(0),
419         memtable_slowdown(0),
420         total_stop(0),
421         total_slowdown(0) {}
422 };
423 }  // namespace myrocks
424 
// RocksDB may be built inside a custom namespace, named by the
// ROCKSDB_CUSTOM_NAMESPACE macro, to avoid symbol conflicts. This code keeps
// referring to it as `rocksdb` for clarity, through a namespace alias.
// A namespace must be declared before it can be aliased, hence the empty
// namespace definition (which takes no trailing semicolon).
#ifdef ROCKSDB_CUSTOM_NAMESPACE
namespace ROCKSDB_CUSTOM_NAMESPACE {}
namespace rocksdb = ROCKSDB_CUSTOM_NAMESPACE;
#endif
432