1 /*
2  * Knobs.h
3  *
4  * This source file is part of the FoundationDB open source project
5  *
6  * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *     http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 #ifndef FDBSERVER_KNOBS_H
22 #define FDBSERVER_KNOBS_H
23 #pragma once
24 
25 #include "flow/Knobs.h"
26 #include "fdbrpc/fdbrpc.h"
27 #include "fdbclient/Knobs.h"
28 
// Disk queue page size constant (4096; presumably a byte count -- confirm against the disk queue code).
// NOTE(review): names that begin with an underscore followed by an uppercase letter are reserved
// for the implementation; the name is kept as-is because code outside this header may refer to it.
static const int _PAGE_SIZE = 4 * 1024;
31 
32 class ServerKnobs : public Knobs {
33 public:
34 	// Versions
35 	int64_t VERSIONS_PER_SECOND;
36 	int64_t MAX_VERSIONS_IN_FLIGHT;
37 	int64_t MAX_VERSIONS_IN_FLIGHT_FORCED;
38 	int64_t MAX_READ_TRANSACTION_LIFE_VERSIONS;
39 	int64_t MAX_WRITE_TRANSACTION_LIFE_VERSIONS;
40 	double MAX_COMMIT_BATCH_INTERVAL; // Each master proxy generates a CommitTransactionBatchRequest at least this often, so that versions always advance smoothly
41 
42 	// TLogs
43 	double TLOG_TIMEOUT;  // tlog OR master proxy failure - master's reaction time
44 	double RECOVERY_TLOG_SMART_QUORUM_DELAY;		// smaller might be better for bug amplification
45 	double TLOG_STORAGE_MIN_UPDATE_INTERVAL;
46 	double BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL;
47 	double UNFLUSHED_DATA_RATIO;
48 	int DESIRED_TOTAL_BYTES;
49 	int DESIRED_UPDATE_BYTES;
50 	double UPDATE_DELAY;
51 	int MAXIMUM_PEEK_BYTES;
52 	int APPLY_MUTATION_BYTES;
53 	int RECOVERY_DATA_BYTE_LIMIT;
54 	int BUGGIFY_RECOVERY_DATA_LIMIT;
55 	double LONG_TLOG_COMMIT_TIME;
56 	int64_t LARGE_TLOG_COMMIT_BYTES;
57 	double BUGGIFY_RECOVER_MEMORY_LIMIT;
58 	double BUGGIFY_WORKER_REMOVED_MAX_LAG;
59 	int64_t UPDATE_STORAGE_BYTE_LIMIT;
60 	int64_t REFERENCE_SPILL_UPDATE_STORAGE_BYTE_LIMIT;
61 	double TLOG_PEEK_DELAY;
62 	int LEGACY_TLOG_UPGRADE_ENTRIES_PER_VERSION;
63 	int VERSION_MESSAGES_OVERHEAD_FACTOR_1024THS; // Multiplicative factor to bound total space used to store a version message (measured in 1/1024ths, e.g. a value of 2048 yields a factor of 2).
64 	int64_t VERSION_MESSAGES_ENTRY_BYTES_WITH_OVERHEAD;
65 	double TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR;
66 	int64_t TLOG_MESSAGE_BLOCK_BYTES;
67 	int64_t MAX_MESSAGE_SIZE;
68 	int LOG_SYSTEM_PUSHED_DATA_BLOCK_SIZE;
69 	double PEEK_TRACKER_EXPIRATION_TIME;
70 	int PARALLEL_GET_MORE_REQUESTS;
71 	int MULTI_CURSOR_PRE_FETCH_LIMIT;
72 	int64_t MAX_QUEUE_COMMIT_BYTES;
73 	int64_t VERSIONS_PER_BATCH;
74 	int CONCURRENT_LOG_ROUTER_READS;
75 	double DISK_QUEUE_ADAPTER_MIN_SWITCH_TIME;
76 	double DISK_QUEUE_ADAPTER_MAX_SWITCH_TIME;
77 	int64_t TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES;
78 	int64_t DISK_QUEUE_FILE_EXTENSION_BYTES; // When we grow the disk queue, by how many bytes should it grow?
79 	int64_t DISK_QUEUE_FILE_SHRINK_BYTES; // When we shrink the disk queue, by how many bytes should it shrink?
80 	int TLOG_DEGRADED_DELAY_COUNT;
81 	double TLOG_DEGRADED_DURATION;
82 
83 	// Data distribution queue
84 	double HEALTH_POLL_TIME;
85 	double BEST_TEAM_STUCK_DELAY;
86 	double BG_DD_POLLING_INTERVAL;
87 	double DD_QUEUE_LOGGING_INTERVAL;
88 	double RELOCATION_PARALLELISM_PER_SOURCE_SERVER;
89 	int DD_QUEUE_MAX_KEY_SERVERS;
90 	int DD_REBALANCE_PARALLELISM;
91 	int DD_REBALANCE_RESET_AMOUNT;
92 	double BG_DD_MAX_WAIT;
93 	double BG_DD_MIN_WAIT;
94 	double BG_DD_INCREASE_RATE;
95 	double BG_DD_DECREASE_RATE;
96 	double BG_DD_SATURATION_DELAY;
97 	double INFLIGHT_PENALTY_HEALTHY;
98 	double INFLIGHT_PENALTY_REDUNDANT;
99 	double INFLIGHT_PENALTY_UNHEALTHY;
100 	double INFLIGHT_PENALTY_ONE_LEFT;
101 
102 	// Data distribution
103 	double RETRY_RELOCATESHARD_DELAY;
104 	double DATA_DISTRIBUTION_FAILURE_REACTION_TIME;
105 	int MIN_SHARD_BYTES, SHARD_BYTES_RATIO, SHARD_BYTES_PER_SQRT_BYTES, MAX_SHARD_BYTES, KEY_SERVER_SHARD_BYTES;
106 	int64_t SHARD_MAX_BYTES_PER_KSEC, // Shards with more than this bandwidth will be split immediately
107 		SHARD_MIN_BYTES_PER_KSEC,     // Shards with more than this bandwidth will not be merged
108 		SHARD_SPLIT_BYTES_PER_KSEC;   // When splitting a shard, it is split into pieces with less than this bandwidth
109 	double STORAGE_METRIC_TIMEOUT;
110 	double METRIC_DELAY;
111 	double ALL_DATA_REMOVED_DELAY;
112 	double INITIAL_FAILURE_REACTION_DELAY;
113 	double CHECK_TEAM_DELAY;
114 	double LOG_ON_COMPLETION_DELAY;
115 	int BEST_TEAM_MAX_TEAM_TRIES;
116 	int BEST_TEAM_OPTION_COUNT;
117 	int BEST_OF_AMT;
118 	double SERVER_LIST_DELAY;
119 	double RECRUITMENT_IDLE_DELAY;
120 	double STORAGE_RECRUITMENT_DELAY;
121 	double DATA_DISTRIBUTION_LOGGING_INTERVAL;
122 	double DD_ENABLED_CHECK_DELAY;
123 	double DD_STALL_CHECK_DELAY;
124 	double DD_MERGE_COALESCE_DELAY;
125 	double STORAGE_METRICS_POLLING_DELAY;
126 	double STORAGE_METRICS_RANDOM_DELAY;
127 	double FREE_SPACE_RATIO_CUTOFF;
128 	double FREE_SPACE_RATIO_DD_CUTOFF;
129 	int DESIRED_TEAMS_PER_SERVER;
130 	int MAX_TEAMS_PER_SERVER;
131 	int64_t DD_SHARD_SIZE_GRANULARITY;
132 	int64_t DD_SHARD_SIZE_GRANULARITY_SIM;
133 	int DD_MOVE_KEYS_PARALLELISM;
134 	int DD_MERGE_LIMIT;
135 	double DD_SHARD_METRICS_TIMEOUT;
136 	int64_t DD_LOCATION_CACHE_SIZE;
137 	double MOVEKEYS_LOCK_POLLING_DELAY;
138 	double DEBOUNCE_RECRUITING_DELAY;
139 
140 	// TeamRemover to remove redundant teams
141 	bool TR_FLAG_DISABLE_TEAM_REMOVER;   // disable the teamRemover actor
142 	double TR_REMOVE_MACHINE_TEAM_DELAY; // wait for the specified time before try to remove next machine team
143 
144 	double DD_FAILURE_TIME;
145 	double DD_ZERO_HEALTHY_TEAM_DELAY;
146 
147 	// Redwood Storage Engine
148 	int PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT;
149 	int PREFIX_TREE_IMMEDIATE_KEY_SIZE_MIN;
150 
151 	// KeyValueStore SQLITE
152 	int CLEAR_BUFFER_SIZE;
153 	double READ_VALUE_TIME_ESTIMATE;
154 	double READ_RANGE_TIME_ESTIMATE;
155 	double SET_TIME_ESTIMATE;
156 	double CLEAR_TIME_ESTIMATE;
157 	double COMMIT_TIME_ESTIMATE;
158 	int CHECK_FREE_PAGE_AMOUNT;
159 	double DISK_METRIC_LOGGING_INTERVAL;
160 	int64_t SOFT_HEAP_LIMIT;
161 
162 	int SQLITE_PAGE_SCAN_ERROR_LIMIT;
163 	int SQLITE_BTREE_PAGE_USABLE;
164 	int SQLITE_BTREE_CELL_MAX_LOCAL;
165 	int SQLITE_BTREE_CELL_MIN_LOCAL;
166 	int SQLITE_FRAGMENT_PRIMARY_PAGE_USABLE;
167 	int SQLITE_FRAGMENT_OVERFLOW_PAGE_USABLE;
168 	double SQLITE_FRAGMENT_MIN_SAVINGS;
169 
170 	// KeyValueStoreSqlite spring cleaning
171 	double CLEANING_INTERVAL;
172 	double SPRING_CLEANING_TIME_ESTIMATE;
173 	double SPRING_CLEANING_VACUUMS_PER_LAZY_DELETE_PAGE;
174 	int SPRING_CLEANING_MIN_LAZY_DELETE_PAGES;
175 	int SPRING_CLEANING_MAX_LAZY_DELETE_PAGES;
176 	int SPRING_CLEANING_LAZY_DELETE_BATCH_SIZE;
177 	int SPRING_CLEANING_MIN_VACUUM_PAGES;
178 	int SPRING_CLEANING_MAX_VACUUM_PAGES;
179 
180 	// KeyValueStoreMemory
181 	int64_t REPLACE_CONTENTS_BYTES;
182 
183 	// Leader election
184 	int MAX_NOTIFICATIONS;
185 	int MIN_NOTIFICATIONS;
186 	double NOTIFICATION_FULL_CLEAR_TIME;
187 	double CANDIDATE_MIN_DELAY;
188 	double CANDIDATE_MAX_DELAY;
189 	double CANDIDATE_GROWTH_RATE;
190 	double POLLING_FREQUENCY;
191 	double HEARTBEAT_FREQUENCY;
192 
193 	// Master Proxy
194 	double START_TRANSACTION_BATCH_INTERVAL_MIN;
195 	double START_TRANSACTION_BATCH_INTERVAL_MAX;
196 	double START_TRANSACTION_BATCH_INTERVAL_LATENCY_FRACTION;
197 	double START_TRANSACTION_BATCH_INTERVAL_SMOOTHER_ALPHA;
198 	double START_TRANSACTION_BATCH_QUEUE_CHECK_INTERVAL;
199 	double START_TRANSACTION_MAX_TRANSACTIONS_TO_START;
200 	int START_TRANSACTION_MAX_REQUESTS_TO_START;
201 
202 	double COMMIT_TRANSACTION_BATCH_INTERVAL_FROM_IDLE;
203 	double COMMIT_TRANSACTION_BATCH_INTERVAL_MIN;
204 	double COMMIT_TRANSACTION_BATCH_INTERVAL_MAX;
205 	double COMMIT_TRANSACTION_BATCH_INTERVAL_LATENCY_FRACTION;
206 	double COMMIT_TRANSACTION_BATCH_INTERVAL_SMOOTHER_ALPHA;
207 	int    COMMIT_TRANSACTION_BATCH_COUNT_MAX;
208 	int    COMMIT_TRANSACTION_BATCH_BYTES_MIN;
209 	int    COMMIT_TRANSACTION_BATCH_BYTES_MAX;
210 	double COMMIT_TRANSACTION_BATCH_BYTES_SCALE_BASE;
211 	double COMMIT_TRANSACTION_BATCH_BYTES_SCALE_POWER;
212 	int64_t COMMIT_BATCHES_MEM_BYTES_HARD_LIMIT;
213 	double COMMIT_BATCHES_MEM_FRACTION_OF_TOTAL;
214 	double COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR;
215 
216 	double TRANSACTION_BUDGET_TIME;
217 	double RESOLVER_COALESCE_TIME;
218 	int BUGGIFIED_ROW_LIMIT;
219 	double PROXY_SPIN_DELAY;
220 	double UPDATE_REMOTE_LOG_VERSION_INTERVAL;
221 	int MAX_TXS_POP_VERSION_HISTORY;
222 
223 	// Master Server
224 	double COMMIT_SLEEP_TIME;
225 	double MIN_BALANCE_TIME;
226 	int64_t MIN_BALANCE_DIFFERENCE;
227 	double SECONDS_BEFORE_NO_FAILURE_DELAY;
228 	int64_t MAX_TXS_SEND_MEMORY;
229 	int64_t MAX_RECOVERY_VERSIONS;
230 	double MAX_RECOVERY_TIME;
231 	double PROVISIONAL_START_DELAY;
232 	double PROVISIONAL_DELAY_GROWTH;
233 	double PROVISIONAL_MAX_DELAY;
234 
235 	// Resolver
236 	int64_t KEY_BYTES_PER_SAMPLE;
237 	int64_t SAMPLE_OFFSET_PER_KEY;
238 	double SAMPLE_EXPIRATION_TIME;
239 	double SAMPLE_POLL_TIME;
240 	int64_t RESOLVER_STATE_MEMORY_LIMIT;
241 
242 	//Cluster Controller
243 	double CLUSTER_CONTROLLER_LOGGING_DELAY;
244 	double MASTER_FAILURE_REACTION_TIME;
245 	double MASTER_FAILURE_SLOPE_DURING_RECOVERY;
246 	int WORKER_COORDINATION_PING_DELAY;
247 	double SIM_SHUTDOWN_TIMEOUT;
248 	double SHUTDOWN_TIMEOUT;
249 	double MASTER_SPIN_DELAY;
250 	double CC_CHANGE_DELAY;
251 	double CC_CLASS_DELAY;
252 	double WAIT_FOR_GOOD_RECRUITMENT_DELAY;
253 	double WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY;
254 	double ATTEMPT_RECRUITMENT_DELAY;
255 	double WAIT_FOR_DISTRIBUTOR_JOIN_DELAY;
256 	double WAIT_FOR_RATEKEEPER_JOIN_DELAY;
257 	double WORKER_FAILURE_TIME;
258 	double CHECK_OUTSTANDING_INTERVAL;
259 	double INCOMPATIBLE_PEERS_LOGGING_INTERVAL;
260 	double VERSION_LAG_METRIC_INTERVAL;
261 	int64_t MAX_VERSION_DIFFERENCE;
262 	double FORCE_RECOVERY_CHECK_DELAY;
263 	double RATEKEEPER_FAILURE_TIME;
264 
265 	// Knobs used to select the best policy (via monte carlo)
266 	int POLICY_RATING_TESTS;	// number of tests per policy (in order to compare)
267 	int POLICY_GENERATIONS;		// number of policies to generate
268 
269 	int EXPECTED_MASTER_FITNESS;
270 	int EXPECTED_TLOG_FITNESS;
271 	int EXPECTED_LOG_ROUTER_FITNESS;
272 	int EXPECTED_PROXY_FITNESS;
273 	int EXPECTED_RESOLVER_FITNESS;
274 	double RECRUITMENT_TIMEOUT;
275 
276 	//Move Keys
277 	double SHARD_READY_DELAY;
278 	double SERVER_READY_QUORUM_INTERVAL;
279 	double SERVER_READY_QUORUM_TIMEOUT;
280 	double REMOVE_RETRY_DELAY;
281 	int MOVE_KEYS_KRM_LIMIT;
282 	int MOVE_KEYS_KRM_LIMIT_BYTES; //This must be sufficiently larger than CLIENT_KNOBS->KEY_SIZE_LIMIT (fdbclient/Knobs.h) to ensure that at least two entries will be returned from an attempt to read a key range map
283 	int MAX_SKIP_TAGS;
284 	double MAX_ADDED_SOURCES_MULTIPLIER;
285 
286 	//FdbServer
287 	double MIN_REBOOT_TIME;
288 	double MAX_REBOOT_TIME;
289 	std::string LOG_DIRECTORY;
290 	int64_t SERVER_MEM_LIMIT;
291 
292 	//Ratekeeper
293 	double SMOOTHING_AMOUNT;
294 	double SLOW_SMOOTHING_AMOUNT;
295 	double METRIC_UPDATE_RATE;
296 	double DETAILED_METRIC_UPDATE_RATE;
297 	double LAST_LIMITED_RATIO;
298 
299 	int64_t TARGET_BYTES_PER_STORAGE_SERVER;
300 	int64_t SPRING_BYTES_STORAGE_SERVER;
301 	int64_t TARGET_BYTES_PER_STORAGE_SERVER_BATCH;
302 	int64_t SPRING_BYTES_STORAGE_SERVER_BATCH;
303 
304 	int64_t TARGET_BYTES_PER_TLOG;
305 	int64_t SPRING_BYTES_TLOG;
306 	int64_t TARGET_BYTES_PER_TLOG_BATCH;
307 	int64_t SPRING_BYTES_TLOG_BATCH;
308 	int64_t TLOG_SPILL_THRESHOLD;
309 	int64_t TLOG_HARD_LIMIT_BYTES;
310 	int64_t TLOG_RECOVER_MEMORY_LIMIT;
311 
312 	double MAX_TRANSACTIONS_PER_BYTE;
313 
314 	int64_t MIN_FREE_SPACE;
315 	double MIN_FREE_SPACE_RATIO;
316 
317 	double MAX_TL_SS_VERSION_DIFFERENCE; // spring starts at half this value
318 	double MAX_TL_SS_VERSION_DIFFERENCE_BATCH;
319 	int MAX_MACHINES_FALLING_BEHIND;
320 
321 	//Storage Metrics
322 	double STORAGE_METRICS_AVERAGE_INTERVAL;
323 	double STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
324 	double SPLIT_JITTER_AMOUNT;
325 	int64_t IOPS_UNITS_PER_SAMPLE;
326 	int64_t BANDWIDTH_UNITS_PER_SAMPLE;
327 
328 	//Storage Server
329 	double STORAGE_LOGGING_DELAY;
330 	double STORAGE_SERVER_POLL_METRICS_DELAY;
331 	double FUTURE_VERSION_DELAY;
332 	int STORAGE_LIMIT_BYTES;
333 	int BUGGIFY_LIMIT_BYTES;
334 	int FETCH_BLOCK_BYTES;
335 	int FETCH_KEYS_PARALLELISM_BYTES;
336 	int BUGGIFY_BLOCK_BYTES;
337 	int64_t STORAGE_HARD_LIMIT_BYTES;
338 	int STORAGE_COMMIT_BYTES;
339 	double STORAGE_COMMIT_INTERVAL;
340 	double UPDATE_SHARD_VERSION_INTERVAL;
341 	int BYTE_SAMPLING_FACTOR;
342 	int BYTE_SAMPLING_OVERHEAD;
343 	int MAX_STORAGE_SERVER_WATCH_BYTES;
344 	int MAX_BYTE_SAMPLE_CLEAR_MAP_SIZE;
345 	double LONG_BYTE_SAMPLE_RECOVERY_DELAY;
346 	int BYTE_SAMPLE_LOAD_PARALLELISM;
347 	double BYTE_SAMPLE_LOAD_DELAY;
348 	double UPDATE_STORAGE_PROCESS_STATS_INTERVAL;
349 
350 	//Wait Failure
351 	int BUGGIFY_OUTSTANDING_WAIT_FAILURE_REQUESTS;
352 	int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;
353 	double WAIT_FAILURE_DELAY_LIMIT;
354 
355 	//Worker
356 	double WORKER_LOGGING_INTERVAL;
357 	double INCOMPATIBLE_PEER_DELAY_BEFORE_LOGGING;
358 	double DEGRADED_RESET_INTERVAL;
359 	double DEGRADED_WARNING_LIMIT;
360 	double DEGRADED_WARNING_RESET_DELAY;
361 
362 	// Test harness
363 	double WORKER_POLL_DELAY;
364 
365 	// Coordination
366 	double COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL;
367 
368 	// Buggification
369 	double BUGGIFIED_EVENTUAL_CONSISTENCY;
370 	bool BUGGIFY_ALL_COORDINATION;
371 
372 	// Status
373 	double STATUS_MIN_TIME_BETWEEN_REQUESTS;
374 	double MAX_STATUS_REQUESTS_PER_SECOND;
375 	int CONFIGURATION_ROWS_TO_FETCH;
376 
377 	// IPager
378 	int PAGER_RESERVED_PAGES;
379 
380 	// IndirectShadowPager
381 	int FREE_PAGE_VACUUM_THRESHOLD;
382 	int VACUUM_QUEUE_SIZE;
383 	int VACUUM_BYTES_PER_SECOND;
384 
385 	// Timekeeper
386 	int64_t TIME_KEEPER_DELAY;
387 	int64_t TIME_KEEPER_MAX_ENTRIES;
388 
389 
390 	ServerKnobs(bool randomize = false, ClientKnobs* clientKnobs = NULL);
391 };
392 
393 extern ServerKnobs const* SERVER_KNOBS;
394 
395 #endif
396