1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6
7
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10 PerconaFT is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 PerconaFT is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22 ----------------------------------------
23
24 PerconaFT is free software: you can redistribute it and/or modify
25 it under the terms of the GNU Affero General Public License, version 3,
26 as published by the Free Software Foundation.
27
28 PerconaFT is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU Affero General Public License for more details.
32
33 You should have received a copy of the GNU Affero General Public License
34 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35 ======= */
36
37 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39 // Here are some timing numbers:
40 // (Note: The not-quite-working version with cas can be found in r22519 of https://svn.tokutek.com/tokudb/toku/tokudb.2825/) It's about as fast as "Best cas".)
41 //
42 // On ramie (2.53GHz E5540)
43 // Best nop time= 1.074300ns
44 // Best cas time= 8.595600ns
45 // Best mutex time= 19.340201ns
46 // Best rwlock time= 34.024799ns
47 // Best util rwlock time= 38.680500ns
48 // Best prelocked time= 2.148700ns
49 // Best fair rwlock time= 45.127600ns
50 // On laptop
51 // Best nop time= 2.876000ns
52 // Best cas time= 15.362500ns
53 // Best mutex time= 51.951498ns
54 // Best rwlock time= 97.721201ns
55 // Best util rwlock time=110.456800ns
56 // Best prelocked time= 4.240100ns
57 // Best fair rwlock time=113.119102ns
58 //
59 // Analysis: If the mutex can be prelocked (as cachetable does, it uses the same mutex for the cachetable and for the condition variable protecting the cache table)
60 // then you can save quite a bit. What does the cachetable do?
61 // During pin: (In the common case:) It grabs the mutex, grabs a read lock, and releases the mutex.
62 // During unpin: It grabs the mutex, unlocks the rwlock lock in the pair, and releases the mutex.
63 // Both actions must acquire a cachetable lock during that time, so definitely saves time to do it that way.
64
65 #include <stdlib.h>
66 #include <errno.h>
67 #include <string.h>
68 #include <sys/time.h>
69 #include <sys/types.h>
70
71 #include <toku_portability.h>
72 #include <toku_assert.h>
73 #include <portability/toku_atomic.h>
74 #include <portability/toku_pthread.h>
75 #include <portability/toku_time.h>
76 #include <util/frwlock.h>
77 #include <util/rwlock.h>
78 #include "rwlock_condvar.h"
79
80 static int verbose=1;
81 static int timing_only=0;
82
parse_args(int argc,const char * argv[])83 static void parse_args (int argc, const char *argv[]) {
84 const char *progname = argv[0];
85 argc--; argv++;
86 while (argc>0) {
87 if (strcmp(argv[0], "-v")==0) {
88 verbose++;
89 } else if (strcmp(argv[0], "-q")==0) {
90 verbose--;
91 } else if (strcmp(argv[0], "--timing-only")==0) {
92 timing_only=1;
93 } else {
94 fprintf(stderr, "Usage: %s {-q}* {-v}* {--timing-only}\n", progname);
95 exit(1);
96 }
97 argc--; argv++;
98 }
99 }
100
101 static const int T=6;
102 static const int N=10000000;
103
104 static double best_nop_time=1e12;
105 static double best_fcall_time=1e12;
106 static double best_cas_time=1e12;
107 static double best_mutex_time=1e12;
108 static double best_rwlock_time=1e12;
109 static double best_util_time=1e12;
110 static double best_prelocked_time=1e12;
111 static double best_frwlock_time=1e12;
112 static double best_frwlock_prelocked_time=1e12;
mind(double a,double b)113 static double mind(double a, double b) { if (a<b) return a; else return b; }
114
115 #if 0
116 // gcc 4.4.4 (fedora 12) doesn't introduce memory barriers on these writes, so I think that volatile is not enough for sequential consistency.
117 // Intel guarantees that writes are seen in the same order as they were performed on one processor. But if there were two processors, funny things could happen.
118 volatile int sc_a, sc_b;
119 void sequential_consistency (void) {
120 sc_a = 1;
121 sc_b = 0;
122 }
123 #endif
124
125 // Declaring val to be volatile produces essentially identical code as putting the asm volatile memory statements in.
126 // gcc is not introducing memory barriers to force sequential consistency on volatile memory writes.
127 // That's probably good enough for us, since we'll have a barrier instruction anywhere it matters.
128 volatile int val = 0;
129
130 static void time_nop (void) __attribute((__noinline__)); // don't want it inline, because it messes up timing.
time_nop(void)131 static void time_nop (void) {
132 struct timeval start,end;
133 for (int t=0; t<T; t++) {
134 gettimeofday(&start, NULL);
135 for (int i=0; i<N; i++) {
136 if (val!=0) abort();
137 val=1;
138 //__asm__ volatile ("" : : : "memory");
139 val=0;
140 //__asm__ volatile ("" : : : "memory");
141 }
142 gettimeofday(&end, NULL);
143 double diff = 1e9*toku_tdiff(&end, &start)/N;
144 if (verbose>1)
145 fprintf(stderr, "nop = %.6fns/(lock+unlock)\n", diff);
146 best_nop_time=mind(best_nop_time,diff);
147 }
148 }
149
150 // This function is defined so we can measure the cost of a function call.
151 int fcall_nop (int i) __attribute__((__noinline__));
fcall_nop(int i)152 int fcall_nop (int i) {
153 return i;
154 }
155
156 void time_fcall (void) __attribute((__noinline__));
time_fcall(void)157 void time_fcall (void) {
158 struct timeval start,end;
159 for (int t=0; t<T; t++) {
160 gettimeofday(&start, NULL);
161 for (int i=0; i<N; i++) {
162 fcall_nop(i);
163 }
164 gettimeofday(&end, NULL);
165 double diff = 1e9*toku_tdiff(&end, &start)/N;
166 if (verbose>1)
167 fprintf(stderr, "fcall = %.6fns/(lock+unlock)\n", diff);
168 best_fcall_time=mind(best_fcall_time,diff);
169 }
170 }
171
172 void time_cas (void) __attribute__((__noinline__));
time_cas(void)173 void time_cas (void) {
174 volatile int64_t tval = 0;
175 struct timeval start,end;
176 for (int t=0; t<T; t++) {
177 gettimeofday(&start, NULL);
178 for (int i=0; i<N; i++) {
179 { int r = toku_sync_val_compare_and_swap(&tval, 0, 1); assert(r==0); }
180 { int r = toku_sync_val_compare_and_swap(&tval, 1, 0); assert(r==1); }
181 }
182 gettimeofday(&end, NULL);
183 double diff = 1e9*toku_tdiff(&end, &start)/N;
184 if (verbose>1)
185 fprintf(stderr, "cas = %.6fns/(lock+unlock)\n", diff);
186 best_cas_time=mind(best_cas_time,diff);
187 }
188 }
189
190
191 void time_pthread_mutex (void) __attribute__((__noinline__));
time_pthread_mutex(void)192 void time_pthread_mutex (void) {
193 pthread_mutex_t mutex;
194 { int r = pthread_mutex_init(&mutex, NULL); assert(r==0); }
195 struct timeval start,end;
196 pthread_mutex_lock(&mutex);
197 pthread_mutex_unlock(&mutex);
198 for (int t=0; t<T; t++) {
199 gettimeofday(&start, NULL);
200 for (int i=0; i<N; i++) {
201 pthread_mutex_lock(&mutex);
202 pthread_mutex_unlock(&mutex);
203 }
204 gettimeofday(&end, NULL);
205 double diff = 1e9*toku_tdiff(&end, &start)/N;
206 if (verbose>1)
207 fprintf(stderr, "pthread_mutex = %.6fns/(lock+unlock)\n", diff);
208 best_mutex_time=mind(best_mutex_time,diff);
209 }
210 { int r = pthread_mutex_destroy(&mutex); assert(r==0); }
211 }
212
213 void time_pthread_rwlock (void) __attribute__((__noinline__));
time_pthread_rwlock(void)214 void time_pthread_rwlock (void) {
215 pthread_rwlock_t mutex;
216 { int r = pthread_rwlock_init(&mutex, NULL); assert(r==0); }
217 struct timeval start,end;
218 pthread_rwlock_rdlock(&mutex);
219 pthread_rwlock_unlock(&mutex);
220 for (int t=0; t<T; t++) {
221 gettimeofday(&start, NULL);
222 for (int i=0; i<N; i++) {
223 pthread_rwlock_rdlock(&mutex);
224 pthread_rwlock_unlock(&mutex);
225 }
226 gettimeofday(&end, NULL);
227 double diff = 1e9*toku_tdiff(&end, &start)/N;
228 if (verbose>1)
229 fprintf(stderr, "pthread_rwlock(r) = %.6fns/(lock+unlock)\n", diff);
230 best_rwlock_time=mind(best_rwlock_time,diff);
231 }
232 { int r = pthread_rwlock_destroy(&mutex); assert(r==0); }
233 }
234
util_rwlock_lock(RWLOCK rwlock,toku_mutex_t * mutex)235 static void util_rwlock_lock (RWLOCK rwlock, toku_mutex_t *mutex) {
236 toku_mutex_lock(mutex);
237 rwlock_read_lock(rwlock, mutex);
238 toku_mutex_unlock(mutex);
239 }
240
util_rwlock_unlock(RWLOCK rwlock,toku_mutex_t * mutex)241 static void util_rwlock_unlock (RWLOCK rwlock, toku_mutex_t *mutex) {
242 toku_mutex_lock(mutex);
243 rwlock_read_unlock(rwlock);
244 toku_mutex_unlock(mutex);
245 }
246
247 // Time the read lock that's in util/rwlock.h
248 void time_util_rwlock(void) __attribute((__noinline__));
time_util_rwlock(void)249 void time_util_rwlock(void) {
250 struct st_rwlock rwlock;
251 toku_mutex_t external_mutex;
252 toku_mutex_init(toku_uninstrumented, &external_mutex, nullptr);
253 rwlock_init(toku_uninstrumented, &rwlock);
254 struct timeval start, end;
255
256 util_rwlock_lock(&rwlock, &external_mutex);
257 util_rwlock_unlock(&rwlock, &external_mutex);
258 for (int t=0; t<T; t++) {
259 gettimeofday(&start, NULL);
260 for (int i=0; i<N; i++) {
261 util_rwlock_lock(&rwlock, &external_mutex);
262 util_rwlock_unlock(&rwlock, &external_mutex);
263 }
264 gettimeofday(&end, NULL);
265 double diff = 1e9*toku_tdiff(&end, &start)/N;
266 if (verbose>1)
267 fprintf(stderr, "util_rwlock(r) = %.6fns/(lock+unlock)\n", diff);
268 best_util_time=mind(best_util_time,diff);
269 }
270 rwlock_destroy(&rwlock);
271 toku_mutex_destroy(&external_mutex);
272 }
273
274 // Time the read lock that's in util/rwlock.h, assuming the mutex is already
275 // held.
276 void time_util_prelocked_rwlock(void) __attribute__((__noinline__));
time_util_prelocked_rwlock(void)277 void time_util_prelocked_rwlock(void) {
278 struct st_rwlock rwlock;
279 toku_mutex_t external_mutex;
280 toku_mutex_init(toku_uninstrumented, &external_mutex, nullptr);
281 toku_mutex_lock(&external_mutex);
282 rwlock_init(toku_uninstrumented, &rwlock);
283 struct timeval start, end;
284
285 rwlock_read_lock(&rwlock, &external_mutex);
286 rwlock_read_unlock(&rwlock);
287 for (int t=0; t<T; t++) {
288 gettimeofday(&start, NULL);
289 for (int i=0; i<N; i++) {
290 rwlock_read_lock(&rwlock, &external_mutex);
291 rwlock_read_unlock(&rwlock);
292 }
293 gettimeofday(&end, NULL);
294 double diff = 1e9*toku_tdiff(&end, &start)/N;
295 if (verbose>1)
296 fprintf(stderr, "pre_util_rwlock(r) = %.6fns/(lock+unlock)\n", diff);
297 best_prelocked_time=mind(best_prelocked_time,diff);
298 }
299 rwlock_destroy(&rwlock);
300 toku_mutex_unlock(&external_mutex);
301 toku_mutex_destroy(&external_mutex);
302 }
303
304 void time_frwlock_prelocked(void) __attribute__((__noinline__));
time_frwlock_prelocked(void)305 void time_frwlock_prelocked(void) {
306 toku_mutex_t external_mutex;
307 toku_mutex_init(toku_uninstrumented, &external_mutex, nullptr);
308 struct timeval start, end;
309 toku::frwlock x;
310 x.init(&external_mutex);
311 toku_mutex_lock(&external_mutex);
312 bool got_lock;
313 x.read_lock();
314 x.read_unlock();
315
316 got_lock = x.try_read_lock();
317 invariant(got_lock);
318 x.read_unlock();
319 x.write_lock(true);
320 x.write_unlock();
321 got_lock = x.try_write_lock(true);
322 invariant(got_lock);
323 x.write_unlock();
324 for (int t=0; t<T; t++) {
325 gettimeofday(&start, NULL);
326 for (int i=0; i<N; i++) {
327 x.read_lock();
328 x.read_unlock();
329 }
330 gettimeofday(&end, NULL);
331 double diff = 1e9*toku_tdiff(&end, &start)/N;
332 if (verbose>1)
333 fprintf(stderr, "frwlock_prelocked = %.6fns/(lock+unlock)\n", diff);
334 best_frwlock_prelocked_time=mind(best_frwlock_prelocked_time,diff);
335 }
336 x.deinit();
337 toku_mutex_unlock(&external_mutex);
338 toku_mutex_destroy(&external_mutex);
339 }
340
341 void time_frwlock(void) __attribute__((__noinline__));
time_frwlock(void)342 void time_frwlock(void) {
343 toku_mutex_t external_mutex;
344 toku_mutex_init(toku_uninstrumented, &external_mutex, nullptr);
345 struct timeval start, end;
346 toku::frwlock x;
347 x.init(&external_mutex);
348 toku_mutex_lock(&external_mutex);
349 x.read_lock();
350 x.read_unlock();
351 toku_mutex_unlock(&external_mutex);
352 for (int t=0; t<T; t++) {
353 gettimeofday(&start, NULL);
354 for (int i=0; i<N; i++) {
355 toku_mutex_lock(&external_mutex);
356 x.read_lock();
357 toku_mutex_unlock(&external_mutex);
358
359 toku_mutex_lock(&external_mutex);
360 x.read_unlock();
361 toku_mutex_unlock(&external_mutex);
362 }
363 gettimeofday(&end, NULL);
364 double diff = 1e9*toku_tdiff(&end, &start)/N;
365 if (verbose>1)
366 fprintf(stderr, "frwlock = %.6fns/(lock+unlock)\n", diff);
367 best_frwlock_time=mind(best_frwlock_time,diff);
368 }
369 x.deinit();
370 toku_mutex_destroy(&external_mutex);
371 }
372
main(int argc,const char * argv[])373 int main (int argc, const char *argv[]) {
374 parse_args(argc, argv);
375 if (timing_only) {
376 if (1) { // to make it easy to only time the templated frwlock
377 time_nop();
378 time_fcall();
379 time_cas();
380 time_pthread_mutex();
381 time_pthread_rwlock();
382 time_util_rwlock();
383 time_util_prelocked_rwlock();
384 }
385 time_frwlock();
386 time_frwlock_prelocked();
387 if (verbose>0) {
388 if (1) { // to make it easy to only time the templated frwlock
389 printf("// Best nop time=%10.6fns\n", best_nop_time);
390 printf("// Best fcall time=%10.6fns\n", best_fcall_time);
391 printf("// Best cas time=%10.6fns\n", best_cas_time);
392 printf("// Best mutex time=%10.6fns\n", best_mutex_time);
393 printf("// Best rwlock time=%10.6fns\n", best_rwlock_time);
394 printf("// Best util rwlock time=%10.6fns\n", best_util_time);
395 printf("// Best prelocked time=%10.6fns\n", best_prelocked_time);
396 }
397 printf("// Best frwlock time=%10.6fns\n", best_frwlock_time);
398 printf("// Best frwlock_pre time=%10.6fns\n", best_frwlock_prelocked_time);
399 }
400 }
401 return 0;
402 }
403
404