1 /* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software Foundation,
14 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
15
16 /**
17 @file storage/perfschema/pfs.cc
18 The performance schema implementation of all instruments.
19 */
20
21 #include "my_global.h"
22 #include "pfs.h"
23 #include "pfs_instr_class.h"
24 #include "pfs_instr.h"
25 #include "pfs_global.h"
26 #include "pfs_column_values.h"
27 #include "pfs_timer.h"
28 #include "pfs_events_waits.h"
29
30 /* Pending WL#4895 PERFORMANCE_SCHEMA Instrumenting Table IO */
31 #undef HAVE_TABLE_WAIT
32
33 /**
34 @page PAGE_PERFORMANCE_SCHEMA The Performance Schema main page
35 MySQL PERFORMANCE_SCHEMA implementation.
36
37 @section INTRO Introduction
38 The PERFORMANCE_SCHEMA is a way to introspect the internal execution of
39 the server at runtime.
40 The performance schema focuses primarily on performance data,
41 as opposed to the INFORMATION_SCHEMA whose purpose is to inspect metadata.
42
43 From a user point of view, the performance schema consists of:
44 - a dedicated database schema, named PERFORMANCE_SCHEMA,
45 - SQL tables, used to query the server internal state or change
46 configuration settings.
47
48 From an implementation point of view, the performance schema is a dedicated
49 Storage Engine which exposes data collected by 'Instrumentation Points'
50 placed in the server code.
51
52 @section INTERFACES Multiple interfaces
53
54 The performance schema exposes many different interfaces,
55 for different components, and for different purposes.
56
57 @subsection INT_INSTRUMENTING Instrumenting interface
58
59 All the data representing the server internal state exposed
60 in the performance schema must be first collected:
61 this is the role of the instrumenting interface.
62 The instrumenting interface is a coding interface provided
63 by implementors (of the performance schema) to implementors
64 (of the server or server components).
65
66 This interface is available to:
67 - C implementations
68 - C++ implementations
69 - the core SQL layer (/sql)
70 - the mysys library (/mysys)
71 - MySQL plugins, including storage engines,
72 - third party plugins, including third party storage engines.
73
74 For details, see the @ref PAGE_INSTRUMENTATION_INTERFACE
75 "instrumentation interface page".
76
77 @subsection INT_COMPILING Compiling interface
78
79 The implementation of the performance schema can be enabled or disabled at
80 build time, when building MySQL from the source code.
81
82 When building with the performance schema code, some compilation flags
83 are available to change the default values used in the code, if required.
84
85 For more details, see:
86 @verbatim ./configure --help @endverbatim
87
88 To compile with the performance schema:
89 @verbatim ./configure --with-perfschema @endverbatim
90
91 The implementation of all the compiling options is located in
92 @verbatim ./storage/perfschema/plug.in @endverbatim
93
94 @subsection INT_STARTUP Server startup interface
95
96 The server startup interface consists of the "./mysqld ..."
97 command line used to start the server.
98 When the performance schema is compiled in the server binary,
99 extra command line options are available.
100
101 These extra start options allow the DBA to:
102 - enable or disable the performance schema
103 - specify some sizing parameters.
104
105 To see help for the performance schema startup options, see:
106 @verbatim ./sql/mysqld --verbose --help @endverbatim
107
108 The implementation of all the startup options is located in
109 @verbatim ./sql/mysqld.cc, my_long_options[] @endverbatim
110
111 @subsection INT_BOOTSTRAP Server bootstrap interface
112
113 The bootstrap interface is a private interface exposed by
114 the performance schema, and used by the SQL layer.
115 Its role is to advertise all the SQL tables natively
116 supported by the performance schema to the SQL server.
117 The code consists of creating MySQL tables for the
118 performance schema itself, and is used in './mysql --bootstrap'
119 mode when a server is installed.
120
121 The implementation of the database creation script is located in
122 @verbatim ./scripts/mysql_system_tables.sql @endverbatim
123
124 @subsection INT_CONFIG Runtime configuration interface
125
126 When the performance schema is used at runtime, various configuration
127 parameters can be used to specify what kind of data is collected,
128 what kind of aggregations are computed, what kind of timers are used,
129 what events are timed, etc.
130
131 For all these capabilities, not a single statement or special syntax
132 was introduced in the parser.
133 Instead of new SQL statements, the interface consists of DML
134 (SELECT, INSERT, UPDATE, DELETE) against special "SETUP" tables.
135
136 For example:
137 @verbatim mysql> update performance_schema.SETUP_INSTRUMENTS
138 set ENABLED='YES', TIMED='YES';
139 Query OK, 234 rows affected (0.00 sec)
140 Rows matched: 234 Changed: 234 Warnings: 0 @endverbatim
141
142 @subsection INT_STATUS Internal audit interface
143
144 The internal audit interface is provided to the DBA to inspect if the
145 performance schema code itself is functioning properly.
146 This interface is necessary because a failure caused while
147 instrumenting code in the server should not cause failures in the
148 MySQL server itself, so that the performance schema implementation
149 never raises errors during runtime execution.
150
151 This auditing interface consists of:
152 @verbatim SHOW ENGINE PERFORMANCE_SCHEMA STATUS; @endverbatim
153 It displays data related to the memory usage of the performance schema,
154 as well as statistics about lost events, if any.
155
156 The SHOW STATUS command is implemented in
157 @verbatim ./storage/perfschema/pfs_engine_table.cc @endverbatim
158
159 @subsection INT_QUERY Query interface
160
161 The query interface is used to query the internal state of a running server.
162 It is provided as SQL tables.
163
164 For example:
165 @verbatim mysql> select * from performance_schema.EVENTS_WAITS_CURRENT;
166 @endverbatim
167
168 @section DESIGN_PRINCIPLES Design principles
169
170 @subsection PRINCIPLE_BEHAVIOR No behavior changes
171
172 The primary goal of the performance schema is to measure (instrument) the
173 execution of the server. A good measure should not cause any change
174 in behavior.
175
176 To achieve this, the overall design of the performance schema complies
177 with the following very severe design constraints:
178
179 The parser is unchanged. There are no new keywords, no new statements.
180 This guarantees that existing applications will run the same way with or
181 without the performance schema.
182
183 All the instrumentation points return "void", there are no error codes.
184 Even if the performance schema internally fails, execution of the server
185 code will proceed.
186
187 None of the instrumentation points allocate memory.
188 All the memory used by the performance schema is pre-allocated at startup,
189 and is considered "static" during the server life time.
190
191 None of the instrumentation points use any pthread_mutex, pthread_rwlock,
192 or pthread_cond (or platform equivalents).
193 Executing the instrumentation point should not cause thread scheduling to
194 change in the server.
195
196 In other words, the implementation of the instrumentation points,
197 including all the code called by the instrumentation points, is:
198 - malloc free
199 - mutex free
200 - rwlock free
201
202 TODO: All the code located in storage/perfschema is malloc free,
203 but unfortunately the usage of LF_HASH introduces some memory allocation.
204 This should be revised if possible, to use a lock-free,
205 malloc-free hash code table.
206
207 @subsection PRINCIPLE_PERFORMANCE No performance hit
208
209 The instrumentation of the server should be as fast as possible.
210 In cases when there are choices between:
211 - doing some processing when recording the performance data
212 in the instrumentation,
213 - doing some processing when retrieving the performance data,
214
215 priority is given in the design to make the instrumentation faster,
216 pushing some complexity to data retrieval.
217
218 As a result, some parts of the design, related to:
219 - the setup code path,
220 - the query code path,
221
222 might appear to be sub-optimal.
223
224 The criterion used here is to optimize primarily the critical path (data
225 collection), possibly at the expense of non-critical code paths.
226
227 @subsection PRINCIPLE_NOT_INTRUSIVE Unintrusive instrumentation
228
229 For the performance schema in general to be successful, the barrier
230 of entry for a developer should be low, so it's easy to instrument code.
231
232 In particular, the instrumentation interface:
233 - is available for C and C++ code (so it's a C interface),
234 - does not require parameters that the calling code can't easily provide,
235 - supports partial instrumentation (for example, instrumenting mutexes does
236 not require that every mutex is instrumented)
237
238 @subsection PRINCIPLE_EXTENDABLE Extendable instrumentation
239
240 As the content of the performance schema improves,
241 with more tables exposed and more data collected,
242 the instrumentation interface will also be augmented
243 to support instrumenting new concepts.
244 Existing instrumentations should not be affected when additional
245 instrumentation is made available, and making a new instrumentation
246 available should not require existing instrumented code to support it.
247
248 @subsection PRINCIPLE_VERSIONED Versioned instrumentation
249
250 Given that the instrumentation offered by the performance schema will
251 be augmented with time, when more features are implemented,
252 the interface itself should be versioned, to keep compatibility
253 with previous instrumented code.
254
255 For example, after both plugin-A and plugin-B have been instrumented for
256 mutexes, read write locks and conditions, using the instrumentation
257 interface, we can anticipate that the instrumentation interface
258 is expanded to support file based operations.
259
260 Plugin-A, a file based storage engine, will most likely use the expanded
261 interface and instrument its file usage, using the version 2
262 interface, while Plugin-B, a network based storage engine, will not change
263 its code and not release a new binary.
264
265 When later the instrumentation interface is expanded to support network
266 based operations (which will define interface version 3), the Plugin-B code
267 can then be changed to make use of it.
268
269 Note, this is just an example to illustrate the design concept here.
270 Both mutexes and file instrumentation are already available
271 since version 1 of the instrumentation interface.
272
273 @subsection PRINCIPLE_DEPLOYMENT Easy deployment
274
275 Internally, we might want every plugin implementation to upgrade the
276 instrumented code to the latest available, but this will cause additional
277 work and this is not practical if the code change is monolithic.
278
279 Externally, for third party plugin implementors, asking implementors to
280 always stay aligned to the latest instrumentation and make new releases,
281 even when the change does not provide new functionality for them,
282 is a bad idea.
283
284 For example, requiring a network based engine to re-release because the
285 instrumentation interface changed for file based operations, will create
286 too many deployment issues.
287
288 So, the performance schema implementation must support concurrently,
289 in the same deployment, multiple versions of the instrumentation
290 interface, and ensure binary compatibility with each version.
291
292 In addition to this, the performance schema can be included or excluded
293 from the server binary, using build time configuration options.
294
295 Regardless, the following types of deployment are valid:
296 - a server supporting the performance schema + a storage engine
297 that is not instrumented
298 - a server not supporting the performance schema + a storage engine
299 that is instrumented
300 */
301
302 /**
303 @page PAGE_INSTRUMENTATION_INTERFACE
304 Performance schema: instrumentation interface page.
305 MySQL performance schema instrumentation interface.
306
307 @section INTRO Introduction
308
The instrumentation interface consists of two layers:
310 - a raw ABI (Application Binary Interface) layer, that exposes the primitive
311 instrumentation functions exported by the performance schema instrumentation
- an API (Application Programming Interface) layer,
313 that provides many helpers for a developer instrumenting some code,
314 to make the instrumentation as easy as possible.
315
316 The ABI layer consists of:
317 @code
318 #include "mysql/psi/psi.h"
319 @endcode
320
321 The API layer consists of:
322 @code
323 #include "mysql/psi/mutex_mutex.h"
324 #include "mysql/psi/mutex_file.h"
325 @endcode
326
327 The first helper is for mutexes, rwlocks and conditions,
328 the second for file io.
329
330 The API layer exposes C macros and typedefs which will expand:
331 - either to non-instrumented code, when compiled without the performance
332 schema instrumentation
333 - or to instrumented code, that will issue the raw calls to the ABI layer
334 so that the implementation can collect data.
335
336 Note that all the names introduced (for example, @c mysql_mutex_lock) do not
337 collide with any other namespace.
338 In particular, the macro @c mysql_mutex_lock is on purpose not named
339 @c pthread_mutex_lock.
340 This is to:
341 - avoid overloading @c pthread_mutex_lock with yet another macro,
342 which is dangerous as it can affect user code and pollute
343 the end-user namespace.
344 - allow the developer instrumenting code to selectively instrument
345 some code but not all.
346
347 @section PRINCIPLES Design principles
348
349 The ABI part is designed as a facade, that exposes basic primitives.
350 The expectation is that each primitive will be very stable over time,
351 but the list will constantly grow when more instruments are supported.
352 To support binary compatibility with plugins compiled with a different
353 version of the instrumentation, the ABI itself is versioned
354 (see @c PSI_v1, @c PSI_v2).
355
356 For a given instrumentation point in the API, the basic coding pattern
357 used is:
358 - (a) If the performance schema is not initialized, do nothing
359 - (b) If the object acted upon is not instrumented, do nothing
360 - (c) otherwise, notify the performance schema of the operation
361 about to be performed.
362
363 The implementation of the instrumentation interface can:
364 - decide that it is not interested by the event, and return NULL.
365 In this context, 'interested' means whether the instrumentation for
366 this object + event is turned on in the performance schema configuration
367 (the SETUP_ tables).
368 - decide that this event is to be instrumented.
369 In this case, the instrumentation returns an opaque pointer,
370 that acts as a listener.
371
372 If a listener is returned, the instrumentation point then:
373 - (d) invokes the "start" event method
374 - (e) executes the instrumented code.
375 - (f) invokes the "end" event method.
376
377 If no listener is returned, only the instrumented code (e) is invoked.
378
379 The following code fragment is annotated to show how in detail this pattern
is implemented, when the instrumentation is compiled in:
381
382 @verbatim
383 static inline int mysql_mutex_lock(
384 mysql_mutex_t *that, myf flags, const char *src_file, uint src_line)
385 {
386 int result;
387 struct PSI_mutex_locker *locker= NULL;
388
389 ...... (a) .......... (b)
390 if (PSI_server && that->m_psi)
391
392 .......................... (c)
393 if ((locker= PSI_server->get_thread_mutex_locker(that->m_psi,
394 PSI_MUTEX_LOCK)))
395
396 ............... (d)
397 PSI_server->start_mutex_wait(locker, src_file, src_line);
398
399 ........ (e)
400 result= pthread_mutex_lock(&that->m_mutex);
401
402 if (locker)
403
404 ............. (f)
405 PSI_server->end_mutex_wait(locker, result);
406
407 return result;
408 }
409 @endverbatim
410
411 When the performance schema instrumentation is not compiled in,
412 the code becomes simply a wrapper, expanded in line by the compiler:
413
414 @verbatim
415 static inline int mysql_mutex_lock(...)
416 {
417 int result;
418
419 ........ (e)
420 result= pthread_mutex_lock(&that->m_mutex);
421
422 return result;
423 }
424 @endverbatim
425 */
426
427 /**
428 @page PAGE_AGGREGATES Performance schema: the aggregates page.
429 Performance schema aggregates.
430
431 @section INTRO Introduction
432
433 Aggregates tables are tables that can be formally defined as
434 SELECT ... from EVENTS_WAITS_HISTORY_INFINITE ... group by 'group clause'.
435
436 Each group clause defines a different kind of aggregate, and corresponds to
437 a different table exposed by the performance schema.
438
439 Aggregates can be either:
440 - computed on the fly,
441 - computed on demand, based on other available data.
442
443 'EVENTS_WAITS_HISTORY_INFINITE' is a table that does not exist,
444 the best approximation is EVENTS_WAITS_HISTORY_LONG.
445 Aggregates computed on the fly in fact are based on EVENTS_WAITS_CURRENT,
446 while aggregates computed on demand are based on other
447 EVENTS_WAITS_SUMMARY_BY_xxx tables.
448
449 To better understand the implementation itself, a bit of math is
450 required first, to understand the model behind the code:
451 the code is deceptively simple, the real complexity resides
452 in the flyweight of pointers between various performance schema buffers.
453
454 @section DIMENSION Concept of dimension
455
456 An event measured by the instrumentation has many attributes.
457 An event is represented as a data point P(x1, x2, ..., xN),
458 where each x_i coordinate represents a given attribute value.
459
460 Examples of attributes are:
461 - the time waited
462 - the object waited on
463 - the instrument waited on
464 - the thread that waited
465 - the operation performed
466 - per object or per operation additional attributes, such as spins,
467 number of bytes, etc.
468
469 Computing an aggregate per thread is fundamentally different from
470 computing an aggregate by instrument, so the "_BY_THREAD" and
471 "_BY_EVENT_NAME" aggregates are different dimensions,
472 operating on different x_i and x_j coordinates.
473 These aggregates are "orthogonal".
474
475 @section PROJECTION Concept of projection
476
477 A given x_i attribute value can convey either just one basic information,
478 such as a number of bytes, or can convey implied information,
479 such as an object fully qualified name.
480
481 For example, from the value "test.t1", the name of the object schema
482 "test" can be separated from the object name "t1", so that now aggregates
483 by object schema can be implemented.
484
485 In math terms, that corresponds to defining a function:
486 F_i (x): x --> y
487 Applying this function to our point P gives another point P':
488
489 F_i (P):
P(x1, x2, ..., x{i-1}, x_i, x{i+1}, ..., x_N)
491 --> P' (x1, x2, ..., x{i-1}, f_i(x_i), x{i+1}, ..., x_N)
492
493 That function defines in fact an aggregate !
494 In SQL terms, this aggregate would look like the following table:
495
496 @verbatim
497 CREATE VIEW EVENTS_WAITS_SUMMARY_BY_Func_i AS
498 SELECT col_1, col_2, ..., col_{i-1},
499 Func_i(col_i),
500 COUNT(col_i),
501 MIN(col_i), AVG(col_i), MAX(col_i), -- if col_i is a numeric value
502 col_{i+1}, ..., col_N
503 FROM EVENTS_WAITS_HISTORY_INFINITE
504 group by col_1, col_2, ..., col_{i-1}, col{i+1}, ..., col_N.
505 @endverbatim
506
507 Note that not all columns have to be included,
508 in particular some columns that are dependent on the x_i column should
509 be removed, so that in practice, MySQL's aggregation method tends to
510 remove many attributes at each aggregation steps.
511
512 For example, when aggregating wait events by object instances,
513 - the wait_time and number_of_bytes can be summed,
514 and sum(wait_time) now becomes an object instance attribute.
515 - the source, timer_start, timer_end columns are not in the
516 _BY_INSTANCE table, because these attributes are only
517 meaningful for a wait.
518
519 @section COMPOSITION Concept of composition
520
521 Now, the "test.t1" --> "test" example was purely theory,
522 just to explain the concept, and does not lead very far.
523 Let's look at a more interesting example of data that can be derived
524 from the row event.
525
526 An event creates a transient object, PFS_wait_locker, per operation.
527 This object's life cycle is extremely short: it's created just
528 before the start_wait() instrumentation call, and is destroyed in
529 the end_wait() call.
530
531 The wait locker itself contains a pointer to the object instance
532 waited on.
533 That allows to implement a wait_locker --> object instance projection,
534 with m_target.
535 The object instance life cycle depends on _init and _destroy calls
536 from the code, such as mysql_mutex_init()
537 and mysql_mutex_destroy() for a mutex.
538
539 The object instance waited on contains a pointer to the object class,
540 which is represented by the instrument name.
541 That allows to implement an object instance --> object class projection.
542 The object class life cycle is permanent, as instruments are loaded in
543 the server and never removed.
544
545 The object class is named in such a way
546 (for example, "wait/sync/mutex/sql/LOCK_open",
"wait/io/file/maria/data_file") that the component ("sql", "maria")
548 that it belongs to can be inferred.
549 That allows to implement an object class --> server component projection.
550
551 Back to math again, we have, for example for mutexes:
552
553 F1 (l) : PFS_wait_locker l --> PFS_mutex m = l->m_target.m_mutex
554
555 F1_to_2 (m) : PFS_mutex m --> PFS_mutex_class i = m->m_class
556
557 F2_to_3 (i) : PFS_mutex_class i --> const char *component =
558 substring(i->m_name, ...)
559
560 Per components aggregates are not implemented, this is just an illustration.
561
562 F1 alone defines this aggregate:
563
564 EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_INSTANCE
565 (or MUTEX_INSTANCE)
566
567 F1_to_2 alone could define this aggregate:
568
569 EVENTS_WAITS_SUMMARY_BY_INSTANCE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
570
571 Alternatively, using function composition, with
572 F2 = F1_to_2 o F1, F2 defines:
573
574 EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
575
576 Likewise, F_2_to_3 defines:
577
578 EVENTS_WAITS_SUMMARY_BY_EVENT_NAME --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
579
580 and F3 = F_2_to_3 o F_1_to_2 o F1 defines:
581
582 EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
583
584 What has all this to do with the code ?
585
586 Function composition such as F_2_to_3 o F_1_to_2 o F1 is implemented
587 as PFS_single_stat_chain, where each link in the chain represents
588 an individual F_{i}_to_{i+1} aggregation step.
589
590 A single call to aggregate_single_stat_chain() updates all the tables
591 described in the statistics chain.
592
593 @section STAT_CHAIN Statistics chains
594
595 Statistics chains are only used for on the fly aggregates,
596 and are therefore all based initially on the '_CURRENT' base table that
597 contains the data recorded.
598 The following table aggregates are implemented with a statistics chain:
599
600 EVENTS_WAITS_CURRENT --> EVENTS_WAITS_SUMMARY_BY_INSTANCE
601 --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
602
603 This relationship is between classes.
604
605 In terms of object instances, or records, this chain is implemented
606 as a flyweight.
607
608 For example, assuming the following scenario:
609 - A mutex class "M" is instrumented, the instrument name
610 is "wait/sync/mutex/sql/M"
611 - This mutex instrument has been instantiated twice,
612 mutex instances are noted M-1 and M-2
613 - Threads T-A and T-B are locking mutex instance M-1
614 - Threads T-C and T-D are locking mutex instance M-2
615
616 The performance schema will record the following data:
617 - EVENTS_WAITS_CURRENT has 4 rows, one for each mutex locker
618 - EVENTS_WAITS_SUMMARY_BY_INSTANCE shows 2 rows, for M-1 and M-2
619 - EVENTS_WAITS_SUMMARY_BY_EVENT_NAME shows 1 row, for M
620
621 The graph of structures will look like:
622
623 @verbatim
624 PFS_wait_locker (T-A, M-1) ----------
625 |
626 v
627 PFS_mutex (M-1)
628 - m_wait_stat ------------
629 ^ |
630 | |
631 PFS_wait_locker (T-B, M-1) ---------- |
632 v
633 PFS_mutex_class (M)
634 - m_wait_stat
635 PFS_wait_locker (T-C, M-2) ---------- ^
636 | |
637 v |
638 PFS_mutex (M-2) |
639 - m_wait_stat ------------
640 ^
641 |
642 PFS_wait_locker (T-D, M-2) ----------
643
644 || || ||
645 || || ||
646 vv vv vv
647
648 EVENTS_WAITS_CURRENT ..._SUMMARY_BY_INSTANCE ..._SUMMARY_BY_EVENT_NAME
649 @endverbatim
650
651 @section ON_THE_FLY On the fly aggregates
652
653 'On the fly' aggregates are computed during the code execution.
654 This is necessary because the data the aggregate is based on is volatile,
655 and can not be kept indefinitely.
656
657 @section HIGHER_LEVEL Higher level aggregates
658
659 Note: no higher level aggregate is implemented yet,
660 this section is a place holder.
661 */
662
663 /**
664 @defgroup Performance_schema Performance Schema
665 The performance schema component.
666 For details, see the
667 @ref PAGE_PERFORMANCE_SCHEMA "performance schema main page".
668
669 @defgroup Performance_schema_implementation Performance Schema Implementation
670 @ingroup Performance_schema
671
672 @defgroup Performance_schema_tables Performance Schema Tables
673 @ingroup Performance_schema_implementation
674 */
675
/**
  Thread-local key holding the PFS_thread instrumentation data
  of the current thread (read via my_pthread_getspecific_ptr).
*/
pthread_key(PFS_thread*, THR_PFS);
/** True if the THR_PFS key has been initialized. */
bool THR_PFS_initialized= false;
678
/**
  Conversion map from PSI_mutex_operation to enum_operation_type.
  Indexed by enum PSI_mutex_operation.
*/
static enum_operation_type mutex_operation_map[]=
{
  OPERATION_TYPE_LOCK,
  OPERATION_TYPE_TRYLOCK
};

/**
  Conversion map from PSI_rwlock_operation to enum_operation_type.
  Indexed by enum PSI_rwlock_operation.
*/
static enum_operation_type rwlock_operation_map[]=
{
  OPERATION_TYPE_READLOCK,
  OPERATION_TYPE_WRITELOCK,
  OPERATION_TYPE_TRYREADLOCK,
  OPERATION_TYPE_TRYWRITELOCK
};

/**
  Conversion map from PSI_cond_operation to enum_operation_type.
  Indexed by enum PSI_cond_operation.
*/
static enum_operation_type cond_operation_map[]=
{
  OPERATION_TYPE_WAIT,
  OPERATION_TYPE_TIMEDWAIT
};

/**
  Conversion map from PSI_file_operation to enum_operation_type.
  Indexed by enum PSI_file_operation.
*/
static enum_operation_type file_operation_map[]=
{
  OPERATION_TYPE_FILECREATE,
  OPERATION_TYPE_FILECREATETMP,
  OPERATION_TYPE_FILEOPEN,
  OPERATION_TYPE_FILESTREAMOPEN,
  OPERATION_TYPE_FILECLOSE,
  OPERATION_TYPE_FILESTREAMCLOSE,
  OPERATION_TYPE_FILEREAD,
  OPERATION_TYPE_FILEWRITE,
  OPERATION_TYPE_FILESEEK,
  OPERATION_TYPE_FILETELL,
  OPERATION_TYPE_FILEFLUSH,
  OPERATION_TYPE_FILESTAT,
  OPERATION_TYPE_FILEFSTAT,
  OPERATION_TYPE_FILECHSIZE,
  OPERATION_TYPE_FILEDELETE,
  OPERATION_TYPE_FILERENAME,
  OPERATION_TYPE_FILESYNC
};
723
724 /**
725 Build the prefix name of a class of instruments in a category.
726 For example, this function builds the string 'wait/sync/mutex/sql/' from
727 a prefix 'wait/sync/mutex' and a category 'sql'.
728 This prefix is used later to build each instrument name, such as
729 'wait/sync/mutex/sql/LOCK_open'.
730 @param prefix Prefix for this class of instruments
731 @param category Category name
732 @param [out] output Buffer of length PFS_MAX_INFO_NAME_LENGTH.
733 @param [out] output_length Length of the resulting output string.
734 @return 0 for success, non zero for errors
735 */
build_prefix(const LEX_STRING * prefix,const char * category,char * output,int * output_length)736 static int build_prefix(const LEX_STRING *prefix, const char *category,
737 char *output, int *output_length)
738 {
739 int len= strlen(category);
740 char *out_ptr= output;
741 int prefix_length= prefix->length;
742
743 if (unlikely((prefix_length + len + 1) >=
744 PFS_MAX_FULL_PREFIX_NAME_LENGTH))
745 {
746 pfs_print_error("build_prefix: prefix+category is too long <%s> <%s>\n",
747 prefix->str, category);
748 return 1;
749 }
750
751 if (unlikely(strchr(category, '/') != NULL))
752 {
753 pfs_print_error("build_prefix: invalid category <%s>\n",
754 category);
755 return 1;
756 }
757
758 /* output = prefix + category + '/' */
759 memcpy(out_ptr, prefix->str, prefix_length);
760 out_ptr+= prefix_length;
761 memcpy(out_ptr, category, len);
762 out_ptr+= len;
763 *out_ptr= '/';
764 out_ptr++;
765 *output_length= out_ptr - output;
766
767 return 0;
768 }
769
/**
  Shared implementation for all register_xxx_v1() functions.
  For each element of the info array, builds the fully qualified
  instrument name (PREFIX + category + '/' + info->m_name), registers
  it with REGISTER_FUNC, and stores the resulting key in *(info->m_key).
  A key of 0 is stored when the category is invalid or the resulting
  name is too long, so registration never fails from the caller's
  point of view (instrumentation points return "void").
  @param KEY_T type of the instrument key (for example, PSI_mutex_key)
  @param PREFIX name prefix for this class of instruments
  @param REGISTER_FUNC function that registers one instrument class
*/
#define REGISTER_BODY_V1(KEY_T, PREFIX, REGISTER_FUNC) \
  KEY_T key; \
  char formatted_name[PFS_MAX_INFO_NAME_LENGTH]; \
  int prefix_length; \
  int len; \
  int full_length; \
  \
  DBUG_ASSERT(category != NULL); \
  DBUG_ASSERT(info != NULL); \
  if (unlikely(build_prefix(&PREFIX, category, \
                            formatted_name, &prefix_length))) \
  { \
    /* Invalid prefix/category: mark every instrument as unregistered. */ \
    for (; count>0; count--, info++) \
      *(info->m_key)= 0; \
    return ; \
  } \
  \
  for (; count>0; count--, info++) \
  { \
    DBUG_ASSERT(info->m_key != NULL); \
    DBUG_ASSERT(info->m_name != NULL); \
    len= strlen(info->m_name); \
    full_length= prefix_length + len; \
    if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH)) \
    { \
      memcpy(formatted_name + prefix_length, info->m_name, len); \
      key= REGISTER_FUNC(formatted_name, full_length, info->m_flags); \
    } \
    else \
    { \
      pfs_print_error("REGISTER_BODY_V1: name too long <%s> <%s>\n", \
                      category, info->m_name); \
      key= 0; \
    } \
    \
    *(info->m_key)= key; \
  } \
  return;
808
809 /* Use C linkage for the interface functions. */
810
811 C_MODE_START
812
/**
  Register a batch of mutex instruments.
  Stores the assigned instrument key in *(info->m_key) for each entry,
  or 0 on error.
  @param category instrument category name, without '/'
  @param info array of instruments to register
  @param count number of elements in the info array
*/
static void register_mutex_v1(const char *category,
                              PSI_mutex_info_v1 *info,
                              int count)
{
  REGISTER_BODY_V1(PSI_mutex_key,
                   mutex_instrument_prefix,
                   register_mutex_class)
}
821
/**
  Register a batch of rwlock instruments.
  Stores the assigned instrument key in *(info->m_key) for each entry,
  or 0 on error.
  @param category instrument category name, without '/'
  @param info array of instruments to register
  @param count number of elements in the info array
*/
static void register_rwlock_v1(const char *category,
                               PSI_rwlock_info_v1 *info,
                               int count)
{
  REGISTER_BODY_V1(PSI_rwlock_key,
                   rwlock_instrument_prefix,
                   register_rwlock_class)
}
830
/**
  Register a batch of condition instruments.
  Stores the assigned instrument key in *(info->m_key) for each entry,
  or 0 on error.
  @param category instrument category name, without '/'
  @param info array of instruments to register
  @param count number of elements in the info array
*/
static void register_cond_v1(const char *category,
                             PSI_cond_info_v1 *info,
                             int count)
{
  REGISTER_BODY_V1(PSI_cond_key,
                   cond_instrument_prefix,
                   register_cond_class)
}
839
/**
  Register a batch of thread instruments.
  Stores the assigned instrument key in *(info->m_key) for each entry,
  or 0 on error.
  @param category instrument category name, without '/'
  @param info array of instruments to register
  @param count number of elements in the info array
*/
static void register_thread_v1(const char *category,
                               PSI_thread_info_v1 *info,
                               int count)
{
  REGISTER_BODY_V1(PSI_thread_key,
                   thread_instrument_prefix,
                   register_thread_class)
}
848
/**
  Register a batch of file instruments.
  Stores the assigned instrument key in *(info->m_key) for each entry,
  or 0 on error.
  @param category instrument category name, without '/'
  @param info array of instruments to register
  @param count number of elements in the info array
*/
static void register_file_v1(const char *category,
                             PSI_file_info_v1 *info,
                             int count)
{
  REGISTER_BODY_V1(PSI_file_key,
                   file_instrument_prefix,
                   register_file_class)
}
857
/**
  Shared implementation for all init_xxx_v1() functions.
  Looks up the instrument class for KEY and creates an instrumented
  instance. Returns NULL (instrumentation disabled for this object) when:
  - the calling thread has no PFS_thread, or it is disabled,
  - the instrument class is not found, or it is disabled.
  @param T instrument type (mutex, rwlock, cond)
  @param KEY instrument registration key
  @param ID identity (address) of the object being instrumented
*/
#define INIT_BODY_V1(T, KEY, ID)                                            \
  PFS_##T##_class *klass;                                                   \
  PFS_##T *pfs;                                                             \
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS); \
  if (unlikely(pfs_thread == NULL))                                         \
    return NULL;                                                            \
  if (! pfs_thread->m_enabled)                                              \
    return NULL;                                                            \
  klass= find_##T##_class(KEY);                                             \
  if (unlikely(klass == NULL))                                              \
    return NULL;                                                            \
  if (! klass->m_enabled)                                                   \
    return NULL;                                                            \
  pfs= create_##T(klass, ID);                                               \
  return reinterpret_cast<PSI_##T *> (pfs)
873
/**
  Implementation of the mutex instrumentation interface.
  Create the instrumentation for a mutex instance.
  @param key       registered mutex instrument key
  @param identity  address of the instrumented mutex
  @return an instrumented mutex, or NULL if not instrumented
*/
static PSI_mutex*
init_mutex_v1(PSI_mutex_key key, const void *identity)
{
  INIT_BODY_V1(mutex, key, identity);
}
879
destroy_mutex_v1(PSI_mutex * mutex)880 static void destroy_mutex_v1(PSI_mutex* mutex)
881 {
882 PFS_mutex *pfs= reinterpret_cast<PFS_mutex*> (mutex);
883 destroy_mutex(pfs);
884 }
885
/**
  Implementation of the rwlock instrumentation interface.
  Create the instrumentation for a rwlock instance.
  @param key       registered rwlock instrument key
  @param identity  address of the instrumented rwlock
  @return an instrumented rwlock, or NULL if not instrumented
*/
static PSI_rwlock*
init_rwlock_v1(PSI_rwlock_key key, const void *identity)
{
  INIT_BODY_V1(rwlock, key, identity);
}
891
destroy_rwlock_v1(PSI_rwlock * rwlock)892 static void destroy_rwlock_v1(PSI_rwlock* rwlock)
893 {
894 PFS_rwlock *pfs= reinterpret_cast<PFS_rwlock*> (rwlock);
895 destroy_rwlock(pfs);
896 }
897
/**
  Implementation of the condition instrumentation interface.
  Create the instrumentation for a condition instance.
  @param key       registered condition instrument key
  @param identity  address of the instrumented condition
  @return an instrumented condition, or NULL if not instrumented
*/
static PSI_cond*
init_cond_v1(PSI_cond_key key, const void *identity)
{
  INIT_BODY_V1(cond, key, identity);
}
903
destroy_cond_v1(PSI_cond * cond)904 static void destroy_cond_v1(PSI_cond* cond)
905 {
906 PFS_cond *pfs= reinterpret_cast<PFS_cond*> (cond);
907 destroy_cond(pfs);
908 }
909
/**
  Implementation of the table instrumentation interface.
  Find or create the instrumented table share for a table.
  Currently compiled out: table IO instrumentation is pending
  WL#4895, so HAVE_TABLE_WAIT is undefined and this always
  returns NULL.
  @param schema_name         schema name of the table
  @param schema_name_length  length of schema_name, in bytes
  @param table_name          table name
  @param table_name_length   length of table_name, in bytes
  @param identity            address of the table share
  @return an instrumented table share, or NULL
*/
static PSI_table_share*
get_table_share_v1(const char *schema_name, int schema_name_length,
                   const char *table_name, int table_name_length,
                   const void *identity)
{
#ifdef HAVE_TABLE_WAIT
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(pfs_thread == NULL))
    return NULL;
  PFS_table_share* share;
  share= find_or_create_table_share(pfs_thread,
                                    schema_name, schema_name_length,
                                    table_name, table_name_length);
  return reinterpret_cast<PSI_table_share*> (share);
#else
  return NULL;
#endif
}
928
/**
  Implementation of the table instrumentation interface.
  Release an instrumented table share.
  Intentionally a no-op for now.
  @param share  the table share to release
*/
static void release_table_share_v1(PSI_table_share* share)
{
  /*
    To be implemented by WL#4895 PERFORMANCE_SCHEMA Instrumenting Table IO.
  */
}
935
936 static PSI_table*
open_table_v1(PSI_table_share * share,const void * identity)937 open_table_v1(PSI_table_share *share, const void *identity)
938 {
939 PFS_table_share *pfs_table_share=
940 reinterpret_cast<PFS_table_share*> (share);
941 PFS_table *pfs_table;
942 DBUG_ASSERT(pfs_table_share);
943 pfs_table= create_table(pfs_table_share, identity);
944 return reinterpret_cast<PSI_table *> (pfs_table);
945 }
946
close_table_v1(PSI_table * table)947 static void close_table_v1(PSI_table *table)
948 {
949 PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
950 DBUG_ASSERT(pfs);
951 destroy_table(pfs);
952 }
953
/**
  Implementation of the file instrumentation interface.
  Associate an open file descriptor with a file instrument, so that
  later operations by descriptor can be attributed to the file.
  Silently does nothing when the descriptor is invalid, the calling
  thread is not instrumented/enabled, or the instrument class is
  unknown/disabled.
  @param key   registered file instrument key
  @param name  file name
  @param file  file descriptor just returned by open/create
*/
static void create_file_v1(PSI_file_key key, const char *name, File file)
{
  int index= (int) file;
  if (unlikely(index < 0))
    return;
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(pfs_thread == NULL))
    return;
  if (! pfs_thread->m_enabled)
    return;
  PFS_file_class *klass= find_file_class(key);
  if (unlikely(klass == NULL))
    return;
  if (! klass->m_enabled)
    return;
  if (likely(index < file_handle_max))
  {
    uint len= strlen(name);
    PFS_file *pfs= find_or_create_file(pfs_thread, klass, name, len);
    /* Record the descriptor -> instrument association. */
    file_handle_array[index]= pfs;
  }
  else
    /* Descriptor beyond the instrumented range: count the loss. */
    file_handle_lost++;
}
978
/**
  Argument passed from spawn_thread_v1() to pfs_spawn_thread().
  Heap allocated by the spawning thread, freed by the spawned thread
  (see pfs_spawn_thread()).
*/
struct PFS_spawn_thread_arg
{
  /** Instrumentation of the thread that called spawn_thread_v1(). */
  PFS_thread *m_parent_thread;
  /** Instrument key for the child thread. */
  PSI_thread_key m_child_key;
  /** Identity of the child thread instrumentation. */
  const void *m_child_identity;
  /** User start routine to execute in the child thread. */
  void *(*m_user_start_routine)(void*);
  /** Argument for the user start routine. */
  void *m_user_arg;
};
987
/**
  Thread start wrapper installed by spawn_thread_v1().
  Attaches instrumentation to the new pthread, frees the spawn
  argument, then runs the user's start routine.
  @param arg  a heap-allocated PFS_spawn_thread_arg, owned (and freed)
              by this function
  @return NULL (the user routine's return value is discarded)
*/
void* pfs_spawn_thread(void *arg)
{
  PFS_spawn_thread_arg *typed_arg= (PFS_spawn_thread_arg*) arg;
  void *user_arg;
  void *(*user_start_routine)(void*);

  PFS_thread *pfs;

  /* First, attach instrumentation to this newly created pthread. */
  PFS_thread_class *klass= find_thread_class(typed_arg->m_child_key);
  if (likely(klass != NULL))
    pfs= create_thread(klass, typed_arg->m_child_identity, 0);
  else
    pfs= NULL; /* unknown instrument class: run uninstrumented */
  my_pthread_setspecific_ptr(THR_PFS, pfs);

  /*
    Secondly, free the memory allocated in spawn_thread_v1().
    It is preferable to do this before invoking the user
    routine, to avoid memory leaks at shutdown, in case
    the server exits without waiting for this thread.
  */
  user_start_routine= typed_arg->m_user_start_routine;
  user_arg= typed_arg->m_user_arg;
  my_free(typed_arg);

  /* Then, execute the user code for this thread. */
  (*user_start_routine)(user_arg);

  return NULL;
}
1019
/**
  Implementation of the thread instrumentation interface.
  Create a new instrumented thread: wraps pthread_create() so the
  child starts in pfs_spawn_thread(), which attaches instrumentation
  before calling the user routine.
  @param key            instrument key for the child thread
  @param thread         [out] pthread id of the child
  @param attr           pthread attributes, passed through
  @param start_routine  user start routine
  @param arg            argument for start_routine
  @return 0 on success, EAGAIN on out of memory,
          or the pthread_create() error code
*/
static int spawn_thread_v1(PSI_thread_key key,
                           pthread_t *thread, const pthread_attr_t *attr,
                           void *(*start_routine)(void*), void *arg)
{
  PFS_spawn_thread_arg *psi_arg;

  /* psi_arg can not be global, and can not be a local variable. */
  psi_arg= (PFS_spawn_thread_arg*) my_malloc(sizeof(PFS_spawn_thread_arg),
                                             MYF(MY_WME));
  if (unlikely(psi_arg == NULL))
    return EAGAIN;

  psi_arg->m_parent_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  psi_arg->m_child_key= key;
  psi_arg->m_child_identity= (arg ? arg : thread);
  psi_arg->m_user_start_routine= start_routine;
  psi_arg->m_user_arg= arg;

  int result= pthread_create(thread, attr, pfs_spawn_thread, psi_arg);
  if (unlikely(result != 0))
    my_free(psi_arg); /* child never started, so it can not free psi_arg */
  return result;
}
1043
1044 static PSI_thread*
new_thread_v1(PSI_thread_key key,const void * identity,ulong thread_id)1045 new_thread_v1(PSI_thread_key key, const void *identity, ulong thread_id)
1046 {
1047 PFS_thread *pfs;
1048
1049 PFS_thread_class *klass= find_thread_class(key);
1050 if (likely(klass != NULL))
1051 pfs= create_thread(klass, identity, thread_id);
1052 else
1053 pfs= NULL;
1054
1055 return reinterpret_cast<PSI_thread*> (pfs);
1056 }
1057
set_thread_id_v1(PSI_thread * thread,unsigned long id)1058 static void set_thread_id_v1(PSI_thread *thread, unsigned long id)
1059 {
1060 DBUG_ASSERT(thread);
1061 PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
1062 pfs->m_thread_id= id;
1063 }
1064
1065 static PSI_thread*
get_thread_v1(void)1066 get_thread_v1(void)
1067 {
1068 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1069 return reinterpret_cast<PSI_thread*> (pfs);
1070 }
1071
set_thread_v1(PSI_thread * thread)1072 static void set_thread_v1(PSI_thread* thread)
1073 {
1074 PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
1075 my_pthread_setspecific_ptr(THR_PFS, pfs);
1076 }
1077
delete_current_thread_v1(void)1078 static void delete_current_thread_v1(void)
1079 {
1080 PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1081 if (thread != NULL)
1082 {
1083 my_pthread_setspecific_ptr(THR_PFS, NULL);
1084 destroy_thread(thread);
1085 }
1086 }
1087
delete_thread_v1(PSI_thread * thread)1088 static void delete_thread_v1(PSI_thread *thread)
1089 {
1090 PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
1091 if (pfs != NULL)
1092 destroy_thread(pfs);
1093 }
1094
/**
  Implementation of the mutex instrumentation interface.
  Build a wait locker for a mutex operation about to be performed,
  or return NULL when the event should not be recorded.
  @param state  locker state storage (unused by this implementation)
  @param mutex  the instrumented mutex
  @param op     the mutex operation to record
  @return a mutex locker, or NULL if the wait is not instrumented
*/
static PSI_mutex_locker*
get_thread_mutex_locker_v1(PSI_mutex_locker_state *state,
                           PSI_mutex *mutex, PSI_mutex_operation op)
{
  PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
  DBUG_ASSERT((int) op >= 0);
  DBUG_ASSERT((uint) op < array_elements(mutex_operation_map));
  DBUG_ASSERT(pfs_mutex != NULL);
  DBUG_ASSERT(pfs_mutex->m_class != NULL);
  /* Cheapest filters first: global flag, class, then thread. */
  if (! flag_events_waits_current)
    return NULL;
  if (! pfs_mutex->m_class->m_enabled)
    return NULL;
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(pfs_thread == NULL))
    return NULL;
  if (! pfs_thread->m_enabled)
    return NULL;
  /* Lockers nest; give up (and count the loss) when the stack is full. */
  if (unlikely(pfs_thread->m_wait_locker_count >= LOCKER_STACK_SIZE))
  {
    locker_lost++;
    return NULL;
  }
  PFS_wait_locker *pfs_locker= &pfs_thread->m_wait_locker_stack
    [pfs_thread->m_wait_locker_count];

  pfs_locker->m_target.m_mutex= pfs_mutex;
  pfs_locker->m_waits_current.m_thread= pfs_thread;
  pfs_locker->m_waits_current.m_class= pfs_mutex->m_class;
  /* Timer starts later, in start_mutex_wait_v1(). */
  if (pfs_mutex->m_class->m_timed)
  {
    pfs_locker->m_timer_name= wait_timer;
    pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_STARTING;
  }
  else
    pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_UNTIMED;
  pfs_locker->m_waits_current.m_object_instance_addr= pfs_mutex->m_identity;
  pfs_locker->m_waits_current.m_event_id= pfs_thread->m_event_id++;
  pfs_locker->m_waits_current.m_operation= mutex_operation_map[(int) op];
  pfs_locker->m_waits_current.m_wait_class= WAIT_CLASS_MUTEX;

  pfs_thread->m_wait_locker_count++;
  return reinterpret_cast<PSI_mutex_locker*> (pfs_locker);
}
1139
/**
  Implementation of the rwlock instrumentation interface.
  Build a wait locker for a rwlock operation about to be performed,
  or return NULL when the event should not be recorded.
  @param state   locker state storage (unused by this implementation)
  @param rwlock  the instrumented rwlock
  @param op      the rwlock operation to record
  @return a rwlock locker, or NULL if the wait is not instrumented
*/
static PSI_rwlock_locker*
get_thread_rwlock_locker_v1(PSI_rwlock_locker_state *state,
                            PSI_rwlock *rwlock, PSI_rwlock_operation op)
{
  PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(rwlock_operation_map));
  DBUG_ASSERT(pfs_rwlock != NULL);
  DBUG_ASSERT(pfs_rwlock->m_class != NULL);
  /* Cheapest filters first: global flag, class, then thread. */
  if (! flag_events_waits_current)
    return NULL;
  if (! pfs_rwlock->m_class->m_enabled)
    return NULL;
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(pfs_thread == NULL))
    return NULL;
  if (! pfs_thread->m_enabled)
    return NULL;
  /* Lockers nest; give up (and count the loss) when the stack is full. */
  if (unlikely(pfs_thread->m_wait_locker_count >= LOCKER_STACK_SIZE))
  {
    locker_lost++;
    return NULL;
  }
  PFS_wait_locker *pfs_locker= &pfs_thread->m_wait_locker_stack
    [pfs_thread->m_wait_locker_count];

  pfs_locker->m_target.m_rwlock= pfs_rwlock;
  pfs_locker->m_waits_current.m_thread= pfs_thread;
  pfs_locker->m_waits_current.m_class= pfs_rwlock->m_class;
  /* Timer starts later, in start_rwlock_{rd,wr}wait_v1(). */
  if (pfs_rwlock->m_class->m_timed)
  {
    pfs_locker->m_timer_name= wait_timer;
    pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_STARTING;
  }
  else
    pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_UNTIMED;
  pfs_locker->m_waits_current.m_object_instance_addr= pfs_rwlock->m_identity;
  pfs_locker->m_waits_current.m_event_id= pfs_thread->m_event_id++;
  pfs_locker->m_waits_current.m_operation=
    rwlock_operation_map[static_cast<int> (op)];
  pfs_locker->m_waits_current.m_wait_class= WAIT_CLASS_RWLOCK;

  pfs_thread->m_wait_locker_count++;
  return reinterpret_cast<PSI_rwlock_locker*> (pfs_locker);
}
1185
/**
  Implementation of the condition instrumentation interface.
  Build a wait locker for a condition operation about to be performed,
  or return NULL when the event should not be recorded.
  @param state  locker state storage (unused by this implementation)
  @param cond   the instrumented condition
  @param op     the condition operation to record
  @return a condition locker, or NULL if the wait is not instrumented
*/
static PSI_cond_locker*
get_thread_cond_locker_v1(PSI_cond_locker_state *state,
                          PSI_cond *cond, PSI_mutex * /* unused: mutex */,
                          PSI_cond_operation op)
{
  /*
    Note about the unused PSI_mutex *mutex parameter:
    In the pthread library, a call to pthread_cond_wait()
    causes an unlock() + lock() on the mutex associated with the condition.
    This mutex operation is not instrumented, so the mutex will still
    appear as locked when a thread is waiting on a condition.
    This has no impact now, as unlock_mutex() is not recording events.
    When unlock_mutex() is implemented by later work logs,
    this parameter here will be used to adjust the mutex state,
    in start_cond_wait_v1() and end_cond_wait_v1().
  */
  PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(cond_operation_map));
  DBUG_ASSERT(pfs_cond != NULL);
  DBUG_ASSERT(pfs_cond->m_class != NULL);
  /* Cheapest filters first: global flag, class, then thread. */
  if (! flag_events_waits_current)
    return NULL;
  if (! pfs_cond->m_class->m_enabled)
    return NULL;
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(pfs_thread == NULL))
    return NULL;
  if (! pfs_thread->m_enabled)
    return NULL;
  /* Lockers nest; give up (and count the loss) when the stack is full. */
  if (unlikely(pfs_thread->m_wait_locker_count >= LOCKER_STACK_SIZE))
  {
    locker_lost++;
    return NULL;
  }
  PFS_wait_locker *pfs_locker= &pfs_thread->m_wait_locker_stack
    [pfs_thread->m_wait_locker_count];

  pfs_locker->m_target.m_cond= pfs_cond;
  pfs_locker->m_waits_current.m_thread= pfs_thread;
  pfs_locker->m_waits_current.m_class= pfs_cond->m_class;
  /* Timer starts later, in start_cond_wait_v1(). */
  if (pfs_cond->m_class->m_timed)
  {
    pfs_locker->m_timer_name= wait_timer;
    pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_STARTING;
  }
  else
    pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_UNTIMED;
  pfs_locker->m_waits_current.m_object_instance_addr= pfs_cond->m_identity;
  pfs_locker->m_waits_current.m_event_id= pfs_thread->m_event_id++;
  pfs_locker->m_waits_current.m_operation=
    cond_operation_map[static_cast<int> (op)];
  pfs_locker->m_waits_current.m_wait_class= WAIT_CLASS_COND;

  pfs_thread->m_wait_locker_count++;
  return reinterpret_cast<PSI_cond_locker*> (pfs_locker);
}
1243
/**
  Implementation of the table instrumentation interface.
  Build a wait locker for a table io operation about to be performed,
  or return NULL when the event should not be recorded.
  @param state  locker state storage (unused by this implementation)
  @param table  the instrumented table
  @return a table locker, or NULL if the wait is not instrumented
*/
static PSI_table_locker*
get_thread_table_locker_v1(PSI_table_locker_state *state,
                           PSI_table *table)
{
  PFS_table *pfs_table= reinterpret_cast<PFS_table*> (table);
  DBUG_ASSERT(pfs_table != NULL);
  DBUG_ASSERT(pfs_table->m_share != NULL);
  /* Cheapest filters first: global flag, share, then thread. */
  if (! flag_events_waits_current)
    return NULL;
  if (! pfs_table->m_share->m_enabled)
    return NULL;
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(pfs_thread == NULL))
    return NULL;
  if (! pfs_thread->m_enabled)
    return NULL;
  /* Lockers nest; give up (and count the loss) when the stack is full. */
  if (unlikely(pfs_thread->m_wait_locker_count >= LOCKER_STACK_SIZE))
  {
    locker_lost++;
    return NULL;
  }
  PFS_wait_locker *pfs_locker= &pfs_thread->m_wait_locker_stack
    [pfs_thread->m_wait_locker_count];

  pfs_locker->m_target.m_table= pfs_table;
  pfs_locker->m_waits_current.m_thread= pfs_thread;
  pfs_locker->m_waits_current.m_class= &global_table_class;
  if (pfs_table->m_share->m_timed)
  {
    pfs_locker->m_timer_name= wait_timer;
    pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_STARTING;
  }
  else
    pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_UNTIMED;
  pfs_locker->m_waits_current.m_object_instance_addr= pfs_table->m_identity;
  pfs_locker->m_waits_current.m_event_id= pfs_thread->m_event_id++;
  pfs_locker->m_waits_current.m_wait_class= WAIT_CLASS_TABLE;

  pfs_thread->m_wait_locker_count++;
  return reinterpret_cast<PSI_table_locker*> (pfs_locker);
}
1285
/**
  Implementation of the file instrumentation interface.
  Build a wait locker for a file operation identified by name
  (e.g. open/create, before a descriptor exists), or return NULL
  when the event should not be recorded.
  @param state     locker state storage (unused by this implementation)
  @param key       registered file instrument key
  @param op        the file operation to record
  @param name      the file name
  @param identity  address identifying the file object
  @return a file locker, or NULL if the wait is not instrumented
*/
static PSI_file_locker*
get_thread_file_name_locker_v1(PSI_file_locker_state *state,
                               PSI_file_key key,
                               PSI_file_operation op,
                               const char *name, const void *identity)
{
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));

  if (! flag_events_waits_current)
    return NULL;
  PFS_file_class *klass= find_file_class(key);
  if (unlikely(klass == NULL))
    return NULL;
  if (! klass->m_enabled)
    return NULL;
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(pfs_thread == NULL))
    return NULL;
  if (! pfs_thread->m_enabled)
    return NULL;
  /* Lockers nest; give up (and count the loss) when the stack is full. */
  if (unlikely(pfs_thread->m_wait_locker_count >= LOCKER_STACK_SIZE))
  {
    locker_lost++;
    return NULL;
  }
  uint len= strlen(name);
  PFS_file *pfs_file= find_or_create_file(pfs_thread, klass, name, len);
  if (unlikely(pfs_file == NULL))
    return NULL;

  PFS_wait_locker *pfs_locker= &pfs_thread->m_wait_locker_stack
    [pfs_thread->m_wait_locker_count];

  pfs_locker->m_target.m_file= pfs_file;
  pfs_locker->m_waits_current.m_thread= pfs_thread;
  pfs_locker->m_waits_current.m_class= pfs_file->m_class;
  if (pfs_file->m_class->m_timed)
  {
    pfs_locker->m_timer_name= wait_timer;
    pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_STARTING;
  }
  else
    pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_UNTIMED;
  pfs_locker->m_waits_current.m_object_instance_addr= pfs_file;
  pfs_locker->m_waits_current.m_object_name= pfs_file->m_filename;
  pfs_locker->m_waits_current.m_object_name_length=
    pfs_file->m_filename_length;
  pfs_locker->m_waits_current.m_event_id= pfs_thread->m_event_id++;
  pfs_locker->m_waits_current.m_operation=
    file_operation_map[static_cast<int> (op)];
  pfs_locker->m_waits_current.m_wait_class= WAIT_CLASS_FILE;

  pfs_thread->m_wait_locker_count++;
  return reinterpret_cast<PSI_file_locker*> (pfs_locker);
}
1342
/**
  Implementation of the file instrumentation interface.
  Build a wait locker for an operation on an already-instrumented
  file stream, or return NULL when the event should not be recorded.
  @param state  locker state storage (unused by this implementation)
  @param file   the instrumented file
  @param op     the file operation to record
  @return a file locker, or NULL if the wait is not instrumented
*/
static PSI_file_locker*
get_thread_file_stream_locker_v1(PSI_file_locker_state *state,
                                 PSI_file *file, PSI_file_operation op)
{
  PFS_file *pfs_file= reinterpret_cast<PFS_file*> (file);

  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
  DBUG_ASSERT(pfs_file != NULL);
  DBUG_ASSERT(pfs_file->m_class != NULL);

  /* Cheapest filters first: global flag, class, then thread. */
  if (! flag_events_waits_current)
    return NULL;
  if (! pfs_file->m_class->m_enabled)
    return NULL;
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(pfs_thread == NULL))
    return NULL;
  if (! pfs_thread->m_enabled)
    return NULL;
  /* Lockers nest; give up (and count the loss) when the stack is full. */
  if (unlikely(pfs_thread->m_wait_locker_count >= LOCKER_STACK_SIZE))
  {
    locker_lost++;
    return NULL;
  }
  PFS_wait_locker *pfs_locker= &pfs_thread->m_wait_locker_stack
    [pfs_thread->m_wait_locker_count];

  pfs_locker->m_target.m_file= pfs_file;
  pfs_locker->m_waits_current.m_thread= pfs_thread;
  pfs_locker->m_waits_current.m_class= pfs_file->m_class;
  if (pfs_file->m_class->m_timed)
  {
    pfs_locker->m_timer_name= wait_timer;
    pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_STARTING;
  }
  else
    pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_UNTIMED;
  pfs_locker->m_waits_current.m_object_instance_addr= pfs_file;
  pfs_locker->m_waits_current.m_object_name= pfs_file->m_filename;
  pfs_locker->m_waits_current.m_object_name_length=
    pfs_file->m_filename_length;
  pfs_locker->m_waits_current.m_event_id= pfs_thread->m_event_id++;
  pfs_locker->m_waits_current.m_operation=
    file_operation_map[static_cast<int> (op)];
  pfs_locker->m_waits_current.m_wait_class= WAIT_CLASS_FILE;

  pfs_thread->m_wait_locker_count++;
  return reinterpret_cast<PSI_file_locker*> (pfs_locker);
}
1393
/**
  Implementation of the file instrumentation interface.
  Build a wait locker for an operation on a file identified by its
  descriptor, or return NULL when the event should not be recorded.
  For PSI_FILE_CLOSE, also removes the descriptor -> instrument
  association (see the comment in the body for why this happens
  before the actual close).
  @param state  locker state storage (unused by this implementation)
  @param file   the file descriptor
  @param op     the file operation to record
  @return a file locker, or NULL if the wait is not instrumented
*/
static PSI_file_locker*
get_thread_file_descriptor_locker_v1(PSI_file_locker_state *state,
                                     File file, PSI_file_operation op)
{
  int index= static_cast<int> (file);

  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));

  if (! flag_events_waits_current)
    return NULL;
  if (likely((index >= 0) && (index < file_handle_max)))
  {
    PFS_file *pfs_file= file_handle_array[index];
    if (likely(pfs_file != NULL))
    {
      PFS_thread *pfs_thread;

      /*
        We are about to close a file by descriptor number,
        and the calling code still holds the descriptor.
        Cleanup the file descriptor <--> file instrument association.
        Remove the instrumentation *before* the close to avoid race
        conditions with another thread opening a file
        (that could be given the same descriptor).
      */
      if (op == PSI_FILE_CLOSE)
        file_handle_array[index]= NULL;

      DBUG_ASSERT(pfs_file->m_class != NULL);
      if (! pfs_file->m_class->m_enabled)
        return NULL;
      pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
      if (unlikely(pfs_thread == NULL))
        return NULL;
      if (! pfs_thread->m_enabled)
        return NULL;
      /* Lockers nest; give up (and count the loss) when the stack is full. */
      if (unlikely(pfs_thread->m_wait_locker_count >= LOCKER_STACK_SIZE))
      {
        locker_lost++;
        return NULL;
      }
      PFS_wait_locker *pfs_locker= &pfs_thread->m_wait_locker_stack
        [pfs_thread->m_wait_locker_count];

      pfs_locker->m_target.m_file= pfs_file;
      pfs_locker->m_waits_current.m_thread= pfs_thread;
      pfs_locker->m_waits_current.m_class= pfs_file->m_class;
      if (pfs_file->m_class->m_timed)
      {
        pfs_locker->m_timer_name= wait_timer;
        pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_STARTING;
      }
      else
        pfs_locker->m_waits_current.m_timer_state= TIMER_STATE_UNTIMED;
      pfs_locker->m_waits_current.m_object_instance_addr= pfs_file;
      pfs_locker->m_waits_current.m_object_name= pfs_file->m_filename;
      pfs_locker->m_waits_current.m_object_name_length=
        pfs_file->m_filename_length;
      pfs_locker->m_waits_current.m_event_id= pfs_thread->m_event_id++;
      pfs_locker->m_waits_current.m_operation=
        file_operation_map[static_cast<int> (op)];
      pfs_locker->m_waits_current.m_wait_class= WAIT_CLASS_FILE;

      pfs_thread->m_wait_locker_count++;
      return reinterpret_cast<PSI_file_locker*> (pfs_locker);
    }
  }
  return NULL;
}
1464
/**
  Implementation of the mutex instrumentation interface.
  Record a mutex unlock: clears the instrumented owner and lock time.
  No wait event is produced here; the LATER_WL2333 section is
  compiled out pending WL#2333.
  @param mutex  the instrumented mutex being unlocked (not NULL)
*/
static void unlock_mutex_v1(PSI_mutex *mutex)
{
  PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
  DBUG_ASSERT(pfs_mutex != NULL);

  /*
    Note that this code is still protected by the instrumented mutex,
    and therefore is thread safe. See inline_mysql_mutex_unlock().
  */

  /* Always update the instrumented state */
  pfs_mutex->m_owner= NULL;
  pfs_mutex->m_last_locked= 0;

#ifdef LATER_WL2333
  /*
    See WL#2333: SHOW ENGINE ... LOCK STATUS.
    PFS_mutex::m_lock_stat is not exposed in user visible tables
    currently, so there is no point spending time computing it.
  */
  PFS_thread *pfs_thread= reinterpret_cast<PFS_thread*> (thread);
  DBUG_ASSERT(pfs_thread != NULL);

  if (unlikely(! flag_events_waits_current))
    return;
  if (! pfs_mutex->m_class->m_enabled)
    return;
  if (! pfs_thread->m_enabled)
    return;

  if (pfs_mutex->m_class->m_timed)
  {
    ulonglong locked_time;
    locked_time= get_timer_value(wait_timer) - pfs_mutex->m_last_locked;
    aggregate_single_stat_chain(&pfs_mutex->m_lock_stat, locked_time);
  }
#endif
}
1503
/**
  Implementation of the rwlock instrumentation interface.
  Record a rwlock unlock: updates the instrumented writer/reader
  state. No wait event is produced here; the LATER_WL2333 section
  is compiled out pending WL#2333.
  @param rwlock  the instrumented rwlock being unlocked (not NULL)
*/
static void unlock_rwlock_v1(PSI_rwlock *rwlock)
{
  PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
  DBUG_ASSERT(pfs_rwlock != NULL);
  bool last_writer= false;
  bool last_reader= false;

  /*
    Note that this code is still protected by the instrumented rwlock,
    and therefore is:
    - thread safe for write locks
    - almost thread safe for read locks (pfs_rwlock->m_readers is unsafe).
    See inline_mysql_rwlock_unlock()
  */

  /* Always update the instrumented state */
  if (pfs_rwlock->m_writer)
  {
    /* Nominal case, a writer is unlocking. */
    last_writer= true;
    pfs_rwlock->m_writer= NULL;
    /* Reset the readers stats, they could be off */
    pfs_rwlock->m_readers= 0;
  }
  else if (likely(pfs_rwlock->m_readers > 0))
  {
    /* Nominal case, a reader is unlocking. */
    if (--(pfs_rwlock->m_readers) == 0)
      last_reader= true;
  }
  else
  {
    /*
      Edge case, we have no writer and no readers,
      on an unlock event.
      This is possible for:
      - partial instrumentation
      - instrumentation disabled at runtime,
        see when get_thread_rwlock_locker_v1() returns NULL
      No further action is taken here, the next
      write lock will put the statistics is a valid state.
    */
  }

#ifdef LATER_WL2333
  /* See WL#2333: SHOW ENGINE ... LOCK STATUS. */
  PFS_thread *pfs_thread= reinterpret_cast<PFS_thread*> (thread);
  DBUG_ASSERT(pfs_thread != NULL);

  if (unlikely(! flag_events_waits_current))
    return;
  if (! pfs_rwlock->m_class->m_enabled)
    return;
  if (! pfs_thread->m_enabled)
    return;

  ulonglong locked_time;
  if (last_writer)
  {
    if (pfs_rwlock->m_class->m_timed)
    {
      locked_time= get_timer_value(wait_timer) - pfs_rwlock->m_last_written;
      aggregate_single_stat_chain(&pfs_rwlock->m_write_lock_stat, locked_time);
    }
  }
  else if (last_reader)
  {
    if (pfs_rwlock->m_class->m_timed)
    {
      locked_time= get_timer_value(wait_timer) - pfs_rwlock->m_last_read;
      aggregate_single_stat_chain(&pfs_rwlock->m_read_lock_stat, locked_time);
    }
  }
#else
  /* Silence unused-variable warnings while WL#2333 is pending. */
  (void) last_reader;
  (void) last_writer;
#endif
}
1582
signal_cond_v1(PSI_cond * cond)1583 static void signal_cond_v1(PSI_cond* cond)
1584 {
1585 PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
1586 DBUG_ASSERT(pfs_cond != NULL);
1587
1588 pfs_cond->m_cond_stat.m_signal_count++;
1589 }
1590
broadcast_cond_v1(PSI_cond * cond)1591 static void broadcast_cond_v1(PSI_cond* cond)
1592 {
1593 PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
1594 DBUG_ASSERT(pfs_cond != NULL);
1595
1596 pfs_cond->m_cond_stat.m_broadcast_count++;
1597 }
1598
start_mutex_wait_v1(PSI_mutex_locker * locker,const char * src_file,uint src_line)1599 static void start_mutex_wait_v1(PSI_mutex_locker* locker,
1600 const char *src_file, uint src_line)
1601 {
1602 PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
1603 DBUG_ASSERT(pfs_locker != NULL);
1604
1605 PFS_events_waits *wait= &pfs_locker->m_waits_current;
1606 if (wait->m_timer_state == TIMER_STATE_STARTING)
1607 {
1608 wait->m_timer_start= get_timer_value(pfs_locker->m_timer_name);
1609 wait->m_timer_state= TIMER_STATE_STARTED;
1610 }
1611 wait->m_source_file= src_file;
1612 wait->m_source_line= src_line;
1613 }
1614
/**
  Implementation of the mutex instrumentation interface.
  Mark the end of a mutex wait: stop the timer, publish the event to
  the history tables, and on success (rc == 0) update the mutex
  owner/lock time and aggregate the wait statistics. Always pops the
  locker from the per-thread stack.
  @param locker  the locker built by get_thread_mutex_locker_v1()
  @param rc      result of the lock operation (0 means acquired)
*/
static void end_mutex_wait_v1(PSI_mutex_locker* locker, int rc)
{
  PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
  DBUG_ASSERT(pfs_locker != NULL);
  PFS_events_waits *wait= &pfs_locker->m_waits_current;

  if (wait->m_timer_state == TIMER_STATE_STARTED)
  {
    wait->m_timer_end= get_timer_value(pfs_locker->m_timer_name);
    wait->m_timer_state= TIMER_STATE_TIMED;
  }
  if (flag_events_waits_history)
    insert_events_waits_history(wait->m_thread, wait);
  if (flag_events_waits_history_long)
    insert_events_waits_history_long(wait);

  if (rc == 0)
  {
    /* Thread safe: we are protected by the instrumented mutex */
    PFS_mutex *mutex= pfs_locker->m_target.m_mutex;
    PFS_single_stat_chain *stat= find_per_thread_mutex_class_wait_stat(wait->m_thread, mutex->m_class);
    mutex->m_owner= wait->m_thread;
    mutex->m_last_locked= wait->m_timer_end;

    /* If timed then aggregate stats, else increment the value counts only */
    if (wait->m_timer_state == TIMER_STATE_TIMED)
    {
      ulonglong wait_time= wait->m_timer_end - wait->m_timer_start;
      aggregate_single_stat_chain(&mutex->m_wait_stat, wait_time);
      aggregate_single_stat_chain(stat, wait_time);
    }
    else
    {
      increment_single_stat_chain(&mutex->m_wait_stat);
      increment_single_stat_chain(stat);
    }
  }
  /* Pop the locker pushed by get_thread_mutex_locker_v1(). */
  wait->m_thread->m_wait_locker_count--;
}
1654
start_rwlock_rdwait_v1(PSI_rwlock_locker * locker,const char * src_file,uint src_line)1655 static void start_rwlock_rdwait_v1(PSI_rwlock_locker* locker,
1656 const char *src_file, uint src_line)
1657 {
1658 PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
1659 DBUG_ASSERT(pfs_locker != NULL);
1660
1661 PFS_events_waits *wait= &pfs_locker->m_waits_current;
1662 if (wait->m_timer_state == TIMER_STATE_STARTING)
1663 {
1664 wait->m_timer_start= get_timer_value(pfs_locker->m_timer_name);
1665 wait->m_timer_state= TIMER_STATE_STARTED;
1666 }
1667 wait->m_source_file= src_file;
1668 wait->m_source_line= src_line;
1669 }
1670
/**
  Implementation of the rwlock instrumentation interface.
  Mark the end of a rwlock read wait: stop the timer, publish the
  event to the history tables, and on success (rc == 0) update the
  reader count and aggregate the wait statistics. Always pops the
  locker from the per-thread stack.
  @param locker  the locker built by get_thread_rwlock_locker_v1()
  @param rc      result of the read lock operation (0 means acquired)
*/
static void end_rwlock_rdwait_v1(PSI_rwlock_locker* locker, int rc)
{
  PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
  DBUG_ASSERT(pfs_locker != NULL);
  PFS_events_waits *wait= &pfs_locker->m_waits_current;

  if (wait->m_timer_state == TIMER_STATE_STARTED)
  {
    wait->m_timer_end= get_timer_value(pfs_locker->m_timer_name);
    wait->m_timer_state= TIMER_STATE_TIMED;
  }
  if (flag_events_waits_history)
    insert_events_waits_history(wait->m_thread, wait);
  if (flag_events_waits_history_long)
    insert_events_waits_history_long(wait);

  if (rc == 0)
  {
    /*
      Warning:
      Multiple threads can execute this section concurrently
      (since multiple readers can execute in parallel).
      The statistics generated are not safe, which is why they are
      just statistics, not facts.
    */
    PFS_rwlock *rwlock= pfs_locker->m_target.m_rwlock;
    PFS_single_stat_chain *stat= find_per_thread_rwlock_class_wait_stat(wait->m_thread, rwlock->m_class);

    /* Remember when the first reader entered. */
    if (rwlock->m_readers == 0)
      rwlock->m_last_read= wait->m_timer_end;
    rwlock->m_writer= NULL;
    rwlock->m_readers++;

    /* If timed then aggregate stats, else increment the value counts only */
    if (wait->m_timer_state == TIMER_STATE_TIMED)
    {
      ulonglong wait_time= wait->m_timer_end - wait->m_timer_start;
      aggregate_single_stat_chain(&rwlock->m_wait_stat, wait_time);
      aggregate_single_stat_chain(stat, wait_time);
    }
    else
    {
      increment_single_stat_chain(&rwlock->m_wait_stat);
      increment_single_stat_chain(stat);
    }
  }
  /* Pop the locker pushed by get_thread_rwlock_locker_v1(). */
  wait->m_thread->m_wait_locker_count--;
}
1719
start_rwlock_wrwait_v1(PSI_rwlock_locker * locker,const char * src_file,uint src_line)1720 static void start_rwlock_wrwait_v1(PSI_rwlock_locker* locker,
1721 const char *src_file, uint src_line)
1722 {
1723 PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
1724 DBUG_ASSERT(pfs_locker != NULL);
1725
1726 PFS_events_waits *wait= &pfs_locker->m_waits_current;
1727 if (wait->m_timer_state == TIMER_STATE_STARTING)
1728 {
1729 wait->m_timer_start= get_timer_value(pfs_locker->m_timer_name);
1730 wait->m_timer_state= TIMER_STATE_STARTED;
1731 }
1732 wait->m_source_file= src_file;
1733 wait->m_source_line= src_line;
1734 }
1735
/**
  Implementation of the rwlock instrumentation interface.
  Mark the end of a rwlock write wait: stop the timer, publish the
  event to the history tables, and on success (rc == 0) record the
  writer, reset the reader state and aggregate the wait statistics.
  Always pops the locker from the per-thread stack.
  @param locker  the locker built by get_thread_rwlock_locker_v1()
  @param rc      result of the write lock operation (0 means acquired)
*/
static void end_rwlock_wrwait_v1(PSI_rwlock_locker* locker, int rc)
{
  PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
  DBUG_ASSERT(pfs_locker != NULL);
  PFS_events_waits *wait= &pfs_locker->m_waits_current;

  if (wait->m_timer_state == TIMER_STATE_STARTED)
  {
    wait->m_timer_end= get_timer_value(pfs_locker->m_timer_name);
    wait->m_timer_state= TIMER_STATE_TIMED;
  }
  if (flag_events_waits_history)
    insert_events_waits_history(wait->m_thread, wait);
  if (flag_events_waits_history_long)
    insert_events_waits_history_long(wait);

  if (rc == 0)
  {
    /* Thread safe : we are protected by the instrumented rwlock */
    PFS_rwlock *rwlock= pfs_locker->m_target.m_rwlock;
    PFS_single_stat_chain *stat= find_per_thread_rwlock_class_wait_stat(wait->m_thread, rwlock->m_class);
    rwlock->m_writer= wait->m_thread;
    rwlock->m_last_written= wait->m_timer_end;
    /* Reset the readers stats, they could be off */
    rwlock->m_readers= 0;
    rwlock->m_last_read= 0;

    /* If timed then aggregate stats, else increment the value counts only */
    if (wait->m_timer_state == TIMER_STATE_TIMED)
    {
      ulonglong wait_time= wait->m_timer_end - wait->m_timer_start;
      aggregate_single_stat_chain(&rwlock->m_wait_stat, wait_time);
      aggregate_single_stat_chain(stat, wait_time);
    }
    else
    {
      increment_single_stat_chain(&rwlock->m_wait_stat);
      increment_single_stat_chain(stat);
    }
  }
  /* Pop the locker pushed by get_thread_rwlock_locker_v1(). */
  wait->m_thread->m_wait_locker_count--;
}
1778
start_cond_wait_v1(PSI_cond_locker * locker,const char * src_file,uint src_line)1779 static void start_cond_wait_v1(PSI_cond_locker* locker,
1780 const char *src_file, uint src_line)
1781 {
1782 PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
1783 DBUG_ASSERT(pfs_locker != NULL);
1784
1785 PFS_events_waits *wait= &pfs_locker->m_waits_current;
1786 if (wait->m_timer_state == TIMER_STATE_STARTING)
1787 {
1788 wait->m_timer_start= get_timer_value(pfs_locker->m_timer_name);
1789 wait->m_timer_state= TIMER_STATE_STARTED;
1790 }
1791 wait->m_source_file= src_file;
1792 wait->m_source_line= src_line;
1793 }
1794
end_cond_wait_v1(PSI_cond_locker * locker,int rc)1795 static void end_cond_wait_v1(PSI_cond_locker* locker, int rc)
1796 {
1797 PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
1798 DBUG_ASSERT(pfs_locker != NULL);
1799 PFS_events_waits *wait= &pfs_locker->m_waits_current;
1800
1801 if (wait->m_timer_state == TIMER_STATE_STARTED)
1802 {
1803 wait->m_timer_end= get_timer_value(pfs_locker->m_timer_name);
1804 wait->m_timer_state= TIMER_STATE_TIMED;
1805 }
1806 if (flag_events_waits_history)
1807 insert_events_waits_history(wait->m_thread, wait);
1808 if (flag_events_waits_history_long)
1809 insert_events_waits_history_long(wait);
1810
1811 if (rc == 0)
1812 {
1813 /*
1814 Not thread safe, race conditions will occur.
1815 A first race condition is:
1816 - thread 1 waits on cond A
1817 - thread 2 waits on cond B
1818 threads 1 and 2 compete when updating the same cond A
1819 statistics, possibly missing a min / max / sum / count.
1820 A second race condition is:
1821 - thread 1 waits on cond A
1822 - thread 2 destroys cond A
1823 - thread 2 or 3 creates cond B in the same condition slot
1824 thread 1 will then aggregate statistics about defunct A
1825 in condition B.
1826 This is accepted, the data will be slightly inaccurate.
1827 */
1828 PFS_cond *cond= pfs_locker->m_target.m_cond;
1829 PFS_single_stat_chain *stat= find_per_thread_cond_class_wait_stat(wait->m_thread, cond->m_class);
1830
1831 /* If timed then aggregate stats, else increment the value counts only */
1832 if (wait->m_timer_state == TIMER_STATE_TIMED)
1833 {
1834 ulonglong wait_time= wait->m_timer_end - wait->m_timer_start;
1835 aggregate_single_stat_chain(&cond->m_wait_stat, wait_time);
1836 aggregate_single_stat_chain(stat, wait_time);
1837 }
1838 else
1839 {
1840 increment_single_stat_chain(&cond->m_wait_stat);
1841 increment_single_stat_chain(stat);
1842 }
1843 }
1844 wait->m_thread->m_wait_locker_count--;
1845 }
1846
start_table_wait_v1(PSI_table_locker * locker,const char * src_file,uint src_line)1847 static void start_table_wait_v1(PSI_table_locker* locker,
1848 const char *src_file, uint src_line)
1849 {
1850 PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
1851 DBUG_ASSERT(pfs_locker != NULL);
1852
1853 PFS_events_waits *wait= &pfs_locker->m_waits_current;
1854 if (wait->m_timer_state == TIMER_STATE_STARTING)
1855 {
1856 wait->m_timer_start= get_timer_value(pfs_locker->m_timer_name);
1857 wait->m_timer_state= TIMER_STATE_STARTED;
1858 }
1859 wait->m_source_file= src_file;
1860 wait->m_source_line= src_line;
1861 wait->m_operation= OPERATION_TYPE_LOCK;
1862 PFS_table_share *share= pfs_locker->m_target.m_table->m_share;
1863 wait->m_schema_name= share->m_schema_name;
1864 wait->m_schema_name_length= share->m_schema_name_length;
1865 wait->m_object_name= share->m_table_name;
1866 wait->m_object_name_length= share->m_table_name_length;
1867 }
1868
end_table_wait_v1(PSI_table_locker * locker)1869 static void end_table_wait_v1(PSI_table_locker* locker)
1870 {
1871 PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
1872 DBUG_ASSERT(pfs_locker != NULL);
1873 PFS_events_waits *wait= &pfs_locker->m_waits_current;
1874
1875 if (wait->m_timer_state == TIMER_STATE_STARTED)
1876 {
1877 wait->m_timer_end= get_timer_value(pfs_locker->m_timer_name);
1878 wait->m_timer_state= TIMER_STATE_TIMED;
1879 }
1880 if (flag_events_waits_history)
1881 insert_events_waits_history(wait->m_thread, wait);
1882 if (flag_events_waits_history_long)
1883 insert_events_waits_history_long(wait);
1884
1885 PFS_table *table= pfs_locker->m_target.m_table;
1886
1887 /* If timed then aggregate stats, else increment the value counts only */
1888 if (wait->m_timer_state == TIMER_STATE_TIMED)
1889 {
1890 ulonglong wait_time= wait->m_timer_end - wait->m_timer_start;
1891 aggregate_single_stat_chain(&table->m_wait_stat, wait_time);
1892 }
1893 else
1894 {
1895 increment_single_stat_chain(&table->m_wait_stat);
1896 }
1897
1898 /*
1899 There is currently no per table and per thread aggregation.
1900 The number of tables in the application is arbitrary, and may be high.
1901 The number of slots per thread to hold aggregates is fixed,
1902 and is constrained by memory.
1903 Implementing a per thread and per table aggregate has not been
1904 decided yet.
1905 If it's implemented, it's likely that the user will have to specify,
1906 per table name, if the aggregate per thread is to be computed or not.
1907 This will mean a SETUP_ table.
1908 */
1909 wait->m_thread->m_wait_locker_count--;
1910 }
1911
/*
  Forward declarations: the generic file wait instrumentation below is
  also used to implement the file open instrumentation, which is defined
  first in this file.
*/
static void start_file_wait_v1(PSI_file_locker *locker,
                               size_t count,
                               const char *src_file,
                               uint src_line);

static void end_file_wait_v1(PSI_file_locker *locker,
                             size_t count);
1919
start_file_open_wait_v1(PSI_file_locker * locker,const char * src_file,uint src_line)1920 static PSI_file* start_file_open_wait_v1(PSI_file_locker *locker,
1921 const char *src_file,
1922 uint src_line)
1923 {
1924 PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
1925 DBUG_ASSERT(pfs_locker != NULL);
1926
1927 start_file_wait_v1(locker, 0, src_file, src_line);
1928
1929 PFS_file *pfs_file= pfs_locker->m_target.m_file;
1930 return reinterpret_cast<PSI_file*> (pfs_file);
1931 }
1932
end_file_open_wait_v1(PSI_file_locker * locker)1933 static void end_file_open_wait_v1(PSI_file_locker *locker)
1934 {
1935 end_file_wait_v1(locker, 0);
1936 }
1937
/**
  Record the end of a file open wait and bind the instrumented file to
  the operating system file descriptor returned by the open call.
  On a valid descriptor, the instrumented file is stored in
  file_handle_array so later per-descriptor operations can find it.
  @param locker  the file locker
  @param file    the file descriptor returned by the open; negative on failure
*/
static void end_file_open_wait_and_bind_to_descriptor_v1
  (PSI_file_locker *locker, File file)
{
  int index= (int) file;
  PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
  DBUG_ASSERT(pfs_locker != NULL);

  end_file_wait_v1(locker, 0);

  PFS_file *pfs_file= pfs_locker->m_target.m_file;
  DBUG_ASSERT(pfs_file != NULL);

  if (likely(index >= 0))
  {
    if (likely(index < file_handle_max))
      file_handle_array[index]= pfs_file;
    else
      /*
        Descriptor exceeds the instrumented handle array: count the loss.
        NOTE(review): pfs_file is not released here, unlike the open
        failure path below — confirm this is intentional.
      */
      file_handle_lost++;
  }
  else
    /* The open failed; give the instrumented file back. */
    release_file(pfs_file);
}
1960
start_file_wait_v1(PSI_file_locker * locker,size_t count,const char * src_file,uint src_line)1961 static void start_file_wait_v1(PSI_file_locker *locker,
1962 size_t count,
1963 const char *src_file,
1964 uint src_line)
1965 {
1966 PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
1967 DBUG_ASSERT(pfs_locker != NULL);
1968
1969 PFS_events_waits *wait= &pfs_locker->m_waits_current;
1970 if (wait->m_timer_state == TIMER_STATE_STARTING)
1971 {
1972 wait->m_timer_start= get_timer_value(pfs_locker->m_timer_name);
1973 wait->m_timer_state= TIMER_STATE_STARTED;
1974 }
1975 wait->m_source_file= src_file;
1976 wait->m_source_line= src_line;
1977 wait->m_number_of_bytes= count;
1978 }
1979
/**
  Record the end of a wait on an instrumented file.
  Finishes the wait timer, records the event in the wait history tables
  if enabled, aggregates wait statistics, updates the per-file and
  per-class I/O byte counters, and releases or destroys the instrumented
  file for close / delete operations.
  @param locker  the file locker
  @param count   the number of bytes actually processed, or 0 if not applicable
*/
static void end_file_wait_v1(PSI_file_locker *locker,
                             size_t count)
{
  PFS_wait_locker *pfs_locker= reinterpret_cast<PFS_wait_locker*> (locker);
  DBUG_ASSERT(pfs_locker != NULL);
  PFS_events_waits *wait= &pfs_locker->m_waits_current;

  /* The actual byte count may differ from the one given at start time. */
  wait->m_number_of_bytes= count;
  if (wait->m_timer_state == TIMER_STATE_STARTED)
  {
    wait->m_timer_end= get_timer_value(pfs_locker->m_timer_name);
    wait->m_timer_state= TIMER_STATE_TIMED;
  }
  if (flag_events_waits_history)
    insert_events_waits_history(wait->m_thread, wait);
  if (flag_events_waits_history_long)
    insert_events_waits_history_long(wait);

  PFS_file *file= pfs_locker->m_target.m_file;
  PFS_single_stat_chain *stat= find_per_thread_file_class_wait_stat(wait->m_thread, file->m_class);

  /* If timed then aggregate stats, else increment the value counts only */
  if (wait->m_timer_state == TIMER_STATE_TIMED)
  {
    ulonglong wait_time= wait->m_timer_end - wait->m_timer_start;
    aggregate_single_stat_chain(&file->m_wait_stat, wait_time);
    aggregate_single_stat_chain(stat, wait_time);
  }
  else
  {
    increment_single_stat_chain(&file->m_wait_stat);
    increment_single_stat_chain(stat);
  }

  PFS_file_class *klass= file->m_class;

  /* Per-operation bookkeeping: I/O byte counters, and object lifecycle. */
  switch(wait->m_operation)
  {
  case OPERATION_TYPE_FILEREAD:
    file->m_file_stat.m_count_read++;
    file->m_file_stat.m_read_bytes+= count;
    klass->m_file_stat.m_count_read++;
    klass->m_file_stat.m_read_bytes+= count;
    break;
  case OPERATION_TYPE_FILEWRITE:
    file->m_file_stat.m_count_write++;
    file->m_file_stat.m_write_bytes+= count;
    klass->m_file_stat.m_count_write++;
    klass->m_file_stat.m_write_bytes+= count;
    break;
  case OPERATION_TYPE_FILECLOSE:
  case OPERATION_TYPE_FILESTREAMCLOSE:
  case OPERATION_TYPE_FILESTAT:
    /* The file is no longer in use by this thread. */
    release_file(pfs_locker->m_target.m_file);
    break;
  case OPERATION_TYPE_FILEDELETE:
    /* The file no longer exists; remove its instrumentation. */
    destroy_file(wait->m_thread, pfs_locker->m_target.m_file);
    break;
  default:
    break;
  }

  wait->m_thread->m_wait_locker_count--;
}
2044
/**
  Implementation of the version 1 instrumentation interface (PSI_v1).
  The order of members must match the PSI_v1 structure declaration
  exactly: each entry is a function pointer slot.
*/
PSI_v1 PFS_v1=
{
  register_mutex_v1,
  register_rwlock_v1,
  register_cond_v1,
  register_thread_v1,
  register_file_v1,
  init_mutex_v1,
  destroy_mutex_v1,
  init_rwlock_v1,
  destroy_rwlock_v1,
  init_cond_v1,
  destroy_cond_v1,
  get_table_share_v1,
  release_table_share_v1,
  open_table_v1,
  close_table_v1,
  create_file_v1,
  spawn_thread_v1,
  new_thread_v1,
  set_thread_id_v1,
  get_thread_v1,
  set_thread_v1,
  delete_current_thread_v1,
  delete_thread_v1,
  get_thread_mutex_locker_v1,
  get_thread_rwlock_locker_v1,
  get_thread_cond_locker_v1,
  get_thread_table_locker_v1,
  get_thread_file_name_locker_v1,
  get_thread_file_stream_locker_v1,
  get_thread_file_descriptor_locker_v1,
  unlock_mutex_v1,
  unlock_rwlock_v1,
  signal_cond_v1,
  broadcast_cond_v1,
  start_mutex_wait_v1,
  end_mutex_wait_v1,
  start_rwlock_rdwait_v1,
  end_rwlock_rdwait_v1,
  start_rwlock_wrwait_v1,
  end_rwlock_wrwait_v1,
  start_cond_wait_v1,
  end_cond_wait_v1,
  start_table_wait_v1,
  end_table_wait_v1,
  start_file_open_wait_v1,
  end_file_open_wait_v1,
  end_file_open_wait_and_bind_to_descriptor_v1,
  start_file_wait_v1,
  end_file_wait_v1
};
2097
get_interface(int version)2098 static void* get_interface(int version)
2099 {
2100 switch (version)
2101 {
2102 case PSI_VERSION_1:
2103 return &PFS_v1;
2104 default:
2105 return NULL;
2106 }
2107 }
2108
C_MODE_END

/**
  Performance schema instrumentation bootstrap.
  Exposes get_interface so the server can obtain the versioned
  instrumentation interface at startup.
*/
struct PSI_bootstrap PFS_bootstrap=
{
  get_interface
};
2115