1 /* Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22
23 /**
24 @file storage/perfschema/pfs.cc
25 The performance schema implementation of all instruments.
26 */
27 #include "my_global.h"
28 #include "thr_lock.h"
29 #include "mysql/psi/psi.h"
30 #include "mysql/psi/mysql_thread.h"
31 #include "my_pthread.h"
32 #include "sql_const.h"
33 #include "pfs.h"
34 #include "pfs_instr_class.h"
35 #include "pfs_instr.h"
36 #include "pfs_host.h"
37 #include "pfs_user.h"
38 #include "pfs_account.h"
39 #include "pfs_global.h"
40 #include "pfs_column_values.h"
41 #include "pfs_timer.h"
42 #include "pfs_events_waits.h"
43 #include "pfs_events_stages.h"
44 #include "pfs_events_statements.h"
45 #include "pfs_setup_actor.h"
46 #include "pfs_setup_object.h"
47 #include "sql_error.h"
48 #include "sp_head.h"
49 #include "pfs_digest.h"
50
51 using std::min;
52 /**
53 @page PAGE_PERFORMANCE_SCHEMA The Performance Schema main page
54 MySQL PERFORMANCE_SCHEMA implementation.
55
56 @section INTRO Introduction
57 The PERFORMANCE_SCHEMA is a way to introspect the internal execution of
58 the server at runtime.
59 The performance schema focuses primarily on performance data,
60 as opposed to the INFORMATION_SCHEMA whose purpose is to inspect metadata.
61
62 From a user point of view, the performance schema consists of:
63 - a dedicated database schema, named PERFORMANCE_SCHEMA,
64 - SQL tables, used to query the server internal state or change
65 configuration settings.
66
67 From an implementation point of view, the performance schema is a dedicated
68 Storage Engine which exposes data collected by 'Instrumentation Points'
69 placed in the server code.
70
71 @section INTERFACES Multiple interfaces
72
73 The performance schema exposes many different interfaces,
74 for different components, and for different purposes.
75
76 @subsection INT_INSTRUMENTING Instrumenting interface
77
78 All the data representing the server internal state exposed
79 in the performance schema must be first collected:
80 this is the role of the instrumenting interface.
81 The instrumenting interface is a coding interface provided
82 by implementors (of the performance schema) to implementors
83 (of the server or server components).
84
85 This interface is available to:
86 - C implementations
87 - C++ implementations
88 - the core SQL layer (/sql)
89 - the mysys library (/mysys)
90 - MySQL plugins, including storage engines,
91 - third party plugins, including third party storage engines.
92
93 For details, see the @ref PAGE_INSTRUMENTATION_INTERFACE
94 "instrumentation interface page".
95
96 @subsection INT_COMPILING Compiling interface
97
98 The implementation of the performance schema can be enabled or disabled at
99 build time, when building MySQL from the source code.
100
101 When building with the performance schema code, some compilation flags
102 are available to change the default values used in the code, if required.
103
104 For more details, see:
105 @verbatim ./configure --help @endverbatim
106
107 To compile with the performance schema:
108 @verbatim ./configure --with-perfschema @endverbatim
109
110 The implementation of all the compiling options is located in
111 @verbatim ./storage/perfschema/plug.in @endverbatim
112
113 @subsection INT_STARTUP Server startup interface
114
115 The server startup interface consists of the "./mysqld ..."
116 command line used to start the server.
117 When the performance schema is compiled in the server binary,
118 extra command line options are available.
119
120 These extra start options allow the DBA to:
121 - enable or disable the performance schema
122 - specify some sizing parameters.
123
124 To see help for the performance schema startup options, see:
125 @verbatim ./sql/mysqld --verbose --help @endverbatim
126
127 The implementation of all the startup options is located in
128 @verbatim ./sql/mysqld.cc, my_long_options[] @endverbatim
129
130 @subsection INT_BOOTSTRAP Server bootstrap interface
131
132 The bootstrap interface is a private interface exposed by
133 the performance schema, and used by the SQL layer.
134 Its role is to advertise all the SQL tables natively
135 supported by the performance schema to the SQL server.
136 The code consists of creating MySQL tables for the
137 performance schema itself, and is used in './mysql --bootstrap'
138 mode when a server is installed.
139
140 The implementation of the database creation script is located in
141 @verbatim ./scripts/mysql_system_tables.sql @endverbatim
142
143 @subsection INT_CONFIG Runtime configuration interface
144
145 When the performance schema is used at runtime, various configuration
146 parameters can be used to specify what kind of data is collected,
147 what kind of aggregations are computed, what kind of timers are used,
148 what events are timed, etc.
149
150 For all these capabilities, not a single statement or special syntax
151 was introduced in the parser.
152 Instead of new SQL statements, the interface consists of DML
153 (SELECT, INSERT, UPDATE, DELETE) against special "SETUP" tables.
154
155 For example:
156 @verbatim mysql> update performance_schema.SETUP_INSTRUMENTS
157 set ENABLED='YES', TIMED='YES';
158 Query OK, 234 rows affected (0.00 sec)
159 Rows matched: 234 Changed: 234 Warnings: 0 @endverbatim
160
161 @subsection INT_STATUS Internal audit interface
162
163 The internal audit interface is provided to the DBA to inspect if the
164 performance schema code itself is functioning properly.
165 This interface is necessary because a failure caused while
166 instrumenting code in the server should not cause failures in the
167 MySQL server itself, so that the performance schema implementation
168 never raises errors during runtime execution.
169
170 This auditing interface consists of:
171 @verbatim SHOW ENGINE PERFORMANCE_SCHEMA STATUS; @endverbatim
172 It displays data related to the memory usage of the performance schema,
173 as well as statistics about lost events, if any.
174
175 The SHOW STATUS command is implemented in
176 @verbatim ./storage/perfschema/pfs_engine_table.cc @endverbatim
177
178 @subsection INT_QUERY Query interface
179
180 The query interface is used to query the internal state of a running server.
181 It is provided as SQL tables.
182
183 For example:
184 @verbatim mysql> select * from performance_schema.EVENTS_WAITS_CURRENT;
185 @endverbatim
186
187 @section DESIGN_PRINCIPLES Design principles
188
189 @subsection PRINCIPLE_BEHAVIOR No behavior changes
190
191 The primary goal of the performance schema is to measure (instrument) the
192 execution of the server. A good measure should not cause any change
193 in behavior.
194
195 To achieve this, the overall design of the performance schema complies
196 with the following very severe design constraints:
197
198 The parser is unchanged. There are no new keywords, no new statements.
199 This guarantees that existing applications will run the same way with or
200 without the performance schema.
201
202 All the instrumentation points return "void", there are no error codes.
203 Even if the performance schema internally fails, execution of the server
204 code will proceed.
205
206 None of the instrumentation points allocate memory.
207 All the memory used by the performance schema is pre-allocated at startup,
208 and is considered "static" during the server life time.
209
210 None of the instrumentation points use any pthread_mutex, pthread_rwlock,
211 or pthread_cond (or platform equivalents).
212 Executing the instrumentation point should not cause thread scheduling to
213 change in the server.
214
215 In other words, the implementation of the instrumentation points,
216 including all the code called by the instrumentation points, is:
217 - malloc free
218 - mutex free
219 - rwlock free
220
221 TODO: All the code located in storage/perfschema is malloc free,
222 but unfortunately the usage of LF_HASH introduces some memory allocation.
223 This should be revised if possible, to use a lock-free,
224 malloc-free hash code table.
225
226 @subsection PRINCIPLE_PERFORMANCE No performance hit
227
228 The instrumentation of the server should be as fast as possible.
229 In cases when there are choices between:
230 - doing some processing when recording the performance data
231 in the instrumentation,
232 - doing some processing when retrieving the performance data,
233
234 priority is given in the design to make the instrumentation faster,
235 pushing some complexity to data retrieval.
236
237 As a result, some parts of the design, related to:
238 - the setup code path,
239 - the query code path,
240
241 might appear to be sub-optimal.
242
243 The criterion used here is to optimize primarily the critical path (data
244 collection), possibly at the expense of non-critical code paths.
245
246 @subsection PRINCIPLE_NOT_INTRUSIVE Unintrusive instrumentation
247
248 For the performance schema in general to be successful, the barrier
249 of entry for a developer should be low, so it's easy to instrument code.
250
251 In particular, the instrumentation interface:
252 - is available for C and C++ code (so it's a C interface),
253 - does not require parameters that the calling code can't easily provide,
254 - supports partial instrumentation (for example, instrumenting mutexes does
255 not require that every mutex is instrumented)
256
257 @subsection PRINCIPLE_EXTENDABLE Extendable instrumentation
258
259 As the content of the performance schema improves,
260 with more tables exposed and more data collected,
261 the instrumentation interface will also be augmented
262 to support instrumenting new concepts.
263 Existing instrumentations should not be affected when additional
264 instrumentation is made available, and making a new instrumentation
265 available should not require existing instrumented code to support it.
266
267 @subsection PRINCIPLE_VERSIONED Versioned instrumentation
268
269 Given that the instrumentation offered by the performance schema will
270 be augmented with time, when more features are implemented,
271 the interface itself should be versioned, to keep compatibility
272 with previous instrumented code.
273
274 For example, after both plugin-A and plugin-B have been instrumented for
275 mutexes, read write locks and conditions, using the instrumentation
276 interface, we can anticipate that the instrumentation interface
277 is expanded to support file based operations.
278
279 Plugin-A, a file based storage engine, will most likely use the expanded
280 interface and instrument its file usage, using the version 2
281 interface, while Plugin-B, a network based storage engine, will not change
282 its code and not release a new binary.
283
284 When later the instrumentation interface is expanded to support network
285 based operations (which will define interface version 3), the Plugin-B code
286 can then be changed to make use of it.
287
288 Note, this is just an example to illustrate the design concept here.
289 Both mutexes and file instrumentation are already available
290 since version 1 of the instrumentation interface.
291
292 @subsection PRINCIPLE_DEPLOYMENT Easy deployment
293
294 Internally, we might want every plugin implementation to upgrade the
295 instrumented code to the latest available, but this will cause additional
296 work and this is not practical if the code change is monolithic.
297
298 Externally, for third party plugin implementors, asking implementors to
299 always stay aligned to the latest instrumentation and make new releases,
300 even when the change does not provide new functionality for them,
301 is a bad idea.
302
303 For example, requiring a network based engine to re-release because the
304 instrumentation interface changed for file based operations, will create
305 too many deployment issues.
306
307 So, the performance schema implementation must support concurrently,
308 in the same deployment, multiple versions of the instrumentation
309 interface, and ensure binary compatibility with each version.
310
311 In addition to this, the performance schema can be included or excluded
312 from the server binary, using build time configuration options.
313
314 Regardless, the following types of deployment are valid:
315 - a server supporting the performance schema + a storage engine
316 that is not instrumented
317 - a server not supporting the performance schema + a storage engine
318 that is instrumented
319 */
320
321 /**
322 @page PAGE_INSTRUMENTATION_INTERFACE Performance schema: instrumentation interface page.
323 MySQL performance schema instrumentation interface.
324
325 @section INTRO Introduction
326
327 The instrumentation interface consists of two layers:
328 - a raw ABI (Application Binary Interface) layer, that exposes the primitive
329 instrumentation functions exported by the performance schema instrumentation
330 - an API (Application Programming Interface) layer,
331 that provides many helpers for a developer instrumenting some code,
332 to make the instrumentation as easy as possible.
333
334 The ABI layer consists of:
335 @code
336 #include "mysql/psi/psi.h"
337 @endcode
338
339 The API layer consists of:
340 @code
341 #include "mysql/psi/mysql_mutex.h"
342 #include "mysql/psi/mysql_file.h"
343 @endcode
344
345 The first helper is for mutexes, rwlocks and conditions,
346 the second for file io.
347
348 The API layer exposes C macros and typedefs which will expand:
349 - either to non-instrumented code, when compiled without the performance
350 schema instrumentation
351 - or to instrumented code, that will issue the raw calls to the ABI layer
352 so that the implementation can collect data.
353
354 Note that all the names introduced (for example, @c mysql_mutex_lock) do not
355 collide with any other namespace.
356 In particular, the macro @c mysql_mutex_lock is on purpose not named
357 @c pthread_mutex_lock.
358 This is to:
359 - avoid overloading @c pthread_mutex_lock with yet another macro,
360 which is dangerous as it can affect user code and pollute
361 the end-user namespace.
362 - allow the developer instrumenting code to selectively instrument
363 some code but not all.
364
365 @section PRINCIPLES Design principles
366
367 The ABI part is designed as a facade, that exposes basic primitives.
368 The expectation is that each primitive will be very stable over time,
369 but the list will constantly grow when more instruments are supported.
370 To support binary compatibility with plugins compiled with a different
371 version of the instrumentation, the ABI itself is versioned
372 (see @c PSI_v1, @c PSI_v2).
373
374 For a given instrumentation point in the API, the basic coding pattern
375 used is:
376 - (a) notify the performance schema of the operation
377 about to be performed.
378 - (b) execute the instrumented code.
379 - (c) notify the performance schema that the operation
380 is completed.
381
382 An opaque "locker" pointer is returned by (a), that is given to (c).
383 This pointer helps the implementation to keep context, for performance.
384
385 The following code fragment is annotated to show in detail how this pattern
386 is implemented, when the instrumentation is compiled in:
387
388 @verbatim
389 static inline int mysql_mutex_lock(
390 mysql_mutex_t *that, myf flags, const char *src_file, uint src_line)
391 {
392 int result;
393 struct PSI_mutex_locker_state state;
394 struct PSI_mutex_locker *locker= NULL;
395
396 ............... (a)
397 locker= PSI_server->start_mutex_wait(&state, that->p_psi,
398 PSI_MUTEX_LOCK, locker, src_file, src_line);
399
400 ............... (b)
401 result= pthread_mutex_lock(&that->m_mutex);
402
403 ............... (c)
404 PSI_server->end_mutex_wait(locker, result);
405
406 return result;
407 }
408 @endverbatim
409
410 When the performance schema instrumentation is not compiled in,
411 the code becomes simply a wrapper, expanded in line by the compiler:
412
413 @verbatim
414 static inline int mysql_mutex_lock(...)
415 {
416 int result;
417
418 ............... (b)
419 result= pthread_mutex_lock(&that->m_mutex);
420
421 return result;
422 }
423 @endverbatim
424 */
425
426 /**
427 @page PAGE_AGGREGATES Performance schema: the aggregates page.
428 Performance schema aggregates.
429
430 @section INTRO Introduction
431
432 Aggregates tables are tables that can be formally defined as
433 SELECT ... from EVENTS_WAITS_HISTORY_INFINITE ... group by 'group clause'.
434
435 Each group clause defines a different kind of aggregate, and corresponds to
436 a different table exposed by the performance schema.
437
438 Aggregates can be either:
439 - computed on the fly,
440 - computed on demand, based on other available data.
441
442 'EVENTS_WAITS_HISTORY_INFINITE' is a table that does not exist,
443 the best approximation is EVENTS_WAITS_HISTORY_LONG.
444 Aggregates computed on the fly in fact are based on EVENTS_WAITS_CURRENT,
445 while aggregates computed on demand are based on other
446 EVENTS_WAITS_SUMMARY_BY_xxx tables.
447
448 To better understand the implementation itself, a bit of math is
449 required first, to understand the model behind the code:
450 the code is deceptively simple, the real complexity resides
451 in the flyweight of pointers between various performance schema buffers.
452
453 @section DIMENSION Concept of dimension
454
455 An event measured by the instrumentation has many attributes.
456 An event is represented as a data point P(x1, x2, ..., xN),
457 where each x_i coordinate represents a given attribute value.
458
459 Examples of attributes are:
460 - the time waited
461 - the object waited on
462 - the instrument waited on
463 - the thread that waited
464 - the operation performed
465 - per object or per operation additional attributes, such as spins,
466 number of bytes, etc.
467
468 Computing an aggregate per thread is fundamentally different from
469 computing an aggregate by instrument, so the "_BY_THREAD" and
470 "_BY_EVENT_NAME" aggregates are different dimensions,
471 operating on different x_i and x_j coordinates.
472 These aggregates are "orthogonal".
473
474 @section PROJECTION Concept of projection
475
476 A given x_i attribute value can convey either just one basic information,
477 such as a number of bytes, or can convey implied information,
478 such as an object fully qualified name.
479
480 For example, from the value "test.t1", the name of the object schema
481 "test" can be separated from the object name "t1", so that now aggregates
482 by object schema can be implemented.
483
484 In math terms, that corresponds to defining a function:
485 F_i (x): x --> y
486 Applying this function to our point P gives another point P':
487
488 F_i (P):
489 P(x1, x2, ..., x{i-1}, x_i, x{i+1}, ..., x_N)
490 --> P' (x1, x2, ..., x{i-1}, f_i(x_i), x{i+1}, ..., x_N)
491
492 That function in fact defines an aggregate!
493 In SQL terms, this aggregate would look like the following table:
494
495 @verbatim
496 CREATE VIEW EVENTS_WAITS_SUMMARY_BY_Func_i AS
497 SELECT col_1, col_2, ..., col_{i-1},
498 Func_i(col_i),
499 COUNT(col_i),
500 MIN(col_i), AVG(col_i), MAX(col_i), -- if col_i is a numeric value
501 col_{i+1}, ..., col_N
502 FROM EVENTS_WAITS_HISTORY_INFINITE
503 group by col_1, col_2, ..., col_{i-1}, col{i+1}, ..., col_N.
504 @endverbatim
505
506 Note that not all columns have to be included,
507 in particular some columns that are dependent on the x_i column should
508 be removed, so that in practice, MySQL's aggregation method tends to
509 remove many attributes at each aggregation steps.
510
511 For example, when aggregating wait events by object instances,
512 - the wait_time and number_of_bytes can be summed,
513 and sum(wait_time) now becomes an object instance attribute.
514 - the source, timer_start, timer_end columns are not in the
515 _BY_INSTANCE table, because these attributes are only
516 meaningful for a wait.
517
518 @section COMPOSITION Concept of composition
519
520 Now, the "test.t1" --> "test" example was purely theory,
521 just to explain the concept, and does not lead very far.
522 Let's look at a more interesting example of data that can be derived
523 from the row event.
524
525 An event creates a transient object, PFS_wait_locker, per operation.
526 This object's life cycle is extremely short: it's created just
527 before the start_wait() instrumentation call, and is destroyed in
528 the end_wait() call.
529
530 The wait locker itself contains a pointer to the object instance
531 waited on.
532 That allows to implement a wait_locker --> object instance projection,
533 with m_target.
534 The object instance life cycle depends on _init and _destroy calls
535 from the code, such as mysql_mutex_init()
536 and mysql_mutex_destroy() for a mutex.
537
538 The object instance waited on contains a pointer to the object class,
539 which is represented by the instrument name.
540 That allows to implement an object instance --> object class projection.
541 The object class life cycle is permanent, as instruments are loaded in
542 the server and never removed.
543
544 The object class is named in such a way
545 (for example, "wait/sync/mutex/sql/LOCK_open",
546 "wait/io/file/maria/data_file) that the component ("sql", "maria")
547 that it belongs to can be inferred.
548 That allows to implement an object class --> server component projection.
549
550 Back to math again, we have, for example for mutexes:
551
552 F1 (l) : PFS_wait_locker l --> PFS_mutex m = l->m_target.m_mutex
553
554 F1_to_2 (m) : PFS_mutex m --> PFS_mutex_class i = m->m_class
555
556 F2_to_3 (i) : PFS_mutex_class i --> const char *component =
557 substring(i->m_name, ...)
558
559 Per components aggregates are not implemented, this is just an illustration.
560
561 F1 alone defines this aggregate:
562
563 EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_INSTANCE
564 (or MUTEX_INSTANCE)
565
566 F1_to_2 alone could define this aggregate:
567
568 EVENTS_WAITS_SUMMARY_BY_INSTANCE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
569
570 Alternatively, using function composition, with
571 F2 = F1_to_2 o F1, F2 defines:
572
573 EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
574
575 Likewise, F_2_to_3 defines:
576
577 EVENTS_WAITS_SUMMARY_BY_EVENT_NAME --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
578
579 and F3 = F_2_to_3 o F_1_to_2 o F1 defines:
580
581 EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
582
583 What has all this to do with the code?
584
585 Functions (or aggregates) such as F_3 are not implemented as is.
586 Instead, they are decomposed into F_2_to_3 o F_1_to_2 o F1,
587 and each intermediate aggregate is stored into an internal buffer.
588 This allows to support every F1, F2, F3 aggregates from shared
589 internal buffers, where computation already performed to compute F2
590 is reused when computing F3.
591
592 @section OBJECT_GRAPH Object graph
593
594 In terms of object instances, or records, pointers between
595 different buffers define an object instance graph.
596
597 For example, assuming the following scenario:
598 - A mutex class "M" is instrumented, the instrument name
599 is "wait/sync/mutex/sql/M"
600 - This mutex instrument has been instantiated twice,
601 mutex instances are noted M-1 and M-2
602 - Threads T-A and T-B are locking mutex instance M-1
603 - Threads T-C and T-D are locking mutex instance M-2
604
605 The performance schema will record the following data:
606 - EVENTS_WAITS_CURRENT has 4 rows, one for each mutex locker
607 - EVENTS_WAITS_SUMMARY_BY_INSTANCE shows 2 rows, for M-1 and M-2
608 - EVENTS_WAITS_SUMMARY_BY_EVENT_NAME shows 1 row, for M
609
610 The graph of structures will look like:
611
612 @verbatim
613 PFS_wait_locker (T-A, M-1) ----------
614 |
615 v
616 PFS_mutex (M-1)
617 - m_wait_stat ------------
618 ^ |
619 | |
620 PFS_wait_locker (T-B, M-1) ---------- |
621 v
622 PFS_mutex_class (M)
623 - m_wait_stat
624 PFS_wait_locker (T-C, M-2) ---------- ^
625 | |
626 v |
627 PFS_mutex (M-2) |
628 - m_wait_stat ------------
629 ^
630 |
631 PFS_wait_locker (T-D, M-2) ----------
632
633 || || ||
634 || || ||
635 vv vv vv
636
637 EVENTS_WAITS_CURRENT ..._SUMMARY_BY_INSTANCE ..._SUMMARY_BY_EVENT_NAME
638 @endverbatim
639
640 @section ON_THE_FLY On the fly aggregates
641
642 'On the fly' aggregates are computed during the code execution.
643 This is necessary because the data the aggregate is based on is volatile,
644 and can not be kept indefinitely.
645
646 With on the fly aggregates:
647 - the writer thread does all the computation
648 - the reader thread accesses the result directly
649
650 This model is to be avoided if possible, due to the overhead
651 caused when instrumenting code.
652
653 @section HIGHER_LEVEL Higher level aggregates
654
655 'Higher level' aggregates are implemented on demand only.
656 The code executing a SELECT from the aggregate table is
657 collecting data from multiple internal buffers to produce the result.
658
659 With higher level aggregates:
660 - the reader thread does all the computation
661 - the writer thread has no overhead.
662
663 @section MIXED Mixed level aggregates
664
665 The 'Mixed' model is a compromise between 'On the fly' and 'Higher level'
666 aggregates, for internal buffers that are not permanent.
667
668 While an object is present in a buffer, the higher level model is used.
669 When an object is about to be destroyed, statistics are saved into
670 a 'parent' buffer with a longer life cycle, to follow the on the fly model.
671
672 With mixed aggregates:
673 - the reader thread does a lot of complex computation,
674 - the writer thread has minimal overhead, on destroy events.
675
676 @section IMPL_WAIT Implementation for waits aggregates
677
678 For waits, the tables that contain aggregated wait data are:
679 - EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
680 - EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME
681 - EVENTS_WAITS_SUMMARY_BY_INSTANCE
682 - EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
683 - EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME
684 - EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME
685 - FILE_SUMMARY_BY_EVENT_NAME
686 - FILE_SUMMARY_BY_INSTANCE
687 - SOCKET_SUMMARY_BY_INSTANCE
688 - SOCKET_SUMMARY_BY_EVENT_NAME
689 - OBJECTS_SUMMARY_GLOBAL_BY_TYPE
690
691 The instrumented code that generates wait events consists of:
692 - mutexes (mysql_mutex_t)
693 - rwlocks (mysql_rwlock_t)
694 - conditions (mysql_cond_t)
695 - file io (MYSQL_FILE)
696 - socket io (MYSQL_SOCKET)
697 - table io
698 - table lock
699 - idle
700
701 The flow of data between aggregates tables varies for each instrumentation.
702
703 @subsection IMPL_WAIT_MUTEX Mutex waits
704
705 @verbatim
706 mutex_locker(T, M)
707 |
708 | [1]
709 |
710 |-> pfs_mutex(M) =====>> [B], [C]
711 | |
712 | | [2]
713 | |
714 | |-> pfs_mutex_class(M.class) =====>> [C]
715 |
716 |-> pfs_thread(T).event_name(M) =====>> [A], [D], [E], [F]
717 |
718 | [3]
719 |
720 3a |-> pfs_account(U, H).event_name(M) =====>> [D], [E], [F]
721 . |
722 . | [4-RESET]
723 . |
724 3b .....+-> pfs_user(U).event_name(M) =====>> [E]
725 . |
726 3c .....+-> pfs_host(H).event_name(M) =====>> [F]
727 @endverbatim
728
729 How to read this diagram:
730 - events that occur during the instrumented code execution are noted with numbers,
731 as in [1]. Code executed by these events has an impact on overhead.
732 - events that occur during TRUNCATE TABLE operations are noted with numbers,
733 followed by "-RESET", as in [4-RESET].
734 Code executed by these events has no impact on overhead,
735 since they are executed by independent monitoring sessions.
736 - events that occur when a reader extracts data from a performance schema table
737 are noted with letters, as in [A]. The name of the table involved,
738 and the method that builds a row are documented. Code executed by these events
739 has no impact on the instrumentation overhead. Note that the table
740 implementation may pull data from different buffers.
741 - nominal code paths are in plain lines. A "nominal" code path corresponds to
742 cases where the performance schema buffers are sized so that no records are lost.
743 - degenerated code paths are in dotted lines. A "degenerated" code path corresponds
744 to edge cases where parent buffers are full, which forces the code to aggregate to
745 grand parents directly.
746
747 Implemented as:
748 - [1] @c start_mutex_wait_v1(), @c end_mutex_wait_v1()
749 - [2] @c destroy_mutex_v1()
750 - [3] @c aggregate_thread_waits()
751 - [4] @c PFS_account::aggregate_waits()
752 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
753 @c table_ews_by_thread_by_event_name::make_row()
754 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
755 @c table_events_waits_summary_by_instance::make_mutex_row()
756 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
757 @c table_ews_global_by_event_name::make_mutex_row()
758 - [D] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
759 @c table_ews_by_account_by_event_name::make_row()
760 - [E] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
761 @c table_ews_by_user_by_event_name::make_row()
762 - [F] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
763 @c table_ews_by_host_by_event_name::make_row()
764
765 Table EVENTS_WAITS_SUMMARY_BY_INSTANCE is a 'on the fly' aggregate,
766 because the data is collected on the fly by (1) and stored into a buffer,
767 pfs_mutex. The table implementation [B] simply reads the results directly
768 from this buffer.
769
770 Table EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME is a 'mixed' aggregate,
771 because some data is collected on the fly (1),
772 some data is preserved with (2) at a later time in the life cycle,
773 and two different buffers pfs_mutex and pfs_mutex_class are used to store the
774 statistics collected. The table implementation [C] is more complex, since
775 it reads from two buffers pfs_mutex and pfs_mutex_class.
776
777 @subsection IMPL_WAIT_RWLOCK Rwlock waits
778
779 @verbatim
780 rwlock_locker(T, R)
781 |
782 | [1]
783 |
784 |-> pfs_rwlock(R) =====>> [B], [C]
785 | |
786 | | [2]
787 | |
788 | |-> pfs_rwlock_class(R.class) =====>> [C]
789 |
790 |-> pfs_thread(T).event_name(R) =====>> [A]
791 |
792 ...
793 @endverbatim
794
795 Implemented as:
796 - [1] @c start_rwlock_rdwait_v1(), @c end_rwlock_rdwait_v1(), ...
797 - [2] @c destroy_rwlock_v1()
798 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
799 @c table_ews_by_thread_by_event_name::make_row()
800 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
801 @c table_events_waits_summary_by_instance::make_rwlock_row()
802 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
803 @c table_ews_global_by_event_name::make_rwlock_row()
804
805 @subsection IMPL_WAIT_COND Cond waits
806
807 @verbatim
808 cond_locker(T, C)
809 |
810 | [1]
811 |
812 |-> pfs_cond(C) =====>> [B], [C]
813 | |
814 | | [2]
815 | |
816 | |-> pfs_cond_class(C.class) =====>> [C]
817 |
818 |-> pfs_thread(T).event_name(C) =====>> [A]
819 |
820 ...
821 @endverbatim
822
823 Implemented as:
824 - [1] @c start_cond_wait_v1(), @c end_cond_wait_v1()
825 - [2] @c destroy_cond_v1()
826 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
827 @c table_ews_by_thread_by_event_name::make_row()
828 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
829 @c table_events_waits_summary_by_instance::make_cond_row()
830 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
831 @c table_ews_global_by_event_name::make_cond_row()
832
833 @subsection IMPL_WAIT_FILE File waits
834
835 @verbatim
836 file_locker(T, F)
837 |
838 | [1]
839 |
840 |-> pfs_file(F) =====>> [B], [C], [D], [E]
841 | |
842 | | [2]
843 | |
844 | |-> pfs_file_class(F.class) =====>> [C], [D]
845 |
846 |-> pfs_thread(T).event_name(F) =====>> [A]
847 |
848 ...
849 @endverbatim
850
851 Implemented as:
852 - [1] @c get_thread_file_name_locker_v1(), @c start_file_wait_v1(),
853 @c end_file_wait_v1(), ...
854 - [2] @c close_file_v1()
855 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
856 @c table_ews_by_thread_by_event_name::make_row()
857 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
858 @c table_events_waits_summary_by_instance::make_file_row()
859 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
860 @c table_ews_global_by_event_name::make_file_row()
861 - [D] FILE_SUMMARY_BY_EVENT_NAME,
862 @c table_file_summary_by_event_name::make_row()
863 - [E] FILE_SUMMARY_BY_INSTANCE,
864 @c table_file_summary_by_instance::make_row()
865
866 @subsection IMPL_WAIT_SOCKET Socket waits
867
868 @verbatim
869 socket_locker(T, S)
870 |
871 | [1]
872 |
873 |-> pfs_socket(S) =====>> [A], [B], [C], [D], [E]
874 |
875 | [2]
876 |
877 |-> pfs_socket_class(S.class) =====>> [C], [D]
878 |
879 |-> pfs_thread(T).event_name(S) =====>> [A]
880 |
881 | [3]
882 |
883 3a |-> pfs_account(U, H).event_name(S) =====>> [F], [G], [H]
884 . |
885 . | [4-RESET]
886 . |
887 3b .....+-> pfs_user(U).event_name(S) =====>> [G]
888 . |
889 3c .....+-> pfs_host(H).event_name(S) =====>> [H]
890 @endverbatim
891
892 Implemented as:
893 - [1] @c start_socket_wait_v1(), @c end_socket_wait_v1().
894 - [2] @c close_socket_v1()
895 - [3] @c aggregate_thread_waits()
896 - [4] @c PFS_account::aggregate_waits()
897 - [5] @c PFS_host::aggregate_waits()
898 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
899 @c table_ews_by_thread_by_event_name::make_row()
900 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
901 @c table_events_waits_summary_by_instance::make_socket_row()
902 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
903 @c table_ews_global_by_event_name::make_socket_row()
904 - [D] SOCKET_SUMMARY_BY_EVENT_NAME,
905 @c table_socket_summary_by_event_name::make_row()
906 - [E] SOCKET_SUMMARY_BY_INSTANCE,
907 @c table_socket_summary_by_instance::make_row()
908 - [F] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
909 @c table_ews_by_account_by_event_name::make_row()
910 - [G] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
911 @c table_ews_by_user_by_event_name::make_row()
912 - [H] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
913 @c table_ews_by_host_by_event_name::make_row()
914
915 @subsection IMPL_WAIT_TABLE Table waits
916
917 @verbatim
918 table_locker(Thread Th, Table Tb, Event = io or lock)
919 |
920 | [1]
921 |
922 1a |-> pfs_table(Tb) =====>> [A], [B], [C]
923 | |
924 | | [2]
925 | |
926 | |-> pfs_table_share(Tb.share) =====>> [B], [C]
927 | |
928 | | [3]
929 | |
930 | |-> global_table_io_stat =====>> [C]
931 | |
932 | |-> global_table_lock_stat =====>> [C]
933 |
934 1b |-> pfs_thread(Th).event_name(E) =====>> [D], [E], [F], [G]
935 | |
936 | | [ 4-RESET]
937 | |
938 | |-> pfs_account(U, H).event_name(E) =====>> [E], [F], [G]
939 | . |
940 | . | [5-RESET]
941 | . |
942 | .....+-> pfs_user(U).event_name(E) =====>> [F]
943 | . |
944 | .....+-> pfs_host(H).event_name(E) =====>> [G]
945 |
946 1c |-> pfs_thread(Th).waits_current(W) =====>> [H]
947 |
948 1d |-> pfs_thread(Th).waits_history(W) =====>> [I]
949 |
950 1e |-> waits_history_long(W) =====>> [J]
951 @endverbatim
952
953 Implemented as:
954 - [1] @c start_table_io_wait_v1(), @c end_table_io_wait_v1()
955 - [2] @c close_table_v1()
956 - [3] @c drop_table_share_v1()
957 - [4] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
958 - [5] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
959 - [A] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
960 @c table_events_waits_summary_by_instance::make_table_row()
961 - [B] OBJECTS_SUMMARY_GLOBAL_BY_TYPE,
962 @c table_os_global_by_type::make_row()
963 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
964 @c table_ews_global_by_event_name::make_table_io_row(),
965 @c table_ews_global_by_event_name::make_table_lock_row()
966 - [D] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
967 @c table_ews_by_thread_by_event_name::make_row()
968 - [E] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
969 @c table_ews_by_account_by_event_name::make_row()
970 - [F] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
971 @c table_ews_by_user_by_event_name::make_row()
972 - [G] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
973 @c table_ews_by_host_by_event_name::make_row()
974 - [H] EVENTS_WAITS_CURRENT,
975 @c table_events_waits_common::make_row()
976 - [I] EVENTS_WAITS_HISTORY,
977 @c table_events_waits_common::make_row()
978 - [J] EVENTS_WAITS_HISTORY_LONG,
979 @c table_events_waits_common::make_row()
980
981 @section IMPL_STAGE Implementation for stages aggregates
982
983 For stages, the tables that contains aggregated data are:
984 - EVENTS_STAGES_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
985 - EVENTS_STAGES_SUMMARY_BY_HOST_BY_EVENT_NAME
986 - EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME
987 - EVENTS_STAGES_SUMMARY_BY_USER_BY_EVENT_NAME
988 - EVENTS_STAGES_SUMMARY_GLOBAL_BY_EVENT_NAME
989
990 @verbatim
991 start_stage(T, S)
992 |
993 | [1]
994 |
995 1a |-> pfs_thread(T).event_name(S) =====>> [A], [B], [C], [D], [E]
996 | |
997 | | [2]
998 | |
999 | 2a |-> pfs_account(U, H).event_name(S) =====>> [B], [C], [D], [E]
1000 | . |
1001 | . | [3-RESET]
1002 | . |
1003 | 2b .....+-> pfs_user(U).event_name(S) =====>> [C]
1004 | . |
1005 | 2c .....+-> pfs_host(H).event_name(S) =====>> [D], [E]
1006 | . . |
1007 | . . | [4-RESET]
1008 | 2d . . |
1009 1b |----+----+----+-> pfs_stage_class(S) =====>> [E]
1010
1011 @endverbatim
1012
1013 Implemented as:
1014 - [1] @c start_stage_v1()
1015 - [2] @c delete_thread_v1(), @c aggregate_thread_stages()
1016 - [3] @c PFS_account::aggregate_stages()
1017 - [4] @c PFS_host::aggregate_stages()
1018 - [A] EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME,
1019 @c table_esgs_by_thread_by_event_name::make_row()
1020 - [B] EVENTS_STAGES_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
1021 @c table_esgs_by_account_by_event_name::make_row()
1022 - [C] EVENTS_STAGES_SUMMARY_BY_USER_BY_EVENT_NAME,
1023 @c table_esgs_by_user_by_event_name::make_row()
1024 - [D] EVENTS_STAGES_SUMMARY_BY_HOST_BY_EVENT_NAME,
1025 @c table_esgs_by_host_by_event_name::make_row()
1026 - [E] EVENTS_STAGES_SUMMARY_GLOBAL_BY_EVENT_NAME,
1027 @c table_esgs_global_by_event_name::make_row()
1028
1029 @section IMPL_STATEMENT Implementation for statements consumers
1030
1031 For statements, the tables that contains individual event data are:
1032 - EVENTS_STATEMENTS_CURRENT
1033 - EVENTS_STATEMENTS_HISTORY
1034 - EVENTS_STATEMENTS_HISTORY_LONG
1035
1036 For statements, the tables that contains aggregated data are:
1037 - EVENTS_STATEMENTS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
1038 - EVENTS_STATEMENTS_SUMMARY_BY_HOST_BY_EVENT_NAME
1039 - EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME
1040 - EVENTS_STATEMENTS_SUMMARY_BY_USER_BY_EVENT_NAME
1041 - EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME
1042 - EVENTS_STATEMENTS_SUMMARY_BY_DIGEST
1043
1044 @verbatim
1045 statement_locker(T, S)
1046 |
1047 | [1]
1048 |
1049 1a |-> pfs_thread(T).event_name(S) =====>> [A], [B], [C], [D], [E]
1050 | |
1051 | | [2]
1052 | |
1053 | 2a |-> pfs_account(U, H).event_name(S) =====>> [B], [C], [D], [E]
1054 | . |
1055 | . | [3-RESET]
1056 | . |
1057 | 2b .....+-> pfs_user(U).event_name(S) =====>> [C]
1058 | . |
1059 | 2c .....+-> pfs_host(H).event_name(S) =====>> [D], [E]
1060 | . . |
1061 | . . | [4-RESET]
1062 | 2d . . |
1063 1b |----+----+----+-> pfs_statement_class(S) =====>> [E]
1064 |
1065 1c |-> pfs_thread(T).statement_current(S) =====>> [F]
1066 |
1067 1d |-> pfs_thread(T).statement_history(S) =====>> [G]
1068 |
1069 1e |-> statement_history_long(S) =====>> [H]
1070 |
1071 1f |-> statement_digest(S) =====>> [I]
1072
1073 @endverbatim
1074
1075 Implemented as:
1076 - [1] @c start_statement_v1(), @c end_statement_v1()
1077 (1a, 1b) is an aggregation by EVENT_NAME,
1078 (1c, 1d, 1e) is an aggregation by TIME,
1079 (1f) is an aggregation by DIGEST
1080 all of these are orthogonal,
1081 and implemented in end_statement_v1().
1082 - [2] @c delete_thread_v1(), @c aggregate_thread_statements()
1083 - [3] @c PFS_account::aggregate_statements()
1084 - [4] @c PFS_host::aggregate_statements()
1085 - [A] EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
1086 @c table_esms_by_thread_by_event_name::make_row()
1087 - [B] EVENTS_STATEMENTS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
1088 @c table_esms_by_account_by_event_name::make_row()
1089 - [C] EVENTS_STATEMENTS_SUMMARY_BY_USER_BY_EVENT_NAME,
1090 @c table_esms_by_user_by_event_name::make_row()
1091 - [D] EVENTS_STATEMENTS_SUMMARY_BY_HOST_BY_EVENT_NAME,
1092 @c table_esms_by_host_by_event_name::make_row()
1093 - [E] EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME,
1094 @c table_esms_global_by_event_name::make_row()
1095 - [F] EVENTS_STATEMENTS_CURRENT,
1096 @c table_events_statements_current::rnd_next(),
1097 @c table_events_statements_common::make_row()
1098 - [G] EVENTS_STATEMENTS_HISTORY,
1099 @c table_events_statements_history::rnd_next(),
1100 @c table_events_statements_common::make_row()
1101 - [H] EVENTS_STATEMENTS_HISTORY_LONG,
1102 @c table_events_statements_history_long::rnd_next(),
1103 @c table_events_statements_common::make_row()
1104 - [I] EVENTS_STATEMENTS_SUMMARY_BY_DIGEST
1105 @c table_esms_by_digest::make_row()
1106 */
1107
1108 /**
1109 @defgroup Performance_schema Performance Schema
1110 The performance schema component.
1111 For details, see the
1112 @ref PAGE_PERFORMANCE_SCHEMA "performance schema main page".
1113
1114 @defgroup Performance_schema_implementation Performance Schema Implementation
1115 @ingroup Performance_schema
1116
1117 @defgroup Performance_schema_tables Performance Schema Tables
1118 @ingroup Performance_schema_implementation
1119 */
1120
1121 pthread_key(PFS_thread*, THR_PFS);
1122 bool THR_PFS_initialized= false;
1123
1124 /**
1125 Conversion map from PSI_mutex_operation to enum_operation_type.
1126 Indexed by enum PSI_mutex_operation.
1127 */
1128 static enum_operation_type mutex_operation_map[]=
1129 {
1130 OPERATION_TYPE_LOCK,
1131 OPERATION_TYPE_TRYLOCK
1132 };
1133
1134 /**
1135 Conversion map from PSI_rwlock_operation to enum_operation_type.
1136 Indexed by enum PSI_rwlock_operation.
1137 */
1138 static enum_operation_type rwlock_operation_map[]=
1139 {
1140 OPERATION_TYPE_READLOCK,
1141 OPERATION_TYPE_WRITELOCK,
1142 OPERATION_TYPE_TRYREADLOCK,
1143 OPERATION_TYPE_TRYWRITELOCK
1144 };
1145
1146 /**
1147 Conversion map from PSI_cond_operation to enum_operation_type.
1148 Indexed by enum PSI_cond_operation.
1149 */
1150 static enum_operation_type cond_operation_map[]=
1151 {
1152 OPERATION_TYPE_WAIT,
1153 OPERATION_TYPE_TIMEDWAIT
1154 };
1155
1156 /**
1157 Conversion map from PSI_file_operation to enum_operation_type.
1158 Indexed by enum PSI_file_operation.
1159 */
1160 static enum_operation_type file_operation_map[]=
1161 {
1162 OPERATION_TYPE_FILECREATE,
1163 OPERATION_TYPE_FILECREATETMP,
1164 OPERATION_TYPE_FILEOPEN,
1165 OPERATION_TYPE_FILESTREAMOPEN,
1166 OPERATION_TYPE_FILECLOSE,
1167 OPERATION_TYPE_FILESTREAMCLOSE,
1168 OPERATION_TYPE_FILEREAD,
1169 OPERATION_TYPE_FILEWRITE,
1170 OPERATION_TYPE_FILESEEK,
1171 OPERATION_TYPE_FILETELL,
1172 OPERATION_TYPE_FILEFLUSH,
1173 OPERATION_TYPE_FILESTAT,
1174 OPERATION_TYPE_FILEFSTAT,
1175 OPERATION_TYPE_FILECHSIZE,
1176 OPERATION_TYPE_FILEDELETE,
1177 OPERATION_TYPE_FILERENAME,
1178 OPERATION_TYPE_FILESYNC
1179 };
1180
1181 /**
1182 Conversion map from PSI_table_operation to enum_operation_type.
1183 Indexed by enum PSI_table_io_operation.
1184 */
1185 static enum_operation_type table_io_operation_map[]=
1186 {
1187 OPERATION_TYPE_TABLE_FETCH,
1188 OPERATION_TYPE_TABLE_WRITE_ROW,
1189 OPERATION_TYPE_TABLE_UPDATE_ROW,
1190 OPERATION_TYPE_TABLE_DELETE_ROW
1191 };
1192
1193 /**
1194 Conversion map from enum PFS_TL_LOCK_TYPE to enum_operation_type.
1195 Indexed by enum PFS_TL_LOCK_TYPE.
1196 */
1197 static enum_operation_type table_lock_operation_map[]=
1198 {
1199 OPERATION_TYPE_TL_READ_NORMAL, /* PFS_TL_READ */
1200 OPERATION_TYPE_TL_READ_WITH_SHARED_LOCKS, /* PFS_TL_READ_WITH_SHARED_LOCKS */
1201 OPERATION_TYPE_TL_READ_HIGH_PRIORITY, /* PFS_TL_READ_HIGH_PRIORITY */
1202 OPERATION_TYPE_TL_READ_NO_INSERTS, /* PFS_TL_READ_NO_INSERT */
1203 OPERATION_TYPE_TL_WRITE_ALLOW_WRITE, /* PFS_TL_WRITE_ALLOW_WRITE */
1204 OPERATION_TYPE_TL_WRITE_CONCURRENT_INSERT, /* PFS_TL_WRITE_CONCURRENT_INSERT */
1205 OPERATION_TYPE_TL_WRITE_DELAYED, /* PFS_TL_WRITE_DELAYED */
1206 OPERATION_TYPE_TL_WRITE_LOW_PRIORITY, /* PFS_TL_WRITE_LOW_PRIORITY */
1207 OPERATION_TYPE_TL_WRITE_NORMAL, /* PFS_TL_WRITE */
1208 OPERATION_TYPE_TL_READ_EXTERNAL, /* PFS_TL_READ_EXTERNAL */
1209 OPERATION_TYPE_TL_WRITE_EXTERNAL /* PFS_TL_WRITE_EXTERNAL */
1210 };
1211
1212 /**
1213 Conversion map from PSI_socket_operation to enum_operation_type.
1214 Indexed by enum PSI_socket_operation.
1215 */
1216 static enum_operation_type socket_operation_map[]=
1217 {
1218 OPERATION_TYPE_SOCKETCREATE,
1219 OPERATION_TYPE_SOCKETCONNECT,
1220 OPERATION_TYPE_SOCKETBIND,
1221 OPERATION_TYPE_SOCKETCLOSE,
1222 OPERATION_TYPE_SOCKETSEND,
1223 OPERATION_TYPE_SOCKETRECV,
1224 OPERATION_TYPE_SOCKETSENDTO,
1225 OPERATION_TYPE_SOCKETRECVFROM,
1226 OPERATION_TYPE_SOCKETSENDMSG,
1227 OPERATION_TYPE_SOCKETRECVMSG,
1228 OPERATION_TYPE_SOCKETSEEK,
1229 OPERATION_TYPE_SOCKETOPT,
1230 OPERATION_TYPE_SOCKETSTAT,
1231 OPERATION_TYPE_SOCKETSHUTDOWN,
1232 OPERATION_TYPE_SOCKETSELECT
1233 };
1234
1235 /**
1236 Build the prefix name of a class of instruments in a category.
1237 For example, this function builds the string 'wait/sync/mutex/sql/' from
1238 a prefix 'wait/sync/mutex' and a category 'sql'.
1239 This prefix is used later to build each instrument name, such as
1240 'wait/sync/mutex/sql/LOCK_open'.
1241 @param prefix Prefix for this class of instruments
1242 @param category Category name
1243 @param [out] output Buffer of length PFS_MAX_INFO_NAME_LENGTH.
1244 @param [out] output_length Length of the resulting output string.
1245 @return 0 for success, non zero for errors
1246 */
build_prefix(const LEX_STRING * prefix,const char * category,char * output,int * output_length)1247 static int build_prefix(const LEX_STRING *prefix, const char *category,
1248 char *output, int *output_length)
1249 {
1250 int len= strlen(category);
1251 char *out_ptr= output;
1252 int prefix_length= prefix->length;
1253
1254 if (unlikely((prefix_length + len + 1) >=
1255 PFS_MAX_FULL_PREFIX_NAME_LENGTH))
1256 {
1257 pfs_print_error("build_prefix: prefix+category is too long <%s> <%s>\n",
1258 prefix->str, category);
1259 return 1;
1260 }
1261
1262 if (unlikely(strchr(category, '/') != NULL))
1263 {
1264 pfs_print_error("build_prefix: invalid category <%s>\n",
1265 category);
1266 return 1;
1267 }
1268
1269 /* output = prefix + category + '/' */
1270 memcpy(out_ptr, prefix->str, prefix_length);
1271 out_ptr+= prefix_length;
1272 memcpy(out_ptr, category, len);
1273 out_ptr+= len;
1274 *out_ptr= '/';
1275 out_ptr++;
1276 *output_length= out_ptr - output;
1277
1278 return 0;
1279 }
1280
/**
  Common body for the register_<object>_v1() functions that take a flat
  PSI_<object>_info_v1 array.
  Expands inside a function that provides @c category, @c info and @c count
  in scope. For each entry, builds the full instrument name
  'PREFIX + category + / + info->m_name' and registers it with
  REGISTER_FUNC, storing the resulting key in *(info->m_key).
  When the prefix is invalid or a name is too long, a key of 0 is stored.
  @param KEY_T          type of the instrument key (e.g. PSI_mutex_key)
  @param PREFIX         LEX_STRING name prefix for this class of instruments
  @param REGISTER_FUNC  function that registers one instrument class
*/
#define REGISTER_BODY_V1(KEY_T, PREFIX, REGISTER_FUNC)        \
  KEY_T key;                                                  \
  char formatted_name[PFS_MAX_INFO_NAME_LENGTH];              \
  int prefix_length;                                          \
  int len;                                                    \
  int full_length;                                            \
                                                              \
  DBUG_ASSERT(category != NULL);                              \
  DBUG_ASSERT(info != NULL);                                  \
  if (unlikely(build_prefix(&PREFIX, category,                \
               formatted_name, &prefix_length)))              \
  {                                                           \
    for (; count>0; count--, info++)                          \
      *(info->m_key)= 0;                                      \
    return ;                                                  \
  }                                                           \
                                                              \
  for (; count>0; count--, info++)                            \
  {                                                           \
    DBUG_ASSERT(info->m_key != NULL);                         \
    DBUG_ASSERT(info->m_name != NULL);                        \
    len= strlen(info->m_name);                                \
    full_length= prefix_length + len;                         \
    if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))      \
    {                                                         \
      memcpy(formatted_name + prefix_length, info->m_name, len); \
      key= REGISTER_FUNC(formatted_name, full_length, info->m_flags); \
    }                                                         \
    else                                                      \
    {                                                         \
      pfs_print_error("REGISTER_BODY_V1: name too long <%s> <%s>\n", \
                      category, info->m_name);                \
      key= 0;                                                 \
    }                                                         \
                                                              \
    *(info->m_key)= key;                                      \
  }                                                           \
  return;
1319
1320 /* Use C linkage for the interface functions. */
1321
1322 C_MODE_START
1323
/**
  Implementation of the mutex instrumentation interface.
  Registers @c count mutex instruments from @c info, prefixed with the
  mutex instrument prefix and @c category.
  On failure, the instrument key is set to 0 (not instrumented).
  @sa PSI_v1::register_mutex.
*/
static void register_mutex_v1(const char *category,
                              PSI_mutex_info_v1 *info,
                              int count)
{
  REGISTER_BODY_V1(PSI_mutex_key,
                   mutex_instrument_prefix,
                   register_mutex_class)
}
1336
/**
  Implementation of the rwlock instrumentation interface.
  Registers @c count rwlock instruments from @c info, prefixed with the
  rwlock instrument prefix and @c category.
  On failure, the instrument key is set to 0 (not instrumented).
  @sa PSI_v1::register_rwlock.
*/
static void register_rwlock_v1(const char *category,
                               PSI_rwlock_info_v1 *info,
                               int count)
{
  REGISTER_BODY_V1(PSI_rwlock_key,
                   rwlock_instrument_prefix,
                   register_rwlock_class)
}
1349
/**
  Implementation of the cond instrumentation interface.
  Registers @c count condition instruments from @c info, prefixed with the
  cond instrument prefix and @c category.
  On failure, the instrument key is set to 0 (not instrumented).
  @sa PSI_v1::register_cond.
*/
static void register_cond_v1(const char *category,
                             PSI_cond_info_v1 *info,
                             int count)
{
  REGISTER_BODY_V1(PSI_cond_key,
                   cond_instrument_prefix,
                   register_cond_class)
}
1362
/**
  Implementation of the thread instrumentation interface.
  Registers @c count thread instruments from @c info, prefixed with the
  thread instrument prefix and @c category.
  On failure, the instrument key is set to 0 (not instrumented).
  @sa PSI_v1::register_thread.
*/
static void register_thread_v1(const char *category,
                               PSI_thread_info_v1 *info,
                               int count)
{
  REGISTER_BODY_V1(PSI_thread_key,
                   thread_instrument_prefix,
                   register_thread_class)
}
1375
/**
  Implementation of the file instrumentation interface.
  Registers @c count file instruments from @c info, prefixed with the
  file instrument prefix and @c category.
  On failure, the instrument key is set to 0 (not instrumented).
  @sa PSI_v1::register_file.
*/
static void register_file_v1(const char *category,
                             PSI_file_info_v1 *info,
                             int count)
{
  REGISTER_BODY_V1(PSI_file_key,
                   file_instrument_prefix,
                   register_file_class)
}
1388
register_stage_v1(const char * category,PSI_stage_info_v1 ** info_array,int count)1389 static void register_stage_v1(const char *category,
1390 PSI_stage_info_v1 **info_array,
1391 int count)
1392 {
1393 char formatted_name[PFS_MAX_INFO_NAME_LENGTH];
1394 int prefix_length;
1395 int len;
1396 int full_length;
1397 PSI_stage_info_v1 *info;
1398
1399 DBUG_ASSERT(category != NULL);
1400 DBUG_ASSERT(info_array != NULL);
1401 if (unlikely(build_prefix(&stage_instrument_prefix, category,
1402 formatted_name, &prefix_length)))
1403 {
1404 for (; count>0; count--, info_array++)
1405 (*info_array)->m_key= 0;
1406 return ;
1407 }
1408
1409 for (; count>0; count--, info_array++)
1410 {
1411 info= *info_array;
1412 DBUG_ASSERT(info != NULL);
1413 DBUG_ASSERT(info->m_name != NULL);
1414 len= strlen(info->m_name);
1415 full_length= prefix_length + len;
1416 if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))
1417 {
1418 memcpy(formatted_name + prefix_length, info->m_name, len);
1419 info->m_key= register_stage_class(formatted_name,
1420 prefix_length,
1421 full_length,
1422 info->m_flags);
1423 }
1424 else
1425 {
1426 pfs_print_error("register_stage_v1: name too long <%s> <%s>\n",
1427 category, info->m_name);
1428 info->m_key= 0;
1429 }
1430 }
1431 return;
1432 }
1433
register_statement_v1(const char * category,PSI_statement_info_v1 * info,int count)1434 static void register_statement_v1(const char *category,
1435 PSI_statement_info_v1 *info,
1436 int count)
1437 {
1438 char formatted_name[PFS_MAX_INFO_NAME_LENGTH];
1439 int prefix_length;
1440 int len;
1441 int full_length;
1442
1443 DBUG_ASSERT(category != NULL);
1444 DBUG_ASSERT(info != NULL);
1445 if (unlikely(build_prefix(&statement_instrument_prefix,
1446 category, formatted_name, &prefix_length)))
1447 {
1448 for (; count>0; count--, info++)
1449 info->m_key= 0;
1450 return ;
1451 }
1452
1453 for (; count>0; count--, info++)
1454 {
1455 DBUG_ASSERT(info->m_name != NULL);
1456 len= strlen(info->m_name);
1457 full_length= prefix_length + len;
1458 if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))
1459 {
1460 memcpy(formatted_name + prefix_length, info->m_name, len);
1461 info->m_key= register_statement_class(formatted_name, full_length, info->m_flags);
1462 }
1463 else
1464 {
1465 pfs_print_error("register_statement_v1: name too long <%s>\n",
1466 info->m_name);
1467 info->m_key= 0;
1468 }
1469 }
1470 return;
1471 }
1472
/**
  Implementation of the socket instrumentation interface.
  Registers @c count socket instruments from @c info, prefixed with the
  socket instrument prefix and @c category.
  On failure, the instrument key is set to 0 (not instrumented).
  @sa PSI_v1::register_socket.
*/
static void register_socket_v1(const char *category,
                               PSI_socket_info_v1 *info,
                               int count)
{
  REGISTER_BODY_V1(PSI_socket_key,
                   socket_instrument_prefix,
                   register_socket_class)
}
1481
/**
  Common body for the init_<object>_v1() functions.
  Looks up the instrument class for KEY; if the class exists and is
  enabled, creates an instrumented object and returns it cast to the
  opaque PSI type, otherwise returns NULL (no instrumentation).
  @param T   object type tag (mutex, rwlock, cond)
  @param KEY instrument key, as assigned during registration
  @param ID  opaque identity of the instrumented object, forwarded
             unchanged to create_<T>()
*/
#define INIT_BODY_V1(T, KEY, ID)                                    \
  PFS_##T##_class *klass;                                           \
  PFS_##T *pfs;                                                     \
  klass= find_##T##_class(KEY);                                     \
  if (unlikely(klass == NULL))                                      \
    return NULL;                                                    \
  if (! klass->m_enabled)                                           \
    return NULL;                                                    \
  pfs= create_##T(klass, ID);                                       \
  return reinterpret_cast<PSI_##T *> (pfs)
1492
/**
  Implementation of the mutex instrumentation interface.
  Creates the instrumentation for a mutex instance.
  @sa PSI_v1::init_mutex.
  @param key      registered mutex instrument key
  @param identity opaque identity of the mutex, forwarded to create_mutex()
  @return the instrumented mutex, or NULL when the instrument class
          is unknown or disabled
*/
static PSI_mutex*
init_mutex_v1(PSI_mutex_key key, const void *identity)
{
  INIT_BODY_V1(mutex, key, identity);
}
1502
1503 /**
1504 Implementation of the mutex instrumentation interface.
1505 @sa PSI_v1::destroy_mutex.
1506 */
destroy_mutex_v1(PSI_mutex * mutex)1507 static void destroy_mutex_v1(PSI_mutex* mutex)
1508 {
1509 PFS_mutex *pfs= reinterpret_cast<PFS_mutex*> (mutex);
1510
1511 DBUG_ASSERT(pfs != NULL);
1512
1513 destroy_mutex(pfs);
1514 }
1515
/**
  Implementation of the rwlock instrumentation interface.
  Creates the instrumentation for a rwlock instance.
  @sa PSI_v1::init_rwlock.
  @param key      registered rwlock instrument key
  @param identity opaque identity of the rwlock, forwarded to create_rwlock()
  @return the instrumented rwlock, or NULL when the instrument class
          is unknown or disabled
*/
static PSI_rwlock*
init_rwlock_v1(PSI_rwlock_key key, const void *identity)
{
  INIT_BODY_V1(rwlock, key, identity);
}
1525
1526 /**
1527 Implementation of the rwlock instrumentation interface.
1528 @sa PSI_v1::destroy_rwlock.
1529 */
destroy_rwlock_v1(PSI_rwlock * rwlock)1530 static void destroy_rwlock_v1(PSI_rwlock* rwlock)
1531 {
1532 PFS_rwlock *pfs= reinterpret_cast<PFS_rwlock*> (rwlock);
1533
1534 DBUG_ASSERT(pfs != NULL);
1535
1536 destroy_rwlock(pfs);
1537 }
1538
/**
  Implementation of the cond instrumentation interface.
  Creates the instrumentation for a condition instance.
  @sa PSI_v1::init_cond.
  @param key      registered cond instrument key
  @param identity opaque identity of the condition, forwarded to create_cond()
  @return the instrumented condition, or NULL when the instrument class
          is unknown or disabled
*/
static PSI_cond*
init_cond_v1(PSI_cond_key key, const void *identity)
{
  INIT_BODY_V1(cond, key, identity);
}
1548
1549 /**
1550 Implementation of the cond instrumentation interface.
1551 @sa PSI_v1::destroy_cond.
1552 */
destroy_cond_v1(PSI_cond * cond)1553 static void destroy_cond_v1(PSI_cond* cond)
1554 {
1555 PFS_cond *pfs= reinterpret_cast<PFS_cond*> (cond);
1556
1557 DBUG_ASSERT(pfs != NULL);
1558
1559 destroy_cond(pfs);
1560 }
1561
1562 /**
1563 Implementation of the table instrumentation interface.
1564 @sa PSI_v1::get_table_share.
1565 */
1566 static PSI_table_share*
get_table_share_v1(my_bool temporary,TABLE_SHARE * share)1567 get_table_share_v1(my_bool temporary, TABLE_SHARE *share)
1568 {
1569 /* Ignore temporary tables and views. */
1570 if (temporary || share->is_view)
1571 return NULL;
1572 /* An instrumented thread is required, for LF_PINS. */
1573 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1574 if (unlikely(pfs_thread == NULL))
1575 return NULL;
1576 PFS_table_share* pfs_share;
1577 pfs_share= find_or_create_table_share(pfs_thread, temporary, share);
1578 return reinterpret_cast<PSI_table_share*> (pfs_share);
1579 }
1580
1581 /**
1582 Implementation of the table instrumentation interface.
1583 @sa PSI_v1::release_table_share.
1584 */
release_table_share_v1(PSI_table_share * share)1585 static void release_table_share_v1(PSI_table_share* share)
1586 {
1587 PFS_table_share* pfs= reinterpret_cast<PFS_table_share*> (share);
1588
1589 if (unlikely(pfs == NULL))
1590 return;
1591
1592 release_table_share(pfs);
1593 }
1594
1595 /**
1596 Implementation of the table instrumentation interface.
1597 @sa PSI_v1::drop_table_share.
1598 */
1599 static void
drop_table_share_v1(my_bool temporary,const char * schema_name,int schema_name_length,const char * table_name,int table_name_length)1600 drop_table_share_v1(my_bool temporary,
1601 const char *schema_name, int schema_name_length,
1602 const char *table_name, int table_name_length)
1603 {
1604 /* Ignore temporary tables. */
1605 if (temporary)
1606 return;
1607 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1608 if (unlikely(pfs_thread == NULL))
1609 return;
1610 /* TODO: temporary tables */
1611 drop_table_share(pfs_thread, temporary, schema_name, schema_name_length,
1612 table_name, table_name_length);
1613 }
1614
1615 /**
1616 Implementation of the table instrumentation interface.
1617 @sa PSI_v1::open_table.
1618 */
1619 static PSI_table*
open_table_v1(PSI_table_share * share,const void * identity)1620 open_table_v1(PSI_table_share *share, const void *identity)
1621 {
1622 PFS_table_share *pfs_table_share= reinterpret_cast<PFS_table_share*> (share);
1623
1624 if (unlikely(pfs_table_share == NULL))
1625 return NULL;
1626
1627 /* This object is not to be instrumented. */
1628 if (! pfs_table_share->m_enabled)
1629 return NULL;
1630
1631 /* This object is instrumented, but all table instruments are disabled. */
1632 if (! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled)
1633 return NULL;
1634
1635 /*
1636 When the performance schema is off, do not instrument anything.
1637 Table handles have short life cycle, instrumentation will happen
1638 again if needed during the next open().
1639 */
1640 if (! flag_global_instrumentation)
1641 return NULL;
1642
1643 PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1644 if (unlikely(thread == NULL))
1645 return NULL;
1646
1647 PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
1648 return reinterpret_cast<PSI_table *> (pfs_table);
1649 }
1650
1651 /**
1652 Implementation of the table instrumentation interface.
1653 @sa PSI_v1::unbind_table.
1654 */
unbind_table_v1(PSI_table * table)1655 static void unbind_table_v1(PSI_table *table)
1656 {
1657 PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1658 if (likely(pfs != NULL))
1659 {
1660 pfs->m_thread_owner= NULL;
1661 }
1662 }
1663
1664 /**
1665 Implementation of the table instrumentation interface.
1666 @sa PSI_v1::rebind_table.
1667 */
1668 static PSI_table *
rebind_table_v1(PSI_table_share * share,const void * identity,PSI_table * table)1669 rebind_table_v1(PSI_table_share *share, const void *identity, PSI_table *table)
1670 {
1671 PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1672 if (likely(pfs != NULL))
1673 {
1674 PFS_thread *thread;
1675 DBUG_ASSERT(pfs->m_thread_owner == NULL);
1676
1677 /* The table handle was already instrumented, reuse it for this thread. */
1678 thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1679
1680 if (unlikely(! pfs->m_share->m_enabled))
1681 {
1682 destroy_table(pfs);
1683 return NULL;
1684 }
1685
1686 if (unlikely(! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled))
1687 {
1688 destroy_table(pfs);
1689 return NULL;
1690 }
1691
1692 if (unlikely(! flag_global_instrumentation))
1693 {
1694 destroy_table(pfs);
1695 return NULL;
1696 }
1697
1698 pfs->m_thread_owner= thread;
1699 return table;
1700 }
1701
1702 /* See open_table_v1() */
1703
1704 PFS_table_share *pfs_table_share= reinterpret_cast<PFS_table_share*> (share);
1705
1706 if (unlikely(pfs_table_share == NULL))
1707 return NULL;
1708
1709 if (! pfs_table_share->m_enabled)
1710 return NULL;
1711
1712 if (! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled)
1713 return NULL;
1714
1715 if (! flag_global_instrumentation)
1716 return NULL;
1717
1718 PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1719 if (unlikely(thread == NULL))
1720 return NULL;
1721
1722 PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
1723 return reinterpret_cast<PSI_table *> (pfs_table);
1724 }
1725
1726 /**
1727 Implementation of the table instrumentation interface.
1728 @sa PSI_v1::close_table.
1729 */
close_table_v1(PSI_table * table)1730 static void close_table_v1(PSI_table *table)
1731 {
1732 PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1733 if (unlikely(pfs == NULL))
1734 return;
1735 pfs->aggregate();
1736 destroy_table(pfs);
1737 }
1738
1739 static PSI_socket*
init_socket_v1(PSI_socket_key key,const my_socket * fd,const struct sockaddr * addr,socklen_t addr_len)1740 init_socket_v1(PSI_socket_key key, const my_socket *fd,
1741 const struct sockaddr *addr, socklen_t addr_len)
1742 {
1743 PFS_socket_class *klass;
1744 PFS_socket *pfs;
1745 klass= find_socket_class(key);
1746 if (unlikely(klass == NULL))
1747 return NULL;
1748 if (! klass->m_enabled)
1749 return NULL;
1750 pfs= create_socket(klass, fd, addr, addr_len);
1751 return reinterpret_cast<PSI_socket *> (pfs);
1752 }
1753
destroy_socket_v1(PSI_socket * socket)1754 static void destroy_socket_v1(PSI_socket *socket)
1755 {
1756 PFS_socket *pfs= reinterpret_cast<PFS_socket*> (socket);
1757
1758 DBUG_ASSERT(pfs != NULL);
1759
1760 destroy_socket(pfs);
1761 }
1762
1763 /**
1764 Implementation of the file instrumentation interface.
1765 @sa PSI_v1::create_file.
1766 */
create_file_v1(PSI_file_key key,const char * name,File file)1767 static void create_file_v1(PSI_file_key key, const char *name, File file)
1768 {
1769 if (! flag_global_instrumentation)
1770 return;
1771 int index= (int) file;
1772 if (unlikely(index < 0))
1773 return;
1774 PFS_file_class *klass= find_file_class(key);
1775 if (unlikely(klass == NULL))
1776 return;
1777 if (! klass->m_enabled)
1778 return;
1779
1780 /* A thread is needed for LF_PINS */
1781 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1782 if (unlikely(pfs_thread == NULL))
1783 return;
1784
1785 if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
1786 return;
1787
1788 /*
1789 We want this check after pfs_thread->m_enabled,
1790 to avoid reporting false loss.
1791 */
1792 if (unlikely(index >= file_handle_max))
1793 {
1794 file_handle_lost++;
1795 return;
1796 }
1797
1798 uint len= strlen(name);
1799 PFS_file *pfs_file= find_or_create_file(pfs_thread, klass, name, len, true);
1800
1801 file_handle_array[index]= pfs_file;
1802 }
1803
/**
  Arguments given from a parent to a child thread, packaged in one structure.
  This data is used when spawning a new instrumented thread.
  Allocated by the parent and freed by the child in pfs_spawn_thread().
  @sa pfs_spawn_thread.
*/
struct PFS_spawn_thread_arg
{
  /** Instrumented thread id of the parent thread, recorded in the child. */
  ulonglong m_thread_internal_id;
  /** Parent user name, copied into the child instrumentation. */
  char m_username[USERNAME_LENGTH];
  /** Length in bytes of m_username. */
  uint m_username_length;
  /** Parent host name, copied into the child instrumentation. */
  char m_hostname[HOSTNAME_LENGTH];
  /** Length in bytes of m_hostname. */
  uint m_hostname_length;

  /** Instrument key used to create the child thread instrumentation. */
  PSI_thread_key m_child_key;
  /** Opaque identity of the child thread. */
  const void *m_child_identity;
  /** User routine the child thread must execute. */
  void *(*m_user_start_routine)(void*);
  /** Argument for the user routine. */
  void *m_user_arg;
};
1822
/**
  Trampoline executed in the child thread by spawn_thread_v1().
  Attaches instrumentation to the new pthread, copies the parent's
  account attributes, frees the argument package, then runs the user code.
  @param arg a PFS_spawn_thread_arg, allocated by spawn_thread_v1(), owned
             (and freed) here.
*/
void* pfs_spawn_thread(void *arg)
{
  PFS_spawn_thread_arg *typed_arg= (PFS_spawn_thread_arg*) arg;
  void *user_arg;
  void *(*user_start_routine)(void*);

  PFS_thread *pfs;

  /* First, attach instrumentation to this newly created pthread. */
  PFS_thread_class *klass= find_thread_class(typed_arg->m_child_key);
  if (likely(klass != NULL))
  {
    pfs= create_thread(klass, typed_arg->m_child_identity, 0);
    if (likely(pfs != NULL))
    {
      clear_thread_account(pfs);

      pfs->m_parent_thread_internal_id= typed_arg->m_thread_internal_id;

      /* Inherit the parent account; full buffers are copied, the
         significant part is given by the *_length fields. */
      memcpy(pfs->m_username, typed_arg->m_username, sizeof(pfs->m_username));
      pfs->m_username_length= typed_arg->m_username_length;

      memcpy(pfs->m_hostname, typed_arg->m_hostname, sizeof(pfs->m_hostname));
      pfs->m_hostname_length= typed_arg->m_hostname_length;

      set_thread_account(pfs);
    }
  }
  else
  {
    pfs= NULL;
  }
  /* Publish (possibly NULL) instrumentation in thread local storage. */
  my_pthread_setspecific_ptr(THR_PFS, pfs);

  /*
    Secondly, free the memory allocated in spawn_thread_v1().
    It is preferable to do this before invoking the user
    routine, to avoid memory leaks at shutdown, in case
    the server exits without waiting for this thread.
  */
  user_start_routine= typed_arg->m_user_start_routine;
  user_arg= typed_arg->m_user_arg;
  my_free(typed_arg);

  /* Then, execute the user code for this thread. */
  (*user_start_routine)(user_arg);

  return NULL;
}
1872
1873 /**
1874 Implementation of the thread instrumentation interface.
1875 @sa PSI_v1::spawn_thread.
1876 */
static int spawn_thread_v1(PSI_thread_key key,
                           pthread_t *thread, const pthread_attr_t *attr,
                           void *(*start_routine)(void*), void *arg)
{
  PFS_spawn_thread_arg *psi_arg;
  PFS_thread *parent;

  /* psi_arg can not be global, and can not be a local variable. */
  psi_arg= (PFS_spawn_thread_arg*) my_malloc(sizeof(PFS_spawn_thread_arg),
                                             MYF(MY_WME));
  if (unlikely(psi_arg == NULL))
    return EAGAIN;

  psi_arg->m_child_key= key;
  /* Use the caller's arg as identity when available, else the pthread_t. */
  psi_arg->m_child_identity= (arg ? arg : thread);
  psi_arg->m_user_start_routine= start_routine;
  psi_arg->m_user_arg= arg;

  parent= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (parent != NULL)
  {
    /*
      Make a copy of the parent attributes.
      This is required, because instrumentation for this thread (the parent)
      may be destroyed before the child thread instrumentation is created.
    */
    psi_arg->m_thread_internal_id= parent->m_thread_internal_id;

    memcpy(psi_arg->m_username, parent->m_username, sizeof(psi_arg->m_username));
    psi_arg->m_username_length= parent->m_username_length;

    memcpy(psi_arg->m_hostname, parent->m_hostname, sizeof(psi_arg->m_hostname));
    psi_arg->m_hostname_length= parent->m_hostname_length;
  }
  else
  {
    /* Uninstrumented parent: child starts with an empty account. */
    psi_arg->m_thread_internal_id= 0;
    psi_arg->m_username_length= 0;
    psi_arg->m_hostname_length= 0;
  }

  /* On success, ownership of psi_arg passes to pfs_spawn_thread(). */
  int result= pthread_create(thread, attr, pfs_spawn_thread, psi_arg);
  if (unlikely(result != 0))
    my_free(psi_arg);
  return result;
}
1923
1924 /**
1925 Implementation of the thread instrumentation interface.
1926 @sa PSI_v1::new_thread.
1927 */
1928 static PSI_thread*
new_thread_v1(PSI_thread_key key,const void * identity,ulonglong processlist_id)1929 new_thread_v1(PSI_thread_key key, const void *identity, ulonglong processlist_id)
1930 {
1931 PFS_thread *pfs;
1932
1933 PFS_thread_class *klass= find_thread_class(key);
1934 if (likely(klass != NULL))
1935 pfs= create_thread(klass, identity, processlist_id);
1936 else
1937 pfs= NULL;
1938
1939 return reinterpret_cast<PSI_thread*> (pfs);
1940 }
1941
1942 /**
1943 Implementation of the thread instrumentation interface.
1944 @sa PSI_v1::set_thread_id.
1945 */
set_thread_id_v1(PSI_thread * thread,ulonglong processlist_id)1946 static void set_thread_id_v1(PSI_thread *thread, ulonglong processlist_id)
1947 {
1948 PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
1949 if (unlikely(pfs == NULL))
1950 return;
1951 pfs->m_processlist_id= processlist_id;
1952 }
1953
1954 /**
1955 Implementation of the thread instrumentation interface.
  @sa PSI_v1::get_thread.
1957 */
1958 static PSI_thread*
get_thread_v1(void)1959 get_thread_v1(void)
1960 {
1961 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1962 return reinterpret_cast<PSI_thread*> (pfs);
1963 }
1964
1965 /**
1966 Implementation of the thread instrumentation interface.
1967 @sa PSI_v1::set_thread_user.
1968 */
set_thread_user_v1(const char * user,int user_len)1969 static void set_thread_user_v1(const char *user, int user_len)
1970 {
1971 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1972
1973 DBUG_ASSERT((user != NULL) || (user_len == 0));
1974 DBUG_ASSERT(user_len >= 0);
1975 DBUG_ASSERT((uint) user_len <= sizeof(pfs->m_username));
1976
1977 if (unlikely(pfs == NULL))
1978 return;
1979
1980 aggregate_thread(pfs, pfs->m_account, pfs->m_user, pfs->m_host);
1981
1982 pfs->m_session_lock.allocated_to_dirty();
1983
1984 clear_thread_account(pfs);
1985
1986 if (user_len > 0)
1987 memcpy(pfs->m_username, user, user_len);
1988 pfs->m_username_length= user_len;
1989
1990 set_thread_account(pfs);
1991
1992 bool enabled= true;
1993 if (flag_thread_instrumentation)
1994 {
1995 if ((pfs->m_username_length > 0) && (pfs->m_hostname_length > 0))
1996 {
1997 /*
1998 TODO: performance improvement.
1999 Once performance_schema.USERS is exposed,
2000 we can use PFS_user::m_enabled instead of looking up
2001 SETUP_ACTORS every time.
2002 */
2003 lookup_setup_actor(pfs,
2004 pfs->m_username, pfs->m_username_length,
2005 pfs->m_hostname, pfs->m_hostname_length,
2006 &enabled);
2007 }
2008 }
2009
2010 pfs->m_enabled= enabled;
2011
2012 pfs->m_session_lock.dirty_to_allocated();
2013 }
2014
2015 /**
2016 Implementation of the thread instrumentation interface.
2017 @sa PSI_v1::set_thread_account.
2018 */
set_thread_account_v1(const char * user,int user_len,const char * host,int host_len)2019 static void set_thread_account_v1(const char *user, int user_len,
2020 const char *host, int host_len)
2021 {
2022 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2023
2024 DBUG_ASSERT((user != NULL) || (user_len == 0));
2025 DBUG_ASSERT(user_len >= 0);
2026 DBUG_ASSERT((uint) user_len <= sizeof(pfs->m_username));
2027 DBUG_ASSERT((host != NULL) || (host_len == 0));
2028 DBUG_ASSERT(host_len >= 0);
2029
2030 host_len= min<size_t>(host_len, sizeof(pfs->m_hostname));
2031
2032 if (unlikely(pfs == NULL))
2033 return;
2034
2035 pfs->m_session_lock.allocated_to_dirty();
2036
2037 clear_thread_account(pfs);
2038
2039 if (host_len > 0)
2040 memcpy(pfs->m_hostname, host, host_len);
2041 pfs->m_hostname_length= host_len;
2042
2043 if (user_len > 0)
2044 memcpy(pfs->m_username, user, user_len);
2045 pfs->m_username_length= user_len;
2046
2047 set_thread_account(pfs);
2048
2049 bool enabled= true;
2050 if (flag_thread_instrumentation)
2051 {
2052 if ((pfs->m_username_length > 0) && (pfs->m_hostname_length > 0))
2053 {
2054 /*
2055 TODO: performance improvement.
2056 Once performance_schema.USERS is exposed,
2057 we can use PFS_user::m_enabled instead of looking up
2058 SETUP_ACTORS every time.
2059 */
2060 lookup_setup_actor(pfs,
2061 pfs->m_username, pfs->m_username_length,
2062 pfs->m_hostname, pfs->m_hostname_length,
2063 &enabled);
2064 }
2065 }
2066 pfs->m_enabled= enabled;
2067
2068 pfs->m_session_lock.dirty_to_allocated();
2069 }
2070
2071 /**
2072 Implementation of the thread instrumentation interface.
2073 @sa PSI_v1::set_thread_db.
2074 */
set_thread_db_v1(const char * db,int db_len)2075 static void set_thread_db_v1(const char* db, int db_len)
2076 {
2077 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2078
2079 DBUG_ASSERT((db != NULL) || (db_len == 0));
2080 DBUG_ASSERT(db_len >= 0);
2081 DBUG_ASSERT((uint) db_len <= sizeof(pfs->m_dbname));
2082
2083 if (likely(pfs != NULL))
2084 {
2085 pfs->m_stmt_lock.allocated_to_dirty();
2086 if (db_len > 0)
2087 memcpy(pfs->m_dbname, db, db_len);
2088 pfs->m_dbname_length= db_len;
2089 pfs->m_stmt_lock.dirty_to_allocated();
2090 }
2091 }
2092
2093 /**
2094 Implementation of the thread instrumentation interface.
2095 @sa PSI_v1::set_thread_command.
2096 */
set_thread_command_v1(int command)2097 static void set_thread_command_v1(int command)
2098 {
2099 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2100
2101 DBUG_ASSERT(command >= 0);
2102 DBUG_ASSERT(command <= (int) COM_END);
2103
2104 if (likely(pfs != NULL))
2105 {
2106 pfs->m_command= command;
2107 }
2108 }
2109
2110 /**
2111 Implementation of the thread instrumentation interface.
2112 @sa PSI_v1::set_thread_start_time.
2113 */
set_thread_start_time_v1(time_t start_time)2114 static void set_thread_start_time_v1(time_t start_time)
2115 {
2116 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2117
2118 if (likely(pfs != NULL))
2119 {
2120 pfs->m_start_time= start_time;
2121 }
2122 }
2123
2124 /**
2125 Implementation of the thread instrumentation interface.
2126 @sa PSI_v1::set_thread_state.
2127 */
static void set_thread_state_v1(const char* state)
{
  /* DEPRECATED: intentionally a no-op, kept for ABI compatibility. */
}
2132
2133 /**
2134 Implementation of the thread instrumentation interface.
2135 @sa PSI_v1::set_thread_info.
2136 */
set_thread_info_v1(const char * info,uint info_len)2137 static void set_thread_info_v1(const char* info, uint info_len)
2138 {
2139 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2140
2141 DBUG_ASSERT((info != NULL) || (info_len == 0));
2142
2143 if (likely(pfs != NULL))
2144 {
2145 if ((info != NULL) && (info_len > 0))
2146 {
2147 if (info_len > sizeof(pfs->m_processlist_info))
2148 info_len= sizeof(pfs->m_processlist_info);
2149
2150 pfs->m_stmt_lock.allocated_to_dirty();
2151 memcpy(pfs->m_processlist_info, info, info_len);
2152 pfs->m_processlist_info_length= info_len;
2153 pfs->m_stmt_lock.dirty_to_allocated();
2154 }
2155 else
2156 {
2157 pfs->m_stmt_lock.allocated_to_dirty();
2158 pfs->m_processlist_info_length= 0;
2159 pfs->m_stmt_lock.dirty_to_allocated();
2160 }
2161 }
2162 }
2163
2164 /**
2165 Implementation of the thread instrumentation interface.
2166 @sa PSI_v1::set_thread.
2167 */
set_thread_v1(PSI_thread * thread)2168 static void set_thread_v1(PSI_thread* thread)
2169 {
2170 PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
2171 my_pthread_setspecific_ptr(THR_PFS, pfs);
2172 }
2173
2174 /**
2175 Implementation of the thread instrumentation interface.
2176 @sa PSI_v1::delete_current_thread.
2177 */
delete_current_thread_v1(void)2178 static void delete_current_thread_v1(void)
2179 {
2180 PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2181 if (thread != NULL)
2182 {
2183 aggregate_thread(thread, thread->m_account, thread->m_user, thread->m_host);
2184 my_pthread_setspecific_ptr(THR_PFS, NULL);
2185 destroy_thread(thread);
2186 }
2187 }
2188
2189 /**
2190 Implementation of the thread instrumentation interface.
2191 @sa PSI_v1::delete_thread.
2192 */
delete_thread_v1(PSI_thread * thread)2193 static void delete_thread_v1(PSI_thread *thread)
2194 {
2195 PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
2196
2197 if (pfs != NULL)
2198 {
2199 aggregate_thread(pfs, pfs->m_account, pfs->m_user, pfs->m_host);
2200 destroy_thread(pfs);
2201 }
2202 }
2203
2204 /**
2205 Implementation of the mutex instrumentation interface.
2206 @sa PSI_v1::start_mutex_wait.
2207 */
static PSI_mutex_locker*
start_mutex_wait_v1(PSI_mutex_locker_state *state,
                    PSI_mutex *mutex, PSI_mutex_operation op,
                    const char *src_file, uint src_line)
{
  PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
  DBUG_ASSERT((int) op >= 0);
  DBUG_ASSERT((uint) op < array_elements(mutex_operation_map));
  DBUG_ASSERT(state != NULL);

  DBUG_ASSERT(pfs_mutex != NULL);
  DBUG_ASSERT(pfs_mutex->m_class != NULL);

  /* Returning NULL means: no locker, the wait is not recorded at all. */
  if (! pfs_mutex->m_enabled)
    return NULL;

  register uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_mutex->m_timed)
    {
      /* Record the start timestamp; end_mutex_wait computes the duration. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Bounded per-thread wait stack: overflow is counted, not recorded. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The previous stack entry is the nesting (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      wait->m_thread= pfs_thread;
      wait->m_class= pfs_mutex->m_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_mutex->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= mutex_operation_map[(int) op];
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_MUTEX;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_mutex->m_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
      state->m_thread= NULL;
    }
    else
    {
      /*
        Complete shortcut.
      */
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
      pfs_mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
      return NULL;
    }
  }

  /* The state flags tell end_mutex_wait what bookkeeping to finish. */
  state->m_flags= flags;
  state->m_mutex= mutex;
  return reinterpret_cast<PSI_mutex_locker*> (state);
}
2300
2301 /**
2302 Implementation of the rwlock instrumentation interface.
2303 @sa PSI_v1::start_rwlock_rdwait
2304 @sa PSI_v1::start_rwlock_wrwait
2305 */
static PSI_rwlock_locker*
start_rwlock_wait_v1(PSI_rwlock_locker_state *state,
                     PSI_rwlock *rwlock,
                     PSI_rwlock_operation op,
                     const char *src_file, uint src_line)
{
  PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(rwlock_operation_map));
  DBUG_ASSERT(state != NULL);
  DBUG_ASSERT(pfs_rwlock != NULL);
  DBUG_ASSERT(pfs_rwlock->m_class != NULL);

  /* Returning NULL means: no locker, the wait is not recorded at all. */
  if (! pfs_rwlock->m_enabled)
    return NULL;

  register uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_rwlock->m_timed)
    {
      /* Record the start timestamp; end_rwlock_*wait computes the duration. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Bounded per-thread wait stack: overflow is counted, not recorded. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The previous stack entry is the nesting (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      wait->m_thread= pfs_thread;
      wait->m_class= pfs_rwlock->m_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_rwlock->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= rwlock_operation_map[static_cast<int> (op)];
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_RWLOCK;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_rwlock->m_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
      state->m_thread= NULL;
    }
    else
    {
      /*
        Complete shortcut.
      */
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
      pfs_rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
      return NULL;
    }
  }

  /* The state flags tell the end_rwlock_*wait call what to finish. */
  state->m_flags= flags;
  state->m_rwlock= rwlock;
  return reinterpret_cast<PSI_rwlock_locker*> (state);
}
2398
2399 /**
2400 Implementation of the cond instrumentation interface.
2401 @sa PSI_v1::start_cond_wait.
2402 */
static PSI_cond_locker*
start_cond_wait_v1(PSI_cond_locker_state *state,
                   PSI_cond *cond, PSI_mutex *mutex,
                   PSI_cond_operation op,
                   const char *src_file, uint src_line)
{
  /*
    Note about the unused PSI_mutex *mutex parameter:
    In the pthread library, a call to pthread_cond_wait()
    causes an unlock() + lock() on the mutex associated with the condition.
    This mutex operation is not instrumented, so the mutex will still
    appear as locked when a thread is waiting on a condition.
    This has no impact now, as unlock_mutex() is not recording events.
    When unlock_mutex() is implemented by later work logs,
    this parameter here will be used to adjust the mutex state,
    in start_cond_wait_v1() and end_cond_wait_v1().
  */
  PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(cond_operation_map));
  DBUG_ASSERT(state != NULL);
  DBUG_ASSERT(pfs_cond != NULL);
  DBUG_ASSERT(pfs_cond->m_class != NULL);

  /* Returning NULL means: no locker, the wait is not recorded at all. */
  if (! pfs_cond->m_enabled)
    return NULL;

  register uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_cond->m_timed)
    {
      /* Record the start timestamp; end_cond_wait computes the duration. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Bounded per-thread wait stack: overflow is counted, not recorded. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The previous stack entry is the nesting (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      wait->m_thread= pfs_thread;
      wait->m_class= pfs_cond->m_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_cond->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= cond_operation_map[static_cast<int> (op)];
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_COND;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_cond->m_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /*
        Complete shortcut.
      */
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
      pfs_cond->m_cond_stat.m_wait_stat.aggregate_counted();
      return NULL;
    }
  }

  /* The state flags tell end_cond_wait what bookkeeping to finish. */
  state->m_flags= flags;
  state->m_cond= cond;
  state->m_mutex= mutex;
  return reinterpret_cast<PSI_cond_locker*> (state);
}
2506
lock_flags_to_lock_type(uint flags)2507 static inline PFS_TL_LOCK_TYPE lock_flags_to_lock_type(uint flags)
2508 {
2509 enum thr_lock_type value= static_cast<enum thr_lock_type> (flags);
2510
2511 switch (value)
2512 {
2513 case TL_READ:
2514 return PFS_TL_READ;
2515 case TL_READ_WITH_SHARED_LOCKS:
2516 return PFS_TL_READ_WITH_SHARED_LOCKS;
2517 case TL_READ_HIGH_PRIORITY:
2518 return PFS_TL_READ_HIGH_PRIORITY;
2519 case TL_READ_NO_INSERT:
2520 return PFS_TL_READ_NO_INSERT;
2521 case TL_WRITE_ALLOW_WRITE:
2522 return PFS_TL_WRITE_ALLOW_WRITE;
2523 case TL_WRITE_CONCURRENT_INSERT:
2524 return PFS_TL_WRITE_CONCURRENT_INSERT;
2525 case TL_WRITE_DELAYED:
2526 return PFS_TL_WRITE_DELAYED;
2527 case TL_WRITE_LOW_PRIORITY:
2528 return PFS_TL_WRITE_LOW_PRIORITY;
2529 case TL_WRITE:
2530 return PFS_TL_WRITE;
2531
2532 case TL_WRITE_ONLY:
2533 case TL_IGNORE:
2534 case TL_UNLOCK:
2535 case TL_READ_DEFAULT:
2536 case TL_WRITE_DEFAULT:
2537 default:
2538 DBUG_ASSERT(false);
2539 }
2540
2541 /* Dead code */
2542 return PFS_TL_READ;
2543 }
2544
external_lock_flags_to_lock_type(uint flags)2545 static inline PFS_TL_LOCK_TYPE external_lock_flags_to_lock_type(uint flags)
2546 {
2547 DBUG_ASSERT(flags == F_RDLCK || flags == F_WRLCK);
2548 return (flags == F_RDLCK ? PFS_TL_READ_EXTERNAL : PFS_TL_WRITE_EXTERNAL);
2549 }
2550
2551 /**
2552 Implementation of the table instrumentation interface.
2553 @sa PSI_v1::start_table_io_wait_v1
2554 */
static PSI_table_locker*
start_table_io_wait_v1(PSI_table_locker_state *state,
                       PSI_table *table,
                       PSI_table_io_operation op,
                       uint index,
                       const char *src_file, uint src_line)
{
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(table_io_operation_map));
  DBUG_ASSERT(state != NULL);
  PFS_table *pfs_table= reinterpret_cast<PFS_table*> (table);
  DBUG_ASSERT(pfs_table != NULL);
  DBUG_ASSERT(pfs_table->m_share != NULL);

  /* Returning NULL means: no locker, the wait is not recorded at all. */
  if (! pfs_table->m_io_enabled)
    return NULL;

  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

  register uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    if (pfs_thread == NULL)
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_table->m_io_timed)
    {
      /* Record the start timestamp; end_table_io_wait computes duration. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Bounded per-thread wait stack: overflow is counted, not recorded. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The previous stack entry is the nesting (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      PFS_table_share *share= pfs_table->m_share;
      wait->m_thread= pfs_thread;
      wait->m_class= &global_table_io_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_table->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= table_io_operation_map[static_cast<int> (op)];
      wait->m_flags= 0;
      wait->m_object_type= share->get_object_type();
      /* Weak reference: share may be recycled, version detects that. */
      wait->m_weak_table_share= share;
      wait->m_weak_version= share->get_version();
      wait->m_index= index;
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_TABLE;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_table->m_io_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /* TODO: consider a shortcut here */
      flags= 0;
    }
  }

  /* The state flags tell end_table_io_wait what bookkeeping to finish. */
  state->m_flags= flags;
  state->m_table= table;
  state->m_io_operation= op;
  state->m_index= index;
  return reinterpret_cast<PSI_table_locker*> (state);
}
2652
2653 /**
2654 Implementation of the table instrumentation interface.
2655 @sa PSI_v1::start_table_lock_wait.
2656 */
static PSI_table_locker*
start_table_lock_wait_v1(PSI_table_locker_state *state,
                         PSI_table *table,
                         PSI_table_lock_operation op,
                         ulong op_flags,
                         const char *src_file, uint src_line)
{
  DBUG_ASSERT(state != NULL);
  DBUG_ASSERT((op == PSI_TABLE_LOCK) || (op == PSI_TABLE_EXTERNAL_LOCK));

  PFS_table *pfs_table= reinterpret_cast<PFS_table*> (table);

  DBUG_ASSERT(pfs_table != NULL);
  DBUG_ASSERT(pfs_table->m_share != NULL);

  /* Returning NULL means: no locker, the wait is not recorded at all. */
  if (! pfs_table->m_lock_enabled)
    return NULL;

  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

  PFS_TL_LOCK_TYPE lock_type;

  /* Decode op_flags: thr_lock_type for internal locks,
     fcntl-style F_RDLCK/F_WRLCK/F_UNLCK for external locks. */
  switch (op)
  {
  case PSI_TABLE_LOCK:
    lock_type= lock_flags_to_lock_type(op_flags);
    break;
  case PSI_TABLE_EXTERNAL_LOCK:
    /*
      See the handler::external_lock() API design,
      there is no handler::external_unlock().
    */
    if (op_flags == F_UNLCK)
      return NULL;
    lock_type= external_lock_flags_to_lock_type(op_flags);
    break;
  default:
    lock_type= PFS_TL_READ;
    DBUG_ASSERT(false);
  }

  DBUG_ASSERT((uint) lock_type < array_elements(table_lock_operation_map));

  register uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    if (pfs_thread == NULL)
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_table->m_lock_timed)
    {
      /* Record the start timestamp; end_table_lock_wait computes duration. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Bounded per-thread wait stack: overflow is counted, not recorded. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The previous stack entry is the nesting (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      PFS_table_share *share= pfs_table->m_share;
      wait->m_thread= pfs_thread;
      wait->m_class= &global_table_lock_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_table->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= table_lock_operation_map[lock_type];
      wait->m_flags= 0;
      wait->m_object_type= share->get_object_type();
      /* Weak reference: share may be recycled, version detects that. */
      wait->m_weak_table_share= share;
      wait->m_weak_version= share->get_version();
      wait->m_index= 0;
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_TABLE;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_table->m_lock_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /* TODO: consider a shortcut here */
      flags= 0;
    }
  }

  /* m_index doubles as the decoded lock type for end_table_lock_wait. */
  state->m_flags= flags;
  state->m_table= table;
  state->m_index= lock_type;
  return reinterpret_cast<PSI_table_locker*> (state);
}
2777
2778 /**
2779 Implementation of the file instrumentation interface.
2780 @sa PSI_v1::get_thread_file_name_locker.
2781 */
static PSI_file_locker*
get_thread_file_name_locker_v1(PSI_file_locker_state *state,
                               PSI_file_key key,
                               PSI_file_operation op,
                               const char *name, const void *identity)
{
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
  DBUG_ASSERT(state != NULL);

  /* Global kill switch: nothing is instrumented. */
  if (! flag_global_instrumentation)
    return NULL;
  PFS_file_class *klass= find_file_class(key);
  if (unlikely(klass == NULL))
    return NULL;
  if (! klass->m_enabled)
    return NULL;

  /* Needed for the LF_HASH */
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(pfs_thread == NULL))
    return NULL;

  if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
    return NULL;

  register uint flags;

  /* Unlike the mutex/rwlock/cond lockers, a thread is always required
     here, so STATE_FLAG_THREAD is set unconditionally. */
  state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
  flags= STATE_FLAG_THREAD;

  if (klass->m_timed)
    flags|= STATE_FLAG_TIMED;

  if (flag_events_waits_current)
  {
    /* Bounded per-thread wait stack: overflow is counted, not recorded. */
    if (unlikely(pfs_thread->m_events_waits_current >=
                 & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
    {
      locker_lost++;
      return NULL;
    }
    PFS_events_waits *wait= pfs_thread->m_events_waits_current;
    state->m_wait= wait;
    flags|= STATE_FLAG_EVENT;

    /* The previous stack entry is the nesting (parent) event. */
    PFS_events_waits *parent_event= wait - 1;
    wait->m_event_type= EVENT_TYPE_WAIT;
    wait->m_nesting_event_id= parent_event->m_event_id;
    wait->m_nesting_event_type= parent_event->m_event_type;

    wait->m_thread= pfs_thread;
    wait->m_class= klass;
    /* Timer starts later, in the start_file_* call; zero for now. */
    wait->m_timer_start= 0;
    wait->m_timer_end= 0;
    wait->m_object_instance_addr= NULL;
    wait->m_weak_file= NULL;
    wait->m_weak_version= 0;
    wait->m_event_id= pfs_thread->m_event_id++;
    wait->m_end_event_id= 0;
    wait->m_operation= file_operation_map[static_cast<int> (op)];
    wait->m_wait_class= WAIT_CLASS_FILE;

    pfs_thread->m_events_waits_current++;
  }

  /* The PFS_file itself is resolved later, from m_name / m_class. */
  state->m_flags= flags;
  state->m_file= NULL;
  state->m_name= name;
  state->m_class= klass;
  state->m_operation= op;
  return reinterpret_cast<PSI_file_locker*> (state);
}
2855
2856 /**
2857 Implementation of the file instrumentation interface.
2858 @sa PSI_v1::get_thread_file_stream_locker.
2859 */
static PSI_file_locker*
get_thread_file_stream_locker_v1(PSI_file_locker_state *state,
                                 PSI_file *file, PSI_file_operation op)
{
  PFS_file *pfs_file= reinterpret_cast<PFS_file*> (file);
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
  DBUG_ASSERT(state != NULL);

  if (unlikely(pfs_file == NULL))
    return NULL;
  DBUG_ASSERT(pfs_file->m_class != NULL);
  PFS_file_class *klass= pfs_file->m_class;

  if (! pfs_file->m_enabled)
    return NULL;

  register uint flags;

  if (flag_thread_instrumentation)
  {
    /* Thread instrumentation on: record the thread and maybe an event. */
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_file->m_timed)
      flags|= STATE_FLAG_TIMED;

    if (flag_events_waits_current)
    {
      /* Refuse the locker when the per-thread nested wait stack is full. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The slot just below the stack top is the enclosing (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      wait->m_thread= pfs_thread;
      wait->m_class= klass;
      wait->m_timer_start= 0;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_file;
      /* Weak reference: (pointer, version) pair detects instance recycling. */
      wait->m_weak_file= pfs_file;
      wait->m_weak_version= pfs_file->get_version();
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= file_operation_map[static_cast<int> (op)];
      wait->m_wait_class= WAIT_CLASS_FILE;

      /* Publish the event by advancing the stack top, after all fields are set. */
      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    /* No per-thread instrumentation: only instance timing may apply. */
    state->m_thread= NULL;
    if (pfs_file->m_timed)
    {
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /* TODO: consider a shortcut. */
      flags= 0;
    }
  }

  state->m_flags= flags;
  state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
  state->m_operation= op;
  state->m_name= NULL;
  state->m_class= klass;
  return reinterpret_cast<PSI_file_locker*> (state);
}
2945
2946 /**
2947 Implementation of the file instrumentation interface.
2948 @sa PSI_v1::get_thread_file_descriptor_locker.
2949 */
static PSI_file_locker*
get_thread_file_descriptor_locker_v1(PSI_file_locker_state *state,
                                     File file, PSI_file_operation op)
{
  int index= static_cast<int> (file);
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
  DBUG_ASSERT(state != NULL);

  /* Descriptors outside the instrumented handle range are ignored. */
  if (unlikely((index < 0) || (index >= file_handle_max)))
    return NULL;

  PFS_file *pfs_file= file_handle_array[index];
  if (unlikely(pfs_file == NULL))
    return NULL;

  /*
    We are about to close a file by descriptor number,
    and the calling code still holds the descriptor.
    Cleanup the file descriptor <--> file instrument association.
    Remove the instrumentation *before* the close to avoid race
    conditions with another thread opening a file
    (that could be given the same descriptor).
  */
  if (op == PSI_FILE_CLOSE)
    file_handle_array[index]= NULL;

  if (! pfs_file->m_enabled)
    return NULL;

  DBUG_ASSERT(pfs_file->m_class != NULL);
  PFS_file_class *klass= pfs_file->m_class;

  register uint flags;

  if (flag_thread_instrumentation)
  {
    /* Thread instrumentation on: record the thread and maybe an event. */
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_file->m_timed)
      flags|= STATE_FLAG_TIMED;

    if (flag_events_waits_current)
    {
      /* Refuse the locker when the per-thread nested wait stack is full. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The slot just below the stack top is the enclosing (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      wait->m_thread= pfs_thread;
      wait->m_class= klass;
      wait->m_timer_start= 0;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_file;
      /* Weak reference: (pointer, version) pair detects instance recycling. */
      wait->m_weak_file= pfs_file;
      wait->m_weak_version= pfs_file->get_version();
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= file_operation_map[static_cast<int> (op)];
      wait->m_wait_class= WAIT_CLASS_FILE;

      /* Publish the event by advancing the stack top, after all fields are set. */
      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    /* No per-thread instrumentation: only instance timing may apply. */
    state->m_thread= NULL;
    if (pfs_file->m_timed)
    {
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /* TODO: consider a shortcut. */
      flags= 0;
    }
  }

  state->m_flags= flags;
  state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
  state->m_operation= op;
  state->m_name= NULL;
  state->m_class= klass;
  return reinterpret_cast<PSI_file_locker*> (state);
}
3051
3052 /** Socket locker */
3053
static PSI_socket_locker*
start_socket_wait_v1(PSI_socket_locker_state *state,
                     PSI_socket *socket,
                     PSI_socket_operation op,
                     size_t count,
                     const char *src_file, uint src_line)
{
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(socket_operation_map));
  DBUG_ASSERT(state != NULL);
  PFS_socket *pfs_socket= reinterpret_cast<PFS_socket*> (socket);

  DBUG_ASSERT(pfs_socket != NULL);
  DBUG_ASSERT(pfs_socket->m_class != NULL);

  /* Idle sockets are instrumented by the idle instrument instead. */
  if (!pfs_socket->m_enabled || pfs_socket->m_idle)
    return NULL;

  register uint flags= 0;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    /*
      Do not use pfs_socket->m_thread_owner here,
      as different threads may use concurrently the same socket,
      for example during a KILL.
    */
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

    if (unlikely(pfs_thread == NULL))
      return NULL;

    if (!pfs_thread->m_enabled)
      return NULL;

    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_socket->m_timed)
    {
      /* Read the timer now; end_socket_wait() computes the duration. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Refuse the locker when the per-thread nested wait stack is full. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The slot just below the stack top is the enclosing (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;
      wait->m_thread= pfs_thread;
      wait->m_class= pfs_socket->m_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_socket->m_identity;
      /* Weak reference: (pointer, version) pair detects instance recycling. */
      wait->m_weak_socket= pfs_socket;
      wait->m_weak_version= pfs_socket->get_version();
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= socket_operation_map[static_cast<int>(op)];
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_number_of_bytes= count;
      wait->m_wait_class= WAIT_CLASS_SOCKET;

      /* Publish the event by advancing the stack top, after all fields are set. */
      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_socket->m_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /*
        Even if timing is disabled, end_socket_wait() still needs a locker to
        capture the number of bytes sent or received by the socket operation.
        For operations that do not have a byte count, then just increment the
        event counter and return a NULL locker.
      */
      switch (op)
      {
        case PSI_SOCKET_CONNECT:
        case PSI_SOCKET_CREATE:
        case PSI_SOCKET_BIND:
        case PSI_SOCKET_SEEK:
        case PSI_SOCKET_OPT:
        case PSI_SOCKET_STAT:
        case PSI_SOCKET_SHUTDOWN:
        case PSI_SOCKET_CLOSE:
        case PSI_SOCKET_SELECT:
          /* Byte-count-free operation: count it here, skip the locker. */
          pfs_socket->m_socket_stat.m_io_stat.m_misc.aggregate_counted();
          return NULL;
        default:
          break;
      }
    }
  }

  state->m_flags= flags;
  state->m_socket= socket;
  state->m_operation= op;
  return reinterpret_cast<PSI_socket_locker*> (state);
}
3174
3175 /**
3176 Implementation of the mutex instrumentation interface.
3177 @sa PSI_v1::unlock_mutex.
3178 */
unlock_mutex_v1(PSI_mutex * mutex)3179 static void unlock_mutex_v1(PSI_mutex *mutex)
3180 {
3181 PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
3182
3183 DBUG_ASSERT(pfs_mutex != NULL);
3184
3185 /*
3186 Note that this code is still protected by the instrumented mutex,
3187 and therefore is thread safe. See inline_mysql_mutex_unlock().
3188 */
3189
3190 /* Always update the instrumented state */
3191 pfs_mutex->m_owner= NULL;
3192 pfs_mutex->m_last_locked= 0;
3193
3194 #ifdef LATER_WL2333
3195 /*
3196 See WL#2333: SHOW ENGINE ... LOCK STATUS.
3197 PFS_mutex::m_lock_stat is not exposed in user visible tables
3198 currently, so there is no point spending time computing it.
3199 */
3200 if (! pfs_mutex->m_enabled)
3201 return;
3202
3203 if (! pfs_mutex->m_timed)
3204 return;
3205
3206 ulonglong locked_time;
3207 locked_time= get_timer_pico_value(wait_timer) - pfs_mutex->m_last_locked;
3208 pfs_mutex->m_mutex_stat.m_lock_stat.aggregate_value(locked_time);
3209 #endif
3210 }
3211
3212 /**
3213 Implementation of the rwlock instrumentation interface.
3214 @sa PSI_v1::unlock_rwlock.
3215 */
static void unlock_rwlock_v1(PSI_rwlock *rwlock)
{
  PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
  DBUG_ASSERT(pfs_rwlock != NULL);
  DBUG_ASSERT(pfs_rwlock == sanitize_rwlock(pfs_rwlock));
  DBUG_ASSERT(pfs_rwlock->m_class != NULL);
  DBUG_ASSERT(pfs_rwlock->m_lock.is_populated());

  bool last_writer= false;
  bool last_reader= false;

  /*
    Note that this code is still protected by the instrumented rwlock,
    and therefore is:
    - thread safe for write locks
    - almost thread safe for read locks (pfs_rwlock->m_readers is unsafe).
    See inline_mysql_rwlock_unlock()
  */

  /* Always update the instrumented state */
  if (pfs_rwlock->m_writer != NULL)
  {
    /* Nominal case, a writer is unlocking. */
    last_writer= true;
    pfs_rwlock->m_writer= NULL;
    /* Reset the readers stats, they could be off */
    pfs_rwlock->m_readers= 0;
  }
  else if (likely(pfs_rwlock->m_readers > 0))
  {
    /* Nominal case, a reader is unlocking. */
    if (--(pfs_rwlock->m_readers) == 0)
      last_reader= true;
  }
  else
  {
    /*
      Edge case, we have no writer and no readers,
      on an unlock event.
      This is possible for:
      - partial instrumentation
      - instrumentation disabled at runtime,
        see when get_thread_rwlock_locker_v1() returns NULL
      No further action is taken here, the next
      write lock will put the statistics is a valid state.
    */
  }

#ifdef LATER_WL2333
  /* See WL#2333: SHOW ENGINE ... LOCK STATUS. */

  if (! pfs_rwlock->m_enabled)
    return;

  if (! pfs_rwlock->m_timed)
    return;

  ulonglong locked_time;
  if (last_writer)
  {
    locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_written;
    pfs_rwlock->m_rwlock_stat.m_write_lock_stat.aggregate_value(locked_time);
  }
  else if (last_reader)
  {
    locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_read;
    pfs_rwlock->m_rwlock_stat.m_read_lock_stat.aggregate_value(locked_time);
  }
#else
  /* Silence unused-variable warnings while WL#2333 is not implemented. */
  (void) last_reader;
  (void) last_writer;
#endif
}
3289
3290 /**
3291 Implementation of the cond instrumentation interface.
3292 @sa PSI_v1::signal_cond.
3293 */
signal_cond_v1(PSI_cond * cond)3294 static void signal_cond_v1(PSI_cond* cond)
3295 {
3296 PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
3297
3298 DBUG_ASSERT(pfs_cond != NULL);
3299
3300 pfs_cond->m_cond_stat.m_signal_count++;
3301 }
3302
3303 /**
3304 Implementation of the cond instrumentation interface.
3305 @sa PSI_v1::broadcast_cond.
3306 */
broadcast_cond_v1(PSI_cond * cond)3307 static void broadcast_cond_v1(PSI_cond* cond)
3308 {
3309 PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
3310
3311 DBUG_ASSERT(pfs_cond != NULL);
3312
3313 pfs_cond->m_cond_stat.m_broadcast_count++;
3314 }
3315
3316 /**
3317 Implementation of the idle instrumentation interface.
3318 @sa PSI_v1::start_idle_wait.
3319 */
static PSI_idle_locker*
start_idle_wait_v1(PSI_idle_locker_state* state, const char *src_file, uint src_line)
{
  DBUG_ASSERT(state != NULL);

  if (!flag_global_instrumentation)
    return NULL;

  if (!global_idle_class.m_enabled)
    return NULL;

  register uint flags= 0;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (!pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    /* A thread can not be idle in the middle of a statement. */
    DBUG_ASSERT(pfs_thread->m_events_statements_count == 0);

    if (global_idle_class.m_timed)
    {
      /* Idle waits use the dedicated idle timer, not the wait timer. */
      timer_start= get_timer_raw_value_and_function(idle_timer, &state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Refuse the locker when the per-thread nested wait stack is full. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      wait->m_event_type= EVENT_TYPE_WAIT;
      /*
        IDLE events are waits, but by definition we know that
        such waits happen outside of any STAGE and STATEMENT,
        so they have no parents.
      */
      wait->m_nesting_event_id= 0;
      /* no need to set wait->m_nesting_event_type */

      wait->m_thread= pfs_thread;
      wait->m_class= &global_idle_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= OPERATION_TYPE_IDLE;
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_IDLE;

      /* Publish the event by advancing the stack top, after all fields are set. */
      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (global_idle_class.m_timed)
    {
      timer_start= get_timer_raw_value_and_function(idle_timer, &state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
    }
  }

  state->m_flags= flags;
  return reinterpret_cast<PSI_idle_locker*> (state);
}
3401
3402 /**
3403 Implementation of the mutex instrumentation interface.
3404 @sa PSI_v1::end_idle_wait.
3405 */
static void end_idle_wait_v1(PSI_idle_locker* locker)
{
  PSI_idle_locker_state *state= reinterpret_cast<PSI_idle_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);
  ulonglong timer_end= 0;
  ulonglong wait_time= 0;

  register uint flags= state->m_flags;

  if (flags & STATE_FLAG_TIMED)
  {
    /* Stop the timer started in start_idle_wait_v1(). */
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
  }

  if (flags & STATE_FLAG_THREAD)
  {
    PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;

    if (flags & STATE_FLAG_TIMED)
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
      event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_value(wait_time);
    }
    else
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
      event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_counted();
    }

    if (flags & STATE_FLAG_EVENT)
    {
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      /* Finalize the event record, flush it to history, then pop the stack. */
      wait->m_timer_end= timer_end;
      wait->m_end_event_id= thread->m_event_id;
      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }

  if (flags & STATE_FLAG_TIMED)
  {
    /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (timed) */
    global_idle_stat.aggregate_value(wait_time);
  }
  else
  {
    /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (counted) */
    global_idle_stat.aggregate_counted();
  }
}
3466
3467 /**
3468 Implementation of the mutex instrumentation interface.
3469 @sa PSI_v1::end_mutex_wait.
3470 */
end_mutex_wait_v1(PSI_mutex_locker * locker,int rc)3471 static void end_mutex_wait_v1(PSI_mutex_locker* locker, int rc)
3472 {
3473 PSI_mutex_locker_state *state= reinterpret_cast<PSI_mutex_locker_state*> (locker);
3474 DBUG_ASSERT(state != NULL);
3475
3476 ulonglong timer_end= 0;
3477 ulonglong wait_time= 0;
3478
3479 PFS_mutex *mutex= reinterpret_cast<PFS_mutex *> (state->m_mutex);
3480 DBUG_ASSERT(mutex != NULL);
3481 PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3482
3483 register uint flags= state->m_flags;
3484
3485 if (flags & STATE_FLAG_TIMED)
3486 {
3487 timer_end= state->m_timer();
3488 wait_time= timer_end - state->m_timer_start;
3489 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3490 mutex->m_mutex_stat.m_wait_stat.aggregate_value(wait_time);
3491 }
3492 else
3493 {
3494 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3495 mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
3496 }
3497
3498 if (likely(rc == 0))
3499 {
3500 mutex->m_owner= thread;
3501 mutex->m_last_locked= timer_end;
3502 }
3503
3504 if (flags & STATE_FLAG_THREAD)
3505 {
3506 PFS_single_stat *event_name_array;
3507 event_name_array= thread->m_instr_class_waits_stats;
3508 uint index= mutex->m_class->m_event_name_index;
3509
3510 if (flags & STATE_FLAG_TIMED)
3511 {
3512 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3513 event_name_array[index].aggregate_value(wait_time);
3514 }
3515 else
3516 {
3517 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3518 event_name_array[index].aggregate_counted();
3519 }
3520
3521 if (flags & STATE_FLAG_EVENT)
3522 {
3523 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3524 DBUG_ASSERT(wait != NULL);
3525
3526 wait->m_timer_end= timer_end;
3527 wait->m_end_event_id= thread->m_event_id;
3528 if (flag_events_waits_history)
3529 insert_events_waits_history(thread, wait);
3530 if (flag_events_waits_history_long)
3531 insert_events_waits_history_long(wait);
3532 thread->m_events_waits_current--;
3533
3534 DBUG_ASSERT(wait == thread->m_events_waits_current);
3535 }
3536 }
3537 }
3538
3539 /**
3540 Implementation of the rwlock instrumentation interface.
3541 @sa PSI_v1::end_rwlock_rdwait.
3542 */
static void end_rwlock_rdwait_v1(PSI_rwlock_locker* locker, int rc)
{
  PSI_rwlock_locker_state *state= reinterpret_cast<PSI_rwlock_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  ulonglong timer_end= 0;
  ulonglong wait_time= 0;

  PFS_rwlock *rwlock= reinterpret_cast<PFS_rwlock *> (state->m_rwlock);
  DBUG_ASSERT(rwlock != NULL);

  if (state->m_flags & STATE_FLAG_TIMED)
  {
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
    rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
  }
  else
  {
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
    rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
  }

  /* rc == 0 means the read lock was acquired. */
  if (rc == 0)
  {
    /*
      Warning:
      Multiple threads can execute this section concurrently
      (since multiple readers can execute in parallel).
      The statistics generated are not safe, which is why they are
      just statistics, not facts.
    */
    if (rwlock->m_readers == 0)
      rwlock->m_last_read= timer_end;
    rwlock->m_writer= NULL;
    rwlock->m_readers++;
  }

  if (state->m_flags & STATE_FLAG_THREAD)
  {
    PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
    DBUG_ASSERT(thread != NULL);

    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;
    uint index= rwlock->m_class->m_event_name_index;

    if (state->m_flags & STATE_FLAG_TIMED)
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
      event_name_array[index].aggregate_value(wait_time);
    }
    else
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
      event_name_array[index].aggregate_counted();
    }

    if (state->m_flags & STATE_FLAG_EVENT)
    {
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      /* Finalize the event record, flush it to history, then pop the stack. */
      wait->m_timer_end= timer_end;
      wait->m_end_event_id= thread->m_event_id;
      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }
}
3619
3620 /**
3621 Implementation of the rwlock instrumentation interface.
3622 @sa PSI_v1::end_rwlock_wrwait.
3623 */
static void end_rwlock_wrwait_v1(PSI_rwlock_locker* locker, int rc)
{
  PSI_rwlock_locker_state *state= reinterpret_cast<PSI_rwlock_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  ulonglong timer_end= 0;
  ulonglong wait_time= 0;

  PFS_rwlock *rwlock= reinterpret_cast<PFS_rwlock *> (state->m_rwlock);
  DBUG_ASSERT(rwlock != NULL);
  PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);

  if (state->m_flags & STATE_FLAG_TIMED)
  {
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
    rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
  }
  else
  {
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
    rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
  }

  /* rc == 0 means the write lock was acquired. */
  if (likely(rc == 0))
  {
    /* Thread safe : we are protected by the instrumented rwlock */
    rwlock->m_writer= thread;
    /* Note: timer_end is 0 here when the wait was not timed. */
    rwlock->m_last_written= timer_end;
    /* Reset the readers stats, they could be off */
    rwlock->m_readers= 0;
    rwlock->m_last_read= 0;
  }

  if (state->m_flags & STATE_FLAG_THREAD)
  {
    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;
    uint index= rwlock->m_class->m_event_name_index;

    if (state->m_flags & STATE_FLAG_TIMED)
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
      event_name_array[index].aggregate_value(wait_time);
    }
    else
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
      event_name_array[index].aggregate_counted();
    }

    if (state->m_flags & STATE_FLAG_EVENT)
    {
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      /* Finalize the event record, flush it to history, then pop the stack. */
      wait->m_timer_end= timer_end;
      wait->m_end_event_id= thread->m_event_id;
      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }
}
3693
3694 /**
3695 Implementation of the cond instrumentation interface.
3696 @sa PSI_v1::end_cond_wait.
3697 */
static void end_cond_wait_v1(PSI_cond_locker* locker, int rc)
{
  PSI_cond_locker_state *state= reinterpret_cast<PSI_cond_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  ulonglong timer_end= 0;
  ulonglong wait_time= 0;

  PFS_cond *cond= reinterpret_cast<PFS_cond *> (state->m_cond);
  /* PFS_mutex *mutex= reinterpret_cast<PFS_mutex *> (state->m_mutex); */

  /* Note: rc is currently unused; the wait is recorded regardless of outcome. */

  if (state->m_flags & STATE_FLAG_TIMED)
  {
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
    cond->m_cond_stat.m_wait_stat.aggregate_value(wait_time);
  }
  else
  {
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
    cond->m_cond_stat.m_wait_stat.aggregate_counted();
  }

  if (state->m_flags & STATE_FLAG_THREAD)
  {
    PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
    DBUG_ASSERT(thread != NULL);

    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;
    uint index= cond->m_class->m_event_name_index;

    if (state->m_flags & STATE_FLAG_TIMED)
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
      event_name_array[index].aggregate_value(wait_time);
    }
    else
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
      event_name_array[index].aggregate_counted();
    }

    if (state->m_flags & STATE_FLAG_EVENT)
    {
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      /* Finalize the event record, flush it to history, then pop the stack. */
      wait->m_timer_end= timer_end;
      wait->m_end_event_id= thread->m_event_id;
      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }
}
3759
3760 /**
3761 Implementation of the table instrumentation interface.
3762 @sa PSI_v1::end_table_io_wait.
3763 */
static void end_table_io_wait_v1(PSI_table_locker* locker)
{
  PSI_table_locker_state *state= reinterpret_cast<PSI_table_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  ulonglong timer_end= 0;
  ulonglong wait_time= 0;

  PFS_table *table= reinterpret_cast<PFS_table *> (state->m_table);
  DBUG_ASSERT(table != NULL);

  PFS_single_stat *stat;
  PFS_table_io_stat *table_io_stat;

  /* m_index is either a real index number or MAX_INDEXES (no index used). */
  DBUG_ASSERT((state->m_index < table->m_share->m_key_count) ||
              (state->m_index == MAX_INDEXES));

  table_io_stat= & table->m_table_stat.m_index_stat[state->m_index];
  table_io_stat->m_has_data= true;

  /* Pick the per-operation statistic bucket (fetch/insert/update/delete). */
  switch (state->m_io_operation)
  {
  case PSI_TABLE_FETCH_ROW:
    stat= & table_io_stat->m_fetch;
    break;
  case PSI_TABLE_WRITE_ROW:
    stat= & table_io_stat->m_insert;
    break;
  case PSI_TABLE_UPDATE_ROW:
    stat= & table_io_stat->m_update;
    break;
  case PSI_TABLE_DELETE_ROW:
    stat= & table_io_stat->m_delete;
    break;
  default:
    DBUG_ASSERT(false);
    stat= NULL;
    break;
  }

  register uint flags= state->m_flags;

  if (flags & STATE_FLAG_TIMED)
  {
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
    stat->aggregate_value(wait_time);
  }
  else
  {
    stat->aggregate_counted();
  }

  if (flags & STATE_FLAG_THREAD)
  {
    PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
    DBUG_ASSERT(thread != NULL);

    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;

    /*
      Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
      (for wait/io/table/sql/handler)
    */
    if (flags & STATE_FLAG_TIMED)
    {
      event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_value(wait_time);
    }
    else
    {
      event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_counted();
    }

    if (flags & STATE_FLAG_EVENT)
    {
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      /* Finalize the event record, flush it to history, then pop the stack. */
      wait->m_timer_end= timer_end;
      wait->m_end_event_id= thread->m_event_id;
      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }

  table->m_has_io_stats= true;
}
3857
3858 /**
3859 Implementation of the table instrumentation interface.
3860 @sa PSI_v1::end_table_lock_wait.
3861 */
static void end_table_lock_wait_v1(PSI_table_locker* locker)
{
  PSI_table_locker_state *state= reinterpret_cast<PSI_table_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  ulonglong timer_end= 0;
  ulonglong wait_time= 0;

  PFS_table *table= reinterpret_cast<PFS_table *> (state->m_table);
  DBUG_ASSERT(table != NULL);

  /* Here m_index is the lock-type index into the per-table lock statistics. */
  PFS_single_stat *stat= & table->m_table_stat.m_lock_stat.m_stat[state->m_index];

  register uint flags= state->m_flags;

  if (flags & STATE_FLAG_TIMED)
  {
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
    stat->aggregate_value(wait_time);
  }
  else
  {
    stat->aggregate_counted();
  }

  if (flags & STATE_FLAG_THREAD)
  {
    PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
    DBUG_ASSERT(thread != NULL);

    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;

    /*
      Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
      (for wait/lock/table/sql/handler)
    */
    if (flags & STATE_FLAG_TIMED)
    {
      event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_value(wait_time);
    }
    else
    {
      event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_counted();
    }

    if (flags & STATE_FLAG_EVENT)
    {
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      /* Finalize the event record, flush it to history, then pop the stack. */
      wait->m_timer_end= timer_end;
      wait->m_end_event_id= thread->m_event_id;
      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }

  table->m_has_lock_stats= true;
}
3928
3929 static void start_file_wait_v1(PSI_file_locker *locker,
3930 size_t count,
3931 const char *src_file,
3932 uint src_line);
3933
3934 static void end_file_wait_v1(PSI_file_locker *locker,
3935 size_t count);
3936
3937 /**
3938 Implementation of the file instrumentation interface.
3939 @sa PSI_v1::start_file_open_wait.
3940 */
start_file_open_wait_v1(PSI_file_locker * locker,const char * src_file,uint src_line)3941 static void start_file_open_wait_v1(PSI_file_locker *locker,
3942 const char *src_file,
3943 uint src_line)
3944 {
3945 start_file_wait_v1(locker, 0, src_file, src_line);
3946
3947 return;
3948 }
3949
3950 /**
3951 Implementation of the file instrumentation interface.
3952 @sa PSI_v1::end_file_open_wait.
3953 */
static PSI_file* end_file_open_wait_v1(PSI_file_locker *locker,
                                       void *result)
{
  PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  switch (state->m_operation)
  {
  case PSI_FILE_STAT:
  case PSI_FILE_RENAME:
    /* No file instance is attached for stat/rename operations. */
    break;
  case PSI_FILE_STREAM_OPEN:
  case PSI_FILE_CREATE:
  case PSI_FILE_OPEN:
    if (result != NULL)
    {
      /* The open succeeded: attach a PFS_file instance to the locker. */
      PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
      PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
      const char *name= state->m_name;
      uint len= strlen(name);
      PFS_file *pfs_file= find_or_create_file(thread, klass, name, len, true);
      state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
    }
    break;
  default:
    /* This locker was not created for an open-like operation. */
    DBUG_ASSERT(false);
    break;
  }

  /* Record the wait for the open itself (zero bytes transferred). */
  end_file_wait_v1(locker, 0);

  return state->m_file;
}
3987
3988 /**
3989 Implementation of the file instrumentation interface.
3990 @sa PSI_v1::end_file_open_wait_and_bind_to_descriptor.
3991 */
static void end_file_open_wait_and_bind_to_descriptor_v1
  (PSI_file_locker *locker, File file)
{
  PFS_file *pfs_file= NULL;
  int index= (int) file;
  PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  if (index >= 0)
  {
    /* The open succeeded: attach a PFS_file instance to the locker. */
    PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
    PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
    const char *name= state->m_name;
    uint len= strlen(name);
    pfs_file= find_or_create_file(thread, klass, name, len, true);
    state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
  }

  /* Record the wait for the open itself (zero bytes transferred). */
  end_file_wait_v1(locker, 0);

  if (likely(index >= 0))
  {
    if (likely(index < file_handle_max))
      /* Bind descriptor -> instrumentation, for later lookup by fd. */
      file_handle_array[index]= pfs_file;
    else
    {
      /*
        The descriptor does not fit in the handle array:
        release the instance and account the loss.
      */
      if (pfs_file != NULL)
        release_file(pfs_file);
      file_handle_lost++;
    }
  }
}
4024
4025 /**
4026 Implementation of the file instrumentation interface.
4027 @sa PSI_v1::start_file_wait.
4028 */
start_file_wait_v1(PSI_file_locker * locker,size_t count,const char * src_file,uint src_line)4029 static void start_file_wait_v1(PSI_file_locker *locker,
4030 size_t count,
4031 const char *src_file,
4032 uint src_line)
4033 {
4034 ulonglong timer_start= 0;
4035 PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4036 DBUG_ASSERT(state != NULL);
4037
4038 register uint flags= state->m_flags;
4039
4040 if (flags & STATE_FLAG_TIMED)
4041 {
4042 timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
4043 state->m_timer_start= timer_start;
4044 }
4045
4046 if (flags & STATE_FLAG_EVENT)
4047 {
4048 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
4049 DBUG_ASSERT(wait != NULL);
4050
4051 wait->m_timer_start= timer_start;
4052 wait->m_source_file= src_file;
4053 wait->m_source_line= src_line;
4054 wait->m_number_of_bytes= count;
4055 }
4056 }
4057
4058 /**
4059 Implementation of the file instrumentation interface.
4060 @sa PSI_v1::end_file_wait.
4061 */
static void end_file_wait_v1(PSI_file_locker *locker,
                             size_t byte_count)
{
  PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);
  PFS_file *file= reinterpret_cast<PFS_file *> (state->m_file);
  PFS_file_class *klass= reinterpret_cast<PFS_file_class *> (state->m_class);
  PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);

  ulonglong timer_end= 0;
  ulonglong wait_time= 0;
  PFS_byte_stat *byte_stat;
  register uint flags= state->m_flags;
  /* Negative byte counts (error returns from I/O calls) count as zero bytes. */
  size_t bytes= ((int)byte_count > -1 ? byte_count : 0);

  PFS_file_stat *file_stat;

  if (file != NULL)
  {
    /* Aggregate to the file instance when one is attached to this locker. */
    file_stat= & file->m_file_stat;
  }
  else
  {
    /* Otherwise aggregate to the file class (the instrument itself). */
    file_stat= & klass->m_file_stat;
  }

  /* Select the read / write / misc bucket based on the operation. */
  switch (state->m_operation)
  {
  /* Group read operations */
  case PSI_FILE_READ:
    byte_stat= &file_stat->m_io_stat.m_read;
    break;
  /* Group write operations */
  case PSI_FILE_WRITE:
    byte_stat= &file_stat->m_io_stat.m_write;
    break;
  /* Group remaining operations as miscellaneous */
  case PSI_FILE_CREATE:
  case PSI_FILE_CREATE_TMP:
  case PSI_FILE_OPEN:
  case PSI_FILE_STREAM_OPEN:
  case PSI_FILE_STREAM_CLOSE:
  case PSI_FILE_SEEK:
  case PSI_FILE_TELL:
  case PSI_FILE_FLUSH:
  case PSI_FILE_FSTAT:
  case PSI_FILE_CHSIZE:
  case PSI_FILE_DELETE:
  case PSI_FILE_RENAME:
  case PSI_FILE_SYNC:
  case PSI_FILE_STAT:
  case PSI_FILE_CLOSE:
    byte_stat= &file_stat->m_io_stat.m_misc;
    break;
  default:
    DBUG_ASSERT(false);
    byte_stat= NULL;
    break;
  }

  /* Aggregation for EVENTS_WAITS_SUMMARY_BY_INSTANCE */
  if (flags & STATE_FLAG_TIMED)
  {
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
    byte_stat->aggregate(wait_time, bytes);
  }
  else
  {
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
    byte_stat->aggregate_counted(bytes);
  }

  if (flags & STATE_FLAG_THREAD)
  {
    DBUG_ASSERT(thread != NULL);

    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;
    uint index= klass->m_event_name_index;

    if (flags & STATE_FLAG_TIMED)
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
      event_name_array[index].aggregate_value(wait_time);
    }
    else
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
      event_name_array[index].aggregate_counted();
    }

    if (state->m_flags & STATE_FLAG_EVENT)
    {
      /* Complete the wait record and flush it to the history tables. */
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      wait->m_timer_end= timer_end;
      wait->m_number_of_bytes= bytes;
      wait->m_end_event_id= thread->m_event_id;
      wait->m_object_instance_addr= file;
      /* Weak reference: file may be recycled, version detects staleness. */
      wait->m_weak_file= file;
      wait->m_weak_version= (file ? file->get_version() : 0);

      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }
}
4177
4178 /**
4179 Implementation of the file instrumentation interface.
4180 @sa PSI_v1::start_file_close_wait.
4181 */
static void start_file_close_wait_v1(PSI_file_locker *locker,
                                     const char *src_file,
                                     uint src_line)
{
  PFS_thread *thread;
  const char *name;
  uint len;
  PFS_file *pfs_file;
  PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  switch (state->m_operation)
  {
  case PSI_FILE_DELETE:
    /*
      Look up the file instance by name before the delete completes
      (klass == NULL, create == false), so end_file_close_wait_v1()
      can destroy it on success.
    */
    thread= reinterpret_cast<PFS_thread*> (state->m_thread);
    name= state->m_name;
    len= strlen(name);
    pfs_file= find_or_create_file(thread, NULL, name, len, false);
    state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
    break;
  case PSI_FILE_STREAM_CLOSE:
  case PSI_FILE_CLOSE:
    /* The instance (if any) is already attached to the locker. */
    break;
  default:
    /* This locker was not created for a close-like operation. */
    DBUG_ASSERT(false);
    break;
  }

  start_file_wait_v1(locker, 0, src_file, src_line);

  return;
}
4214
4215 /**
4216 Implementation of the file instrumentation interface.
4217 @sa PSI_v1::end_file_close_wait.
4218 */
end_file_close_wait_v1(PSI_file_locker * locker,int rc)4219 static void end_file_close_wait_v1(PSI_file_locker *locker, int rc)
4220 {
4221 PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4222 DBUG_ASSERT(state != NULL);
4223
4224 end_file_wait_v1(locker, 0);
4225
4226 if (rc == 0)
4227 {
4228 PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4229 PFS_file *file= reinterpret_cast<PFS_file*> (state->m_file);
4230
4231 /* Release or destroy the file if necessary */
4232 switch(state->m_operation)
4233 {
4234 case PSI_FILE_CLOSE:
4235 case PSI_FILE_STREAM_CLOSE:
4236 if (file != NULL)
4237 release_file(file);
4238 break;
4239 case PSI_FILE_DELETE:
4240 if (file != NULL)
4241 destroy_file(thread, file);
4242 break;
4243 default:
4244 DBUG_ASSERT(false);
4245 break;
4246 }
4247 }
4248 return;
4249 }
4250
start_stage_v1(PSI_stage_key key,const char * src_file,int src_line)4251 static void start_stage_v1(PSI_stage_key key, const char *src_file, int src_line)
4252 {
4253 ulonglong timer_value= 0;
4254
4255 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4256 if (unlikely(pfs_thread == NULL))
4257 return;
4258
4259 /* Always update column threads.processlist_state. */
4260 pfs_thread->m_stage= key;
4261
4262 if (! flag_global_instrumentation)
4263 return;
4264
4265 if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
4266 return;
4267
4268 PFS_events_stages *pfs= & pfs_thread->m_stage_current;
4269 PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4270 PFS_events_statements *parent_statement= & pfs_thread->m_statement_stack[0];
4271
4272 PFS_instr_class *old_class= pfs->m_class;
4273 if (old_class != NULL)
4274 {
4275 PFS_stage_stat *event_name_array;
4276 event_name_array= pfs_thread->m_instr_class_stages_stats;
4277 uint index= old_class->m_event_name_index;
4278
4279 /* Finish old event */
4280 if (old_class->m_timed)
4281 {
4282 timer_value= get_timer_raw_value(stage_timer);;
4283 pfs->m_timer_end= timer_value;
4284
4285 /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4286 ulonglong stage_time= timer_value - pfs->m_timer_start;
4287 event_name_array[index].aggregate_value(stage_time);
4288 }
4289 else
4290 {
4291 /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4292 event_name_array[index].aggregate_counted();
4293 }
4294
4295 if (flag_events_stages_current)
4296 {
4297 pfs->m_end_event_id= pfs_thread->m_event_id;
4298 if (flag_events_stages_history)
4299 insert_events_stages_history(pfs_thread, pfs);
4300 if (flag_events_stages_history_long)
4301 insert_events_stages_history_long(pfs);
4302 }
4303
4304 /* This stage event is now complete. */
4305 pfs->m_class= NULL;
4306
4307 /* New waits will now be attached directly to the parent statement. */
4308 child_wait->m_event_id= parent_statement->m_event_id;
4309 child_wait->m_event_type= parent_statement->m_event_type;
4310 /* See below for new stages, that may overwrite this. */
4311 }
4312
4313 /* Start new event */
4314
4315 PFS_stage_class *new_klass= find_stage_class(key);
4316 if (unlikely(new_klass == NULL))
4317 return;
4318
4319 if (! new_klass->m_enabled)
4320 return;
4321
4322 pfs->m_class= new_klass;
4323 if (new_klass->m_timed)
4324 {
4325 /*
4326 Do not call the timer again if we have a
4327 TIMER_END for the previous stage already.
4328 */
4329 if (timer_value == 0)
4330 timer_value= get_timer_raw_value(stage_timer);
4331 pfs->m_timer_start= timer_value;
4332 }
4333 else
4334 pfs->m_timer_start= 0;
4335 pfs->m_timer_end= 0;
4336
4337 if (flag_events_stages_current)
4338 {
4339 /* m_thread_internal_id is immutable and already set */
4340 DBUG_ASSERT(pfs->m_thread_internal_id == pfs_thread->m_thread_internal_id);
4341 pfs->m_event_id= pfs_thread->m_event_id++;
4342 pfs->m_end_event_id= 0;
4343 pfs->m_source_file= src_file;
4344 pfs->m_source_line= src_line;
4345
4346 /* New wait events will have this new stage as parent. */
4347 child_wait->m_event_id= pfs->m_event_id;
4348 child_wait->m_event_type= EVENT_TYPE_STAGE;
4349 }
4350 }
4351
end_stage_v1()4352 static void end_stage_v1()
4353 {
4354 ulonglong timer_value= 0;
4355
4356 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4357 if (unlikely(pfs_thread == NULL))
4358 return;
4359
4360 pfs_thread->m_stage= 0;
4361
4362 if (! flag_global_instrumentation)
4363 return;
4364
4365 if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
4366 return;
4367
4368 PFS_events_stages *pfs= & pfs_thread->m_stage_current;
4369
4370 PFS_instr_class *old_class= pfs->m_class;
4371 if (old_class != NULL)
4372 {
4373 PFS_stage_stat *event_name_array;
4374 event_name_array= pfs_thread->m_instr_class_stages_stats;
4375 uint index= old_class->m_event_name_index;
4376
4377 /* Finish old event */
4378 if (old_class->m_timed)
4379 {
4380 timer_value= get_timer_raw_value(stage_timer);;
4381 pfs->m_timer_end= timer_value;
4382
4383 /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4384 ulonglong stage_time= timer_value - pfs->m_timer_start;
4385 event_name_array[index].aggregate_value(stage_time);
4386 }
4387 else
4388 {
4389 /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4390 event_name_array[index].aggregate_counted();
4391 }
4392
4393 if (flag_events_stages_current)
4394 {
4395 pfs->m_end_event_id= pfs_thread->m_event_id;
4396 if (flag_events_stages_history)
4397 insert_events_stages_history(pfs_thread, pfs);
4398 if (flag_events_stages_history_long)
4399 insert_events_stages_history_long(pfs);
4400 }
4401
4402 /* New waits will now be attached directly to the parent statement. */
4403 PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4404 PFS_events_statements *parent_statement= & pfs_thread->m_statement_stack[0];
4405 child_wait->m_event_id= parent_statement->m_event_id;
4406 child_wait->m_event_type= parent_statement->m_event_type;
4407
4408 /* This stage is completed */
4409 pfs->m_class= NULL;
4410 }
4411 }
4412
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::get_thread_statement_locker.
*/
static PSI_statement_locker*
get_thread_statement_locker_v1(PSI_statement_locker_state *state,
                               PSI_statement_key key,
                               const void *charset)
{
  DBUG_ASSERT(state != NULL);
  DBUG_ASSERT(charset != NULL);

  if (! flag_global_instrumentation)
    return NULL;
  PFS_statement_class *klass= find_statement_class(key);
  if (unlikely(klass == NULL))
    return NULL;
  if (! klass->m_enabled)
    return NULL;

  register uint flags;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (klass->m_timed)
      flags|= STATE_FLAG_TIMED;

    if (flag_events_statements_current)
    {
      ulonglong event_id= pfs_thread->m_event_id++;

      /* No room on the per-thread statement stack: do not instrument. */
      if (pfs_thread->m_events_statements_count >= statement_stack_max)
      {
        return NULL;
      }

      /* Reserve a row on the statement stack, under the optimistic lock. */
      pfs_thread->m_stmt_lock.allocated_to_dirty();
      PFS_events_statements *pfs= & pfs_thread->m_statement_stack[pfs_thread->m_events_statements_count];
      /* m_thread_internal_id is immutable and already set */
      DBUG_ASSERT(pfs->m_thread_internal_id == pfs_thread->m_thread_internal_id);
      pfs->m_event_id= event_id;
      pfs->m_end_event_id= 0;
      pfs->m_class= klass;
      pfs->m_timer_start= 0;
      pfs->m_timer_end= 0;
      pfs->m_lock_time= 0;
      pfs->m_current_schema_name_length= 0;
      pfs->m_sqltext_length= 0;
      pfs->m_sqltext_truncated= false;
      pfs->m_sqltext_cs_number= system_charset_info->number; /* default */

      /* Reset the diagnostics columns. */
      pfs->m_message_text[0]= '\0';
      pfs->m_sql_errno= 0;
      pfs->m_sqlstate[0]= '\0';
      pfs->m_error_count= 0;
      pfs->m_warning_count= 0;
      pfs->m_rows_affected= 0;

      /* Reset the optimizer / execution metric columns. */
      pfs->m_rows_sent= 0;
      pfs->m_rows_examined= 0;
      pfs->m_created_tmp_disk_tables= 0;
      pfs->m_created_tmp_tables= 0;
      pfs->m_select_full_join= 0;
      pfs->m_select_full_range_join= 0;
      pfs->m_select_range= 0;
      pfs->m_select_range_check= 0;
      pfs->m_select_scan= 0;
      pfs->m_sort_merge_passes= 0;
      pfs->m_sort_range= 0;
      pfs->m_sort_rows= 0;
      pfs->m_sort_scan= 0;
      pfs->m_no_index_used= 0;
      pfs->m_no_good_index_used= 0;
      pfs->m_digest_storage.reset();

      /* New stages will have this statement as parent */
      PFS_events_stages *child_stage= & pfs_thread->m_stage_current;
      child_stage->m_nesting_event_id= event_id;
      child_stage->m_nesting_event_type= EVENT_TYPE_STATEMENT;

      /* New waits will have this statement as parent, if no stage is instrumented */
      PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
      child_wait->m_nesting_event_id= event_id;
      child_wait->m_nesting_event_type= EVENT_TYPE_STATEMENT;

      state->m_statement= pfs;
      flags|= STATE_FLAG_EVENT;

      pfs_thread->m_events_statements_count++;
      pfs_thread->m_stmt_lock.dirty_to_allocated();
    }
  }
  else
  {
    if (klass->m_timed)
      flags= STATE_FLAG_TIMED;
    else
      flags= 0;
  }

  if (flag_statements_digest)
  {
    flags|= STATE_FLAG_DIGEST;
  }

  /* Initialize the locker state handed back to the caller. */
  state->m_discarded= false;
  state->m_class= klass;
  state->m_flags= flags;

  state->m_lock_time= 0;
  state->m_rows_sent= 0;
  state->m_rows_examined= 0;
  state->m_created_tmp_disk_tables= 0;
  state->m_created_tmp_tables= 0;
  state->m_select_full_join= 0;
  state->m_select_full_range_join= 0;
  state->m_select_range= 0;
  state->m_select_range_check= 0;
  state->m_select_scan= 0;
  state->m_sort_merge_passes= 0;
  state->m_sort_range= 0;
  state->m_sort_rows= 0;
  state->m_sort_scan= 0;
  state->m_no_index_used= 0;
  state->m_no_good_index_used= 0;

  state->m_digest= NULL;

  state->m_schema_name_length= 0;
  state->m_cs_number= ((CHARSET_INFO *)charset)->number;

  return reinterpret_cast<PSI_statement_locker*> (state);
}
4550
/**
  Implementation of the statement instrumentation interface.
  Re-classify a statement locker once the real statement type is known.
  @sa PSI_v1::refine_statement.
*/
static PSI_statement_locker*
refine_statement_v1(PSI_statement_locker *locker,
                    PSI_statement_key key)
{
  PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
  if (state == NULL)
    return NULL;
  DBUG_ASSERT(state->m_class != NULL);
  PFS_statement_class *klass;
  /* Only refine statements for mutable instrumentation */
  klass= reinterpret_cast<PFS_statement_class*> (state->m_class);
  DBUG_ASSERT(klass->is_mutable());
  klass= find_statement_class(key);

  uint flags= state->m_flags;

  if (unlikely(klass == NULL) || !klass->m_enabled)
  {
    /* The refined class is unknown or disabled: abandon instrumentation. */
    /* pop statement stack */
    if (flags & STATE_FLAG_THREAD)
    {
      PFS_thread *pfs_thread= reinterpret_cast<PFS_thread *> (state->m_thread);
      DBUG_ASSERT(pfs_thread != NULL);
      if (pfs_thread->m_events_statements_count > 0)
        pfs_thread->m_events_statements_count--;
    }

    state->m_discarded= true;
    return NULL;
  }

  /* Drop the TIMED flag if the refined class is not timed. */
  if ((flags & STATE_FLAG_TIMED) && ! klass->m_timed)
    flags= flags & ~STATE_FLAG_TIMED;

  if (flags & STATE_FLAG_EVENT)
  {
    PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
    DBUG_ASSERT(pfs != NULL);

    /* mutate EVENTS_STATEMENTS_CURRENT.EVENT_NAME */
    pfs->m_class= klass;
  }

  state->m_class= klass;
  state->m_flags= flags;
  return reinterpret_cast<PSI_statement_locker*> (state);
}
4598
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::start_statement.
*/
static void start_statement_v1(PSI_statement_locker *locker,
                               const char *db, uint db_len,
                               const char *src_file, uint src_line)
{
  PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  register uint flags= state->m_flags;
  ulonglong timer_start= 0;

  if (flags & STATE_FLAG_TIMED)
  {
    /* Capture the start time, and remember which timer to call at end. */
    timer_start= get_timer_raw_value_and_function(statement_timer, & state->m_timer);
    state->m_timer_start= timer_start;
  }

  compile_time_assert(PSI_SCHEMA_NAME_LEN == NAME_LEN);
  DBUG_ASSERT(db_len <= sizeof(state->m_schema_name));

  /* Record the current schema, used later for digest aggregation. */
  if (db_len > 0)
    memcpy(state->m_schema_name, db, db_len);
  state->m_schema_name_length= db_len;

  if (flags & STATE_FLAG_EVENT)
  {
    /* Populate the row visible in EVENTS_STATEMENTS_CURRENT. */
    PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
    DBUG_ASSERT(pfs != NULL);

    pfs->m_timer_start= timer_start;
    pfs->m_source_file= src_file;
    pfs->m_source_line= src_line;

    DBUG_ASSERT(db_len <= sizeof(pfs->m_current_schema_name));
    if (db_len > 0)
      memcpy(pfs->m_current_schema_name, db, db_len);
    pfs->m_current_schema_name_length= db_len;
  }
}
4637
set_statement_text_v1(PSI_statement_locker * locker,const char * text,uint text_len)4638 static void set_statement_text_v1(PSI_statement_locker *locker,
4639 const char *text, uint text_len)
4640 {
4641 PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4642 DBUG_ASSERT(state != NULL);
4643
4644 if (state->m_discarded)
4645 return;
4646
4647 if (state->m_flags & STATE_FLAG_EVENT)
4648 {
4649 PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4650 DBUG_ASSERT(pfs != NULL);
4651 if (text_len > sizeof (pfs->m_sqltext))
4652 {
4653 text_len= sizeof(pfs->m_sqltext);
4654 pfs->m_sqltext_truncated= true;
4655 }
4656 if (text_len)
4657 memcpy(pfs->m_sqltext, text, text_len);
4658 pfs->m_sqltext_length= text_len;
4659 pfs->m_sqltext_cs_number= state->m_cs_number;
4660 }
4661
4662 return;
4663 }
4664
/**
  Set an attribute to VALUE, on both the statement locker state and,
  when the locker has an associated event (STATE_FLAG_EVENT), on the
  EVENTS_STATEMENTS_CURRENT row.
  Returns early when the locker is NULL or the statement was discarded.
*/
#define SET_STATEMENT_ATTR_BODY(LOCKER, ATTR, VALUE) \
  PSI_statement_locker_state *state; \
  state= reinterpret_cast<PSI_statement_locker_state*> (LOCKER); \
  if (unlikely(state == NULL)) \
    return; \
  if (state->m_discarded) \
    return; \
  state->ATTR= VALUE; \
  if (state->m_flags & STATE_FLAG_EVENT) \
  { \
    PFS_events_statements *pfs; \
    pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement); \
    DBUG_ASSERT(pfs != NULL); \
    pfs->ATTR= VALUE; \
  } \
  return;
4681
/**
  Increment an attribute by VALUE, on both the statement locker state
  and, when the locker has an associated event (STATE_FLAG_EVENT), on
  the EVENTS_STATEMENTS_CURRENT row.
  Returns early when the locker is NULL or the statement was discarded.
*/
#define INC_STATEMENT_ATTR_BODY(LOCKER, ATTR, VALUE) \
  PSI_statement_locker_state *state; \
  state= reinterpret_cast<PSI_statement_locker_state*> (LOCKER); \
  if (unlikely(state == NULL)) \
    return; \
  if (state->m_discarded) \
    return; \
  state->ATTR+= VALUE; \
  if (state->m_flags & STATE_FLAG_EVENT) \
  { \
    PFS_events_statements *pfs; \
    pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement); \
    DBUG_ASSERT(pfs != NULL); \
    pfs->ATTR+= VALUE; \
  } \
  return;
4698
/** @sa PSI_v1::set_statement_lock_time. */
static void set_statement_lock_time_v1(PSI_statement_locker *locker,
                                       ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_lock_time, count);
}

/** @sa PSI_v1::set_statement_rows_sent. */
static void set_statement_rows_sent_v1(PSI_statement_locker *locker,
                                       ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_rows_sent, count);
}

/** @sa PSI_v1::set_statement_rows_examined. */
static void set_statement_rows_examined_v1(PSI_statement_locker *locker,
                                           ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_rows_examined, count);
}

/** @sa PSI_v1::inc_statement_created_tmp_disk_tables. */
static void inc_statement_created_tmp_disk_tables_v1(PSI_statement_locker *locker,
                                                     ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_created_tmp_disk_tables, count);
}

/** @sa PSI_v1::inc_statement_created_tmp_tables. */
static void inc_statement_created_tmp_tables_v1(PSI_statement_locker *locker,
                                                ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_created_tmp_tables, count);
}

/** @sa PSI_v1::inc_statement_select_full_join. */
static void inc_statement_select_full_join_v1(PSI_statement_locker *locker,
                                              ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_full_join, count);
}

/** @sa PSI_v1::inc_statement_select_full_range_join. */
static void inc_statement_select_full_range_join_v1(PSI_statement_locker *locker,
                                                    ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_full_range_join, count);
}

/** @sa PSI_v1::inc_statement_select_range. */
static void inc_statement_select_range_v1(PSI_statement_locker *locker,
                                          ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_range, count);
}

/** @sa PSI_v1::inc_statement_select_range_check. */
static void inc_statement_select_range_check_v1(PSI_statement_locker *locker,
                                                ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_range_check, count);
}

/** @sa PSI_v1::inc_statement_select_scan. */
static void inc_statement_select_scan_v1(PSI_statement_locker *locker,
                                         ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_scan, count);
}

/** @sa PSI_v1::inc_statement_sort_merge_passes. */
static void inc_statement_sort_merge_passes_v1(PSI_statement_locker *locker,
                                               ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_merge_passes, count);
}

/** @sa PSI_v1::inc_statement_sort_range. */
static void inc_statement_sort_range_v1(PSI_statement_locker *locker,
                                        ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_range, count);
}

/** @sa PSI_v1::inc_statement_sort_rows. */
static void inc_statement_sort_rows_v1(PSI_statement_locker *locker,
                                       ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_rows, count);
}

/** @sa PSI_v1::inc_statement_sort_scan. */
static void inc_statement_sort_scan_v1(PSI_statement_locker *locker,
                                       ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_scan, count);
}

/** @sa PSI_v1::set_statement_no_index_used. */
static void set_statement_no_index_used_v1(PSI_statement_locker *locker)
{
  SET_STATEMENT_ATTR_BODY(locker, m_no_index_used, 1);
}

/** @sa PSI_v1::set_statement_no_good_index_used. */
static void set_statement_no_good_index_used_v1(PSI_statement_locker *locker)
{
  SET_STATEMENT_ATTR_BODY(locker, m_no_good_index_used, 1);
}
4792
end_statement_v1(PSI_statement_locker * locker,void * stmt_da)4793 static void end_statement_v1(PSI_statement_locker *locker, void *stmt_da)
4794 {
4795 PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4796 Diagnostics_area *da= reinterpret_cast<Diagnostics_area*> (stmt_da);
4797 DBUG_ASSERT(state != NULL);
4798 DBUG_ASSERT(da != NULL);
4799
4800 if (state->m_discarded)
4801 return;
4802
4803 PFS_statement_class *klass= reinterpret_cast<PFS_statement_class *> (state->m_class);
4804 DBUG_ASSERT(klass != NULL);
4805
4806 ulonglong timer_end= 0;
4807 ulonglong wait_time= 0;
4808 register uint flags= state->m_flags;
4809
4810 if (flags & STATE_FLAG_TIMED)
4811 {
4812 timer_end= state->m_timer();
4813 wait_time= timer_end - state->m_timer_start;
4814 }
4815
4816 PFS_statement_stat *event_name_array;
4817 uint index= klass->m_event_name_index;
4818 PFS_statement_stat *stat;
4819
4820 /*
4821 Capture statement stats by digest.
4822 */
4823 const sql_digest_storage *digest_storage= NULL;
4824 PFS_statement_stat *digest_stat= NULL;
4825
4826 if (flags & STATE_FLAG_THREAD)
4827 {
4828 PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
4829 DBUG_ASSERT(thread != NULL);
4830 event_name_array= thread->m_instr_class_statements_stats;
4831 /* Aggregate to EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME */
4832 stat= & event_name_array[index];
4833
4834 if (flags & STATE_FLAG_DIGEST)
4835 {
4836 digest_storage= state->m_digest;
4837
4838 if (digest_storage != NULL)
4839 {
4840 /* Populate PFS_statements_digest_stat with computed digest information.*/
4841 digest_stat= find_or_create_digest(thread, digest_storage,
4842 state->m_schema_name,
4843 state->m_schema_name_length);
4844 }
4845 }
4846
4847 if (flags & STATE_FLAG_EVENT)
4848 {
4849 PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4850 DBUG_ASSERT(pfs != NULL);
4851
4852 thread->m_stmt_lock.allocated_to_dirty();
4853
4854 switch(da->status())
4855 {
4856 case Diagnostics_area::DA_EMPTY:
4857 break;
4858 case Diagnostics_area::DA_OK:
4859 memcpy(pfs->m_message_text, da->message(), MYSQL_ERRMSG_SIZE);
4860 pfs->m_message_text[MYSQL_ERRMSG_SIZE]= 0;
4861 pfs->m_rows_affected= da->affected_rows();
4862 pfs->m_warning_count= da->statement_warn_count();
4863 memcpy(pfs->m_sqlstate, "00000", SQLSTATE_LENGTH);
4864 break;
4865 case Diagnostics_area::DA_EOF:
4866 pfs->m_warning_count= da->statement_warn_count();
4867 break;
4868 case Diagnostics_area::DA_ERROR:
4869 memcpy(pfs->m_message_text, da->message(), MYSQL_ERRMSG_SIZE);
4870 pfs->m_message_text[MYSQL_ERRMSG_SIZE]= 0;
4871 pfs->m_sql_errno= da->sql_errno();
4872 pfs->m_error_count++;
4873 memcpy(pfs->m_sqlstate, da->get_sqlstate(), SQLSTATE_LENGTH);
4874 break;
4875 case Diagnostics_area::DA_DISABLED:
4876 break;
4877 }
4878
4879 pfs->m_timer_end= timer_end;
4880 pfs->m_end_event_id= thread->m_event_id;
4881
4882 if (digest_storage != NULL)
4883 {
4884 /*
4885 The following columns in events_statement_current:
4886 - DIGEST,
4887 - DIGEST_TEXT
4888 are computed from the digest storage.
4889 */
4890 pfs->m_digest_storage.copy(digest_storage);
4891 }
4892
4893 if (flag_events_statements_history)
4894 insert_events_statements_history(thread, pfs);
4895 if (flag_events_statements_history_long)
4896 insert_events_statements_history_long(pfs);
4897
4898 DBUG_ASSERT(thread->m_events_statements_count > 0);
4899 thread->m_events_statements_count--;
4900 thread->m_stmt_lock.dirty_to_allocated();
4901 }
4902 }
4903 else
4904 {
4905 if (flags & STATE_FLAG_DIGEST)
4906 {
4907 PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4908
4909 /* An instrumented thread is required, for LF_PINS. */
4910 if (thread != NULL)
4911 {
4912 /* Set digest stat. */
4913 digest_storage= state->m_digest;
4914
4915 if (digest_storage != NULL)
4916 {
4917 /* Populate statements_digest_stat with computed digest information. */
4918 digest_stat= find_or_create_digest(thread, digest_storage,
4919 state->m_schema_name,
4920 state->m_schema_name_length);
4921 }
4922 }
4923 }
4924
4925 event_name_array= global_instr_class_statements_array;
4926 /* Aggregate to EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME */
4927 stat= & event_name_array[index];
4928 }
4929
4930 if (flags & STATE_FLAG_TIMED)
4931 {
4932 /* Aggregate to EVENTS_STATEMENTS_SUMMARY_..._BY_EVENT_NAME (timed) */
4933 stat->aggregate_value(wait_time);
4934 }
4935 else
4936 {
4937 /* Aggregate to EVENTS_STATEMENTS_SUMMARY_..._BY_EVENT_NAME (counted) */
4938 stat->aggregate_counted();
4939 }
4940
4941 stat->m_lock_time+= state->m_lock_time;
4942 stat->m_rows_sent+= state->m_rows_sent;
4943 stat->m_rows_examined+= state->m_rows_examined;
4944 stat->m_created_tmp_disk_tables+= state->m_created_tmp_disk_tables;
4945 stat->m_created_tmp_tables+= state->m_created_tmp_tables;
4946 stat->m_select_full_join+= state->m_select_full_join;
4947 stat->m_select_full_range_join+= state->m_select_full_range_join;
4948 stat->m_select_range+= state->m_select_range;
4949 stat->m_select_range_check+= state->m_select_range_check;
4950 stat->m_select_scan+= state->m_select_scan;
4951 stat->m_sort_merge_passes+= state->m_sort_merge_passes;
4952 stat->m_sort_range+= state->m_sort_range;
4953 stat->m_sort_rows+= state->m_sort_rows;
4954 stat->m_sort_scan+= state->m_sort_scan;
4955 stat->m_no_index_used+= state->m_no_index_used;
4956 stat->m_no_good_index_used+= state->m_no_good_index_used;
4957
4958 if (digest_stat != NULL)
4959 {
4960 if (flags & STATE_FLAG_TIMED)
4961 {
4962 digest_stat->aggregate_value(wait_time);
4963 }
4964 else
4965 {
4966 digest_stat->aggregate_counted();
4967 }
4968
4969 digest_stat->m_lock_time+= state->m_lock_time;
4970 digest_stat->m_rows_sent+= state->m_rows_sent;
4971 digest_stat->m_rows_examined+= state->m_rows_examined;
4972 digest_stat->m_created_tmp_disk_tables+= state->m_created_tmp_disk_tables;
4973 digest_stat->m_created_tmp_tables+= state->m_created_tmp_tables;
4974 digest_stat->m_select_full_join+= state->m_select_full_join;
4975 digest_stat->m_select_full_range_join+= state->m_select_full_range_join;
4976 digest_stat->m_select_range+= state->m_select_range;
4977 digest_stat->m_select_range_check+= state->m_select_range_check;
4978 digest_stat->m_select_scan+= state->m_select_scan;
4979 digest_stat->m_sort_merge_passes+= state->m_sort_merge_passes;
4980 digest_stat->m_sort_range+= state->m_sort_range;
4981 digest_stat->m_sort_rows+= state->m_sort_rows;
4982 digest_stat->m_sort_scan+= state->m_sort_scan;
4983 digest_stat->m_no_index_used+= state->m_no_index_used;
4984 digest_stat->m_no_good_index_used+= state->m_no_good_index_used;
4985 }
4986
4987 switch (da->status())
4988 {
4989 case Diagnostics_area::DA_EMPTY:
4990 break;
4991 case Diagnostics_area::DA_OK:
4992 stat->m_rows_affected+= da->affected_rows();
4993 stat->m_warning_count+= da->statement_warn_count();
4994 if (digest_stat != NULL)
4995 {
4996 digest_stat->m_rows_affected+= da->affected_rows();
4997 digest_stat->m_warning_count+= da->statement_warn_count();
4998 }
4999 break;
5000 case Diagnostics_area::DA_EOF:
5001 stat->m_warning_count+= da->statement_warn_count();
5002 if (digest_stat != NULL)
5003 {
5004 digest_stat->m_warning_count+= da->statement_warn_count();
5005 }
5006 break;
5007 case Diagnostics_area::DA_ERROR:
5008 stat->m_error_count++;
5009 if (digest_stat != NULL)
5010 {
5011 digest_stat->m_error_count++;
5012 }
5013 break;
5014 case Diagnostics_area::DA_DISABLED:
5015 break;
5016 }
5017 }
5018
5019 /**
5020 Implementation of the socket instrumentation interface.
5021 @sa PSI_v1::end_socket_wait.
5022 */
end_socket_wait_v1(PSI_socket_locker * locker,size_t byte_count)5023 static void end_socket_wait_v1(PSI_socket_locker *locker, size_t byte_count)
5024 {
5025 PSI_socket_locker_state *state= reinterpret_cast<PSI_socket_locker_state*> (locker);
5026 DBUG_ASSERT(state != NULL);
5027
5028 PFS_socket *socket= reinterpret_cast<PFS_socket *>(state->m_socket);
5029 DBUG_ASSERT(socket != NULL);
5030
5031 ulonglong timer_end= 0;
5032 ulonglong wait_time= 0;
5033 PFS_byte_stat *byte_stat;
5034 register uint flags= state->m_flags;
5035 size_t bytes= ((int)byte_count > -1 ? byte_count : 0);
5036
5037 switch (state->m_operation)
5038 {
5039 /* Group read operations */
5040 case PSI_SOCKET_RECV:
5041 case PSI_SOCKET_RECVFROM:
5042 case PSI_SOCKET_RECVMSG:
5043 byte_stat= &socket->m_socket_stat.m_io_stat.m_read;
5044 break;
5045 /* Group write operations */
5046 case PSI_SOCKET_SEND:
5047 case PSI_SOCKET_SENDTO:
5048 case PSI_SOCKET_SENDMSG:
5049 byte_stat= &socket->m_socket_stat.m_io_stat.m_write;
5050 break;
5051 /* Group remaining operations as miscellaneous */
5052 case PSI_SOCKET_CONNECT:
5053 case PSI_SOCKET_CREATE:
5054 case PSI_SOCKET_BIND:
5055 case PSI_SOCKET_SEEK:
5056 case PSI_SOCKET_OPT:
5057 case PSI_SOCKET_STAT:
5058 case PSI_SOCKET_SHUTDOWN:
5059 case PSI_SOCKET_SELECT:
5060 case PSI_SOCKET_CLOSE:
5061 byte_stat= &socket->m_socket_stat.m_io_stat.m_misc;
5062 break;
5063 default:
5064 DBUG_ASSERT(false);
5065 byte_stat= NULL;
5066 break;
5067 }
5068
5069 /* Aggregation for EVENTS_WAITS_SUMMARY_BY_INSTANCE */
5070 if (flags & STATE_FLAG_TIMED)
5071 {
5072 timer_end= state->m_timer();
5073 wait_time= timer_end - state->m_timer_start;
5074
5075 /* Aggregate to the socket instrument for now (timed) */
5076 byte_stat->aggregate(wait_time, bytes);
5077 }
5078 else
5079 {
5080 /* Aggregate to the socket instrument (event count and byte count) */
5081 byte_stat->aggregate_counted(bytes);
5082 }
5083
5084 /* Aggregate to EVENTS_WAITS_HISTORY and EVENTS_WAITS_HISTORY_LONG */
5085 if (flags & STATE_FLAG_EVENT)
5086 {
5087 PFS_thread *thread= reinterpret_cast<PFS_thread *>(state->m_thread);
5088 DBUG_ASSERT(thread != NULL);
5089 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
5090 DBUG_ASSERT(wait != NULL);
5091
5092 wait->m_timer_end= timer_end;
5093 wait->m_end_event_id= thread->m_event_id;
5094 wait->m_number_of_bytes= bytes;
5095
5096 if (flag_events_waits_history)
5097 insert_events_waits_history(thread, wait);
5098 if (flag_events_waits_history_long)
5099 insert_events_waits_history_long(wait);
5100 thread->m_events_waits_current--;
5101
5102 DBUG_ASSERT(wait == thread->m_events_waits_current);
5103 }
5104 }
5105
set_socket_state_v1(PSI_socket * socket,PSI_socket_state state)5106 static void set_socket_state_v1(PSI_socket *socket, PSI_socket_state state)
5107 {
5108 DBUG_ASSERT((state == PSI_SOCKET_STATE_IDLE) || (state == PSI_SOCKET_STATE_ACTIVE));
5109 PFS_socket *pfs= reinterpret_cast<PFS_socket*>(socket);
5110 DBUG_ASSERT(pfs != NULL);
5111 DBUG_ASSERT(pfs->m_idle || (state == PSI_SOCKET_STATE_IDLE));
5112 DBUG_ASSERT(!pfs->m_idle || (state == PSI_SOCKET_STATE_ACTIVE));
5113 pfs->m_idle= (state == PSI_SOCKET_STATE_IDLE);
5114 }
5115
5116 /**
5117 Set socket descriptor and address info.
5118 */
set_socket_info_v1(PSI_socket * socket,const my_socket * fd,const struct sockaddr * addr,socklen_t addr_len)5119 static void set_socket_info_v1(PSI_socket *socket,
5120 const my_socket *fd,
5121 const struct sockaddr *addr,
5122 socklen_t addr_len)
5123 {
5124 PFS_socket *pfs= reinterpret_cast<PFS_socket*>(socket);
5125 DBUG_ASSERT(pfs != NULL);
5126
5127 /** Set socket descriptor */
5128 if (fd != NULL)
5129 pfs->m_fd= *fd;
5130
5131 /** Set raw socket address and length */
5132 if (likely(addr != NULL && addr_len > 0))
5133 {
5134 pfs->m_addr_len= addr_len;
5135
5136 /** Restrict address length to size of struct */
5137 if (unlikely(pfs->m_addr_len > sizeof(sockaddr_storage)))
5138 pfs->m_addr_len= sizeof(struct sockaddr_storage);
5139
5140 memcpy(&pfs->m_sock_addr, addr, pfs->m_addr_len);
5141 }
5142 }
5143
/**
  Implementation of the socket instrumentation interface.
  Record the calling thread as the owner of the socket instrument.
  @sa PSI_v1::set_socket_thread_owner.
*/
static void set_socket_thread_owner_v1(PSI_socket *socket)
{
  PFS_socket *pfs_socket= reinterpret_cast<PFS_socket*>(socket);
  DBUG_ASSERT(pfs_socket != NULL);
  /*
    The THR_PFS thread local may be NULL (presumably for threads that
    are not instrumented), in which case the socket has no owner.
  */
  pfs_socket->m_thread_owner= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
}
5154
5155 struct PSI_digest_locker*
pfs_digest_start_v1(PSI_statement_locker * locker)5156 pfs_digest_start_v1(PSI_statement_locker *locker)
5157 {
5158 PSI_statement_locker_state *statement_state;
5159 statement_state= reinterpret_cast<PSI_statement_locker_state*> (locker);
5160 DBUG_ASSERT(statement_state != NULL);
5161
5162 if (statement_state->m_discarded)
5163 return NULL;
5164
5165 if (statement_state->m_flags & STATE_FLAG_DIGEST)
5166 {
5167 return reinterpret_cast<PSI_digest_locker*> (locker);
5168 }
5169
5170 return NULL;
5171 }
5172
pfs_digest_end_v1(PSI_digest_locker * locker,const sql_digest_storage * digest)5173 void pfs_digest_end_v1(PSI_digest_locker *locker, const sql_digest_storage *digest)
5174 {
5175 PSI_statement_locker_state *statement_state;
5176 statement_state= reinterpret_cast<PSI_statement_locker_state*> (locker);
5177 DBUG_ASSERT(statement_state != NULL);
5178 DBUG_ASSERT(digest != NULL);
5179
5180 if (statement_state->m_discarded)
5181 return;
5182
5183 if (statement_state->m_flags & STATE_FLAG_DIGEST)
5184 {
5185 statement_state->m_digest= digest;
5186 }
5187 }
5188
5189 /**
5190 Implementation of the thread attribute connection interface
5191 @sa PSI_v1::set_thread_connect_attr.
5192 */
set_thread_connect_attrs_v1(const char * buffer,uint length,const void * from_cs)5193 static int set_thread_connect_attrs_v1(const char *buffer, uint length,
5194 const void *from_cs)
5195 {
5196
5197 PFS_thread *thd= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
5198
5199 DBUG_ASSERT(buffer != NULL);
5200
5201 if (likely(thd != NULL) && session_connect_attrs_size_per_thread > 0)
5202 {
5203 const CHARSET_INFO *cs = static_cast<const CHARSET_INFO *> (from_cs);
5204
5205 /* copy from the input buffer as much as we can fit */
5206 uint copy_size= (uint)(length < session_connect_attrs_size_per_thread ?
5207 length : session_connect_attrs_size_per_thread);
5208 thd->m_session_lock.allocated_to_dirty();
5209 memcpy(thd->m_session_connect_attrs, buffer, copy_size);
5210 thd->m_session_connect_attrs_length= copy_size;
5211 thd->m_session_connect_attrs_cs_number= cs->number;
5212 thd->m_session_lock.dirty_to_allocated();
5213
5214 if (copy_size == length)
5215 return 0;
5216
5217 session_connect_attrs_lost++;
5218 return 1;
5219 }
5220 return 0;
5221 }
5222
5223
/**
  Implementation of the instrumentation interface.
  @sa PSI_v1.
  Note: this is a positional initializer, the order of the members
  below must match struct PSI_v1 exactly.
*/
PSI_v1 PFS_v1=
{
  /* Instrument class registration */
  register_mutex_v1,
  register_rwlock_v1,
  register_cond_v1,
  register_thread_v1,
  register_file_v1,
  register_stage_v1,
  register_statement_v1,
  register_socket_v1,
  /* Instrument instance creation / destruction */
  init_mutex_v1,
  destroy_mutex_v1,
  init_rwlock_v1,
  destroy_rwlock_v1,
  init_cond_v1,
  destroy_cond_v1,
  init_socket_v1,
  destroy_socket_v1,
  /* Table instrumentation */
  get_table_share_v1,
  release_table_share_v1,
  drop_table_share_v1,
  open_table_v1,
  unbind_table_v1,
  rebind_table_v1,
  close_table_v1,
  create_file_v1,
  /* Thread instrumentation */
  spawn_thread_v1,
  new_thread_v1,
  set_thread_id_v1,
  get_thread_v1,
  set_thread_user_v1,
  set_thread_account_v1,
  set_thread_db_v1,
  set_thread_command_v1,
  set_thread_start_time_v1,
  set_thread_state_v1,
  set_thread_info_v1,
  set_thread_v1,
  delete_current_thread_v1,
  delete_thread_v1,
  /* File lockers */
  get_thread_file_name_locker_v1,
  get_thread_file_stream_locker_v1,
  get_thread_file_descriptor_locker_v1,
  /* Synchronization object operations */
  unlock_mutex_v1,
  unlock_rwlock_v1,
  signal_cond_v1,
  broadcast_cond_v1,
  /* Wait instrumentation */
  start_idle_wait_v1,
  end_idle_wait_v1,
  start_mutex_wait_v1,
  end_mutex_wait_v1,
  start_rwlock_wait_v1, /* read */
  end_rwlock_rdwait_v1,
  start_rwlock_wait_v1, /* write */
  end_rwlock_wrwait_v1,
  start_cond_wait_v1,
  end_cond_wait_v1,
  start_table_io_wait_v1,
  end_table_io_wait_v1,
  start_table_lock_wait_v1,
  end_table_lock_wait_v1,
  start_file_open_wait_v1,
  end_file_open_wait_v1,
  end_file_open_wait_and_bind_to_descriptor_v1,
  start_file_wait_v1,
  end_file_wait_v1,
  start_file_close_wait_v1,
  end_file_close_wait_v1,
  /* Stage instrumentation */
  start_stage_v1,
  end_stage_v1,
  /* Statement instrumentation */
  get_thread_statement_locker_v1,
  refine_statement_v1,
  start_statement_v1,
  set_statement_text_v1,
  set_statement_lock_time_v1,
  set_statement_rows_sent_v1,
  set_statement_rows_examined_v1,
  inc_statement_created_tmp_disk_tables_v1,
  inc_statement_created_tmp_tables_v1,
  inc_statement_select_full_join_v1,
  inc_statement_select_full_range_join_v1,
  inc_statement_select_range_v1,
  inc_statement_select_range_check_v1,
  inc_statement_select_scan_v1,
  inc_statement_sort_merge_passes_v1,
  inc_statement_sort_range_v1,
  inc_statement_sort_rows_v1,
  inc_statement_sort_scan_v1,
  set_statement_no_index_used_v1,
  set_statement_no_good_index_used_v1,
  end_statement_v1,
  /* Socket instrumentation */
  start_socket_wait_v1,
  end_socket_wait_v1,
  set_socket_state_v1,
  set_socket_info_v1,
  set_socket_thread_owner_v1,
  /* Statement digests */
  pfs_digest_start_v1,
  pfs_digest_end_v1,
  /* Session connect attributes */
  set_thread_connect_attrs_v1,
};
5328
get_interface(int version)5329 static void* get_interface(int version)
5330 {
5331 switch (version)
5332 {
5333 case PSI_VERSION_1:
5334 return &PFS_v1;
5335 default:
5336 return NULL;
5337 }
5338 }
5339
5340 C_MODE_END
5341
/**
  Bootstrap hook exposing the performance schema instrumentation,
  resolved by version through get_interface().
*/
struct PSI_bootstrap PFS_bootstrap=
{
  get_interface
};
5346