1 /* Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22
23 /**
24 @file storage/perfschema/pfs.cc
25 The performance schema implementation of all instruments.
26 */
27 #include "my_global.h"
28 #include "thr_lock.h"
29 #include "mysql/psi/psi.h"
30 #include "mysql/psi/mysql_thread.h"
31 #include "my_pthread.h"
32 #include "sql_const.h"
33 #include "pfs.h"
34 #include "pfs_instr_class.h"
35 #include "pfs_instr.h"
36 #include "pfs_host.h"
37 #include "pfs_user.h"
38 #include "pfs_account.h"
39 #include "pfs_global.h"
40 #include "pfs_column_values.h"
41 #include "pfs_timer.h"
42 #include "pfs_events_waits.h"
43 #include "pfs_events_stages.h"
44 #include "pfs_events_statements.h"
45 #include "pfs_setup_actor.h"
46 #include "pfs_setup_object.h"
47 #include "sql_error.h"
48 #include "sql_acl.h"
49 #include "sp_head.h"
50 #include "pfs_digest.h"
51
52 using std::min;
53 /**
54 @page PAGE_PERFORMANCE_SCHEMA The Performance Schema main page
55 MySQL PERFORMANCE_SCHEMA implementation.
56
57 @section INTRO Introduction
58 The PERFORMANCE_SCHEMA is a way to introspect the internal execution of
59 the server at runtime.
60 The performance schema focuses primarily on performance data,
61 as opposed to the INFORMATION_SCHEMA whose purpose is to inspect metadata.
62
63 From a user point of view, the performance schema consists of:
64 - a dedicated database schema, named PERFORMANCE_SCHEMA,
65 - SQL tables, used to query the server internal state or change
66 configuration settings.
67
68 From an implementation point of view, the performance schema is a dedicated
69 Storage Engine which exposes data collected by 'Instrumentation Points'
70 placed in the server code.
71
72 @section INTERFACES Multiple interfaces
73
74 The performance schema exposes many different interfaces,
75 for different components, and for different purposes.
76
77 @subsection INT_INSTRUMENTING Instrumenting interface
78
79 All the data representing the server internal state exposed
80 in the performance schema must be first collected:
81 this is the role of the instrumenting interface.
82 The instrumenting interface is a coding interface provided
83 by implementors (of the performance schema) to implementors
84 (of the server or server components).
85
86 This interface is available to:
87 - C implementations
88 - C++ implementations
89 - the core SQL layer (/sql)
90 - the mysys library (/mysys)
91 - MySQL plugins, including storage engines,
92 - third party plugins, including third party storage engines.
93
94 For details, see the @ref PAGE_INSTRUMENTATION_INTERFACE
95 "instrumentation interface page".
96
97 @subsection INT_COMPILING Compiling interface
98
99 The implementation of the performance schema can be enabled or disabled at
100 build time, when building MySQL from the source code.
101
102 When building with the performance schema code, some compilation flags
103 are available to change the default values used in the code, if required.
104
105 For more details, see:
106 @verbatim ./configure --help @endverbatim
107
108 To compile with the performance schema:
109 @verbatim ./configure --with-perfschema @endverbatim
110
111 The implementation of all the compiling options is located in
112 @verbatim ./storage/perfschema/plug.in @endverbatim
113
114 @subsection INT_STARTUP Server startup interface
115
116 The server startup interface consists of the "./mysqld ..."
117 command line used to start the server.
118 When the performance schema is compiled in the server binary,
119 extra command line options are available.
120
121 These extra start options allow the DBA to:
122 - enable or disable the performance schema
123 - specify some sizing parameters.
124
125 To see help for the performance schema startup options, see:
126 @verbatim ./sql/mysqld --verbose --help @endverbatim
127
128 The implementation of all the startup options is located in
129 @verbatim ./sql/mysqld.cc, my_long_options[] @endverbatim
130
131 @subsection INT_BOOTSTRAP Server bootstrap interface
132
133 The bootstrap interface is a private interface exposed by
134 the performance schema, and used by the SQL layer.
135 Its role is to advertise all the SQL tables natively
136 supported by the performance schema to the SQL server.
137 The code consists of creating MySQL tables for the
138 performance schema itself, and is used in './mysql --bootstrap'
139 mode when a server is installed.
140
141 The implementation of the database creation script is located in
142 @verbatim ./scripts/mysql_system_tables.sql @endverbatim
143
144 @subsection INT_CONFIG Runtime configuration interface
145
146 When the performance schema is used at runtime, various configuration
147 parameters can be used to specify what kind of data is collected,
148 what kind of aggregations are computed, what kind of timers are used,
149 what events are timed, etc.
150
151 For all these capabilities, not a single statement or special syntax
152 was introduced in the parser.
153 Instead of new SQL statements, the interface consists of DML
154 (SELECT, INSERT, UPDATE, DELETE) against special "SETUP" tables.
155
156 For example:
157 @verbatim mysql> update performance_schema.SETUP_INSTRUMENTS
158 set ENABLED='YES', TIMED='YES';
159 Query OK, 234 rows affected (0.00 sec)
160 Rows matched: 234 Changed: 234 Warnings: 0 @endverbatim
161
162 @subsection INT_STATUS Internal audit interface
163
164 The internal audit interface is provided to the DBA to inspect if the
165 performance schema code itself is functioning properly.
166 This interface is necessary because a failure caused while
167 instrumenting code in the server should not cause failures in the
168 MySQL server itself, so that the performance schema implementation
169 never raises errors during runtime execution.
170
171 This auditing interface consists of:
172 @verbatim SHOW ENGINE PERFORMANCE_SCHEMA STATUS; @endverbatim
173 It displays data related to the memory usage of the performance schema,
174 as well as statistics about lost events, if any.
175
176 The SHOW STATUS command is implemented in
177 @verbatim ./storage/perfschema/pfs_engine_table.cc @endverbatim
178
179 @subsection INT_QUERY Query interface
180
181 The query interface is used to query the internal state of a running server.
182 It is provided as SQL tables.
183
184 For example:
185 @verbatim mysql> select * from performance_schema.EVENTS_WAITS_CURRENT;
186 @endverbatim
187
188 @section DESIGN_PRINCIPLES Design principles
189
190 @subsection PRINCIPLE_BEHAVIOR No behavior changes
191
192 The primary goal of the performance schema is to measure (instrument) the
193 execution of the server. A good measure should not cause any change
194 in behavior.
195
196 To achieve this, the overall design of the performance schema complies
197 with the following very severe design constraints:
198
199 The parser is unchanged. There are no new keywords, no new statements.
200 This guarantees that existing applications will run the same way with or
201 without the performance schema.
202
203 All the instrumentation points return "void", there are no error codes.
204 Even if the performance schema internally fails, execution of the server
205 code will proceed.
206
207 None of the instrumentation points allocate memory.
208 All the memory used by the performance schema is pre-allocated at startup,
209 and is considered "static" during the server life time.
210
211 None of the instrumentation points use any pthread_mutex, pthread_rwlock,
212 or pthread_cond (or platform equivalents).
213 Executing the instrumentation point should not cause thread scheduling to
214 change in the server.
215
216 In other words, the implementation of the instrumentation points,
217 including all the code called by the instrumentation points, is:
218 - malloc free
219 - mutex free
220 - rwlock free
221
222 TODO: All the code located in storage/perfschema is malloc free,
223 but unfortunately the usage of LF_HASH introduces some memory allocation.
224 This should be revised if possible, to use a lock-free,
225 malloc-free hash code table.
226
227 @subsection PRINCIPLE_PERFORMANCE No performance hit
228
229 The instrumentation of the server should be as fast as possible.
230 In cases when there are choices between:
231 - doing some processing when recording the performance data
232 in the instrumentation,
233 - doing some processing when retrieving the performance data,
234
235 priority is given in the design to make the instrumentation faster,
236 pushing some complexity to data retrieval.
237
238 As a result, some parts of the design, related to:
239 - the setup code path,
240 - the query code path,
241
242 might appear to be sub-optimal.
243
244 The criterion used here is to optimize primarily the critical path (data
245 collection), possibly at the expense of non-critical code paths.
246
247 @subsection PRINCIPLE_NOT_INTRUSIVE Unintrusive instrumentation
248
249 For the performance schema in general to be successful, the barrier
250 of entry for a developer should be low, so it's easy to instrument code.
251
252 In particular, the instrumentation interface:
253 - is available for C and C++ code (so it's a C interface),
254 - does not require parameters that the calling code can't easily provide,
255 - supports partial instrumentation (for example, instrumenting mutexes does
256 not require that every mutex is instrumented)
257
258 @subsection PRINCIPLE_EXTENDABLE Extendable instrumentation
259
260 As the content of the performance schema improves,
261 with more tables exposed and more data collected,
262 the instrumentation interface will also be augmented
263 to support instrumenting new concepts.
264 Existing instrumentations should not be affected when additional
265 instrumentation is made available, and making a new instrumentation
266 available should not require existing instrumented code to support it.
267
268 @subsection PRINCIPLE_VERSIONED Versioned instrumentation
269
270 Given that the instrumentation offered by the performance schema will
271 be augmented with time, when more features are implemented,
272 the interface itself should be versioned, to keep compatibility
273 with previous instrumented code.
274
275 For example, after both plugin-A and plugin-B have been instrumented for
276 mutexes, read write locks and conditions, using the instrumentation
277 interface, we can anticipate that the instrumentation interface
278 is expanded to support file based operations.
279
280 Plugin-A, a file based storage engine, will most likely use the expanded
281 interface and instrument its file usage, using the version 2
282 interface, while Plugin-B, a network based storage engine, will not change
283 its code and not release a new binary.
284
285 When later the instrumentation interface is expanded to support network
286 based operations (which will define interface version 3), the Plugin-B code
287 can then be changed to make use of it.
288
289 Note, this is just an example to illustrate the design concept here.
290 Both mutexes and file instrumentation are already available
291 since version 1 of the instrumentation interface.
292
293 @subsection PRINCIPLE_DEPLOYMENT Easy deployment
294
295 Internally, we might want every plugin implementation to upgrade the
296 instrumented code to the latest available, but this will cause additional
297 work and this is not practical if the code change is monolithic.
298
299 Externally, for third party plugin implementors, asking implementors to
300 always stay aligned to the latest instrumentation and make new releases,
301 even when the change does not provide new functionality for them,
302 is a bad idea.
303
304 For example, requiring a network based engine to re-release because the
305 instrumentation interface changed for file based operations, will create
306 too many deployment issues.
307
308 So, the performance schema implementation must support concurrently,
309 in the same deployment, multiple versions of the instrumentation
310 interface, and ensure binary compatibility with each version.
311
312 In addition to this, the performance schema can be included or excluded
313 from the server binary, using build time configuration options.
314
315 Regardless, the following types of deployment are valid:
316 - a server supporting the performance schema + a storage engine
317 that is not instrumented
318 - a server not supporting the performance schema + a storage engine
319 that is instrumented
320 */
321
322 /**
323 @page PAGE_INSTRUMENTATION_INTERFACE Performance schema: instrumentation interface page.
324 MySQL performance schema instrumentation interface.
325
326 @section INTRO Introduction
327
328 The instrumentation interface consists of two layers:
329 - a raw ABI (Application Binary Interface) layer, that exposes the primitive
330 instrumentation functions exported by the performance schema instrumentation
331 - an API (Application Programming Interface) layer,
332 that provides many helpers for a developer instrumenting some code,
333 to make the instrumentation as easy as possible.
334
335 The ABI layer consists of:
336 @code
337 #include "mysql/psi/psi.h"
338 @endcode
339
340 The API layer consists of:
341 @code
342 #include "mysql/psi/mysql_mutex.h"
343 #include "mysql/psi/mysql_file.h"
344 @endcode
345
346 The first helper is for mutexes, rwlocks and conditions,
347 the second for file io.
348
349 The API layer exposes C macros and typedefs which will expand:
350 - either to non-instrumented code, when compiled without the performance
351 schema instrumentation
352 - or to instrumented code, that will issue the raw calls to the ABI layer
353 so that the implementation can collect data.
354
355 Note that all the names introduced (for example, @c mysql_mutex_lock) do not
356 collide with any other namespace.
357 In particular, the macro @c mysql_mutex_lock is on purpose not named
358 @c pthread_mutex_lock.
359 This is to:
360 - avoid overloading @c pthread_mutex_lock with yet another macro,
361 which is dangerous as it can affect user code and pollute
362 the end-user namespace.
363 - allow the developer instrumenting code to selectively instrument
364 some code but not all.
365
366 @section PRINCIPLES Design principles
367
368 The ABI part is designed as a facade, that exposes basic primitives.
369 The expectation is that each primitive will be very stable over time,
370 but the list will constantly grow when more instruments are supported.
371 To support binary compatibility with plugins compiled with a different
372 version of the instrumentation, the ABI itself is versioned
373 (see @c PSI_v1, @c PSI_v2).
374
375 For a given instrumentation point in the API, the basic coding pattern
376 used is:
377 - (a) notify the performance schema of the operation
378 about to be performed.
379 - (b) execute the instrumented code.
380 - (c) notify the performance schema that the operation
381 is completed.
382
383 An opaque "locker" pointer is returned by (a), that is given to (c).
384 This pointer helps the implementation to keep context, for performance.
385
386 The following code fragment is annotated to show in detail how this pattern
387 is implemented, when the instrumentation is compiled in:
388
389 @verbatim
390 static inline int mysql_mutex_lock(
391 mysql_mutex_t *that, myf flags, const char *src_file, uint src_line)
392 {
393 int result;
394 struct PSI_mutex_locker_state state;
395 struct PSI_mutex_locker *locker= NULL;
396
397 ............... (a)
398 locker= PSI_server->start_mutex_wait(&state, that->p_psi,
399 PSI_MUTEX_LOCK, locker, src_file, src_line);
400
401 ............... (b)
402 result= pthread_mutex_lock(&that->m_mutex);
403
404 ............... (c)
405 PSI_server->end_mutex_wait(locker, result);
406
407 return result;
408 }
409 @endverbatim
410
411 When the performance schema instrumentation is not compiled in,
412 the code becomes simply a wrapper, expanded in line by the compiler:
413
414 @verbatim
415 static inline int mysql_mutex_lock(...)
416 {
417 int result;
418
419 ............... (b)
420 result= pthread_mutex_lock(&that->m_mutex);
421
422 return result;
423 }
424 @endverbatim
425 */
426
427 /**
428 @page PAGE_AGGREGATES Performance schema: the aggregates page.
429 Performance schema aggregates.
430
431 @section INTRO Introduction
432
433 Aggregates tables are tables that can be formally defined as
434 SELECT ... from EVENTS_WAITS_HISTORY_INFINITE ... group by 'group clause'.
435
436 Each group clause defines a different kind of aggregate, and corresponds to
437 a different table exposed by the performance schema.
438
439 Aggregates can be either:
440 - computed on the fly,
441 - computed on demand, based on other available data.
442
443 'EVENTS_WAITS_HISTORY_INFINITE' is a table that does not exist,
444 the best approximation is EVENTS_WAITS_HISTORY_LONG.
445 Aggregates computed on the fly in fact are based on EVENTS_WAITS_CURRENT,
446 while aggregates computed on demand are based on other
447 EVENTS_WAITS_SUMMARY_BY_xxx tables.
448
449 To better understand the implementation itself, a bit of math is
450 required first, to understand the model behind the code:
451 the code is deceptively simple, the real complexity resides
452 in the flyweight of pointers between various performance schema buffers.
453
454 @section DIMENSION Concept of dimension
455
456 An event measured by the instrumentation has many attributes.
457 An event is represented as a data point P(x1, x2, ..., xN),
458 where each x_i coordinate represents a given attribute value.
459
460 Examples of attributes are:
461 - the time waited
462 - the object waited on
463 - the instrument waited on
464 - the thread that waited
465 - the operation performed
466 - per object or per operation additional attributes, such as spins,
467 number of bytes, etc.
468
469 Computing an aggregate per thread is fundamentally different from
470 computing an aggregate by instrument, so the "_BY_THREAD" and
471 "_BY_EVENT_NAME" aggregates are different dimensions,
472 operating on different x_i and x_j coordinates.
473 These aggregates are "orthogonal".
474
475 @section PROJECTION Concept of projection
476
477 A given x_i attribute value can convey either just one basic information,
478 such as a number of bytes, or can convey implied information,
479 such as an object fully qualified name.
480
481 For example, from the value "test.t1", the name of the object schema
482 "test" can be separated from the object name "t1", so that now aggregates
483 by object schema can be implemented.
484
485 In math terms, that corresponds to defining a function:
486 F_i (x): x --> y
487 Applying this function to our point P gives another point P':
488
489 F_i (P):
490 P(x1, x2, ..., x{i-1}, x_i, x{i+1}, ..., x_N)
491 --> P' (x1, x2, ..., x{i-1}, f_i(x_i), x{i+1}, ..., x_N)
492
493 That function in fact defines an aggregate!
494 In SQL terms, this aggregate would look like the following table:
495
496 @verbatim
497 CREATE VIEW EVENTS_WAITS_SUMMARY_BY_Func_i AS
498 SELECT col_1, col_2, ..., col_{i-1},
499 Func_i(col_i),
500 COUNT(col_i),
501 MIN(col_i), AVG(col_i), MAX(col_i), -- if col_i is a numeric value
502 col_{i+1}, ..., col_N
503 FROM EVENTS_WAITS_HISTORY_INFINITE
504 group by col_1, col_2, ..., col_{i-1}, col{i+1}, ..., col_N.
505 @endverbatim
506
507 Note that not all columns have to be included,
508 in particular some columns that are dependent on the x_i column should
509 be removed, so that in practice, MySQL's aggregation method tends to
510 remove many attributes at each aggregation step.
511
512 For example, when aggregating wait events by object instances,
513 - the wait_time and number_of_bytes can be summed,
514 and sum(wait_time) now becomes an object instance attribute.
515 - the source, timer_start, timer_end columns are not in the
516 _BY_INSTANCE table, because these attributes are only
517 meaningful for a wait.
518
519 @section COMPOSITION Concept of composition
520
521 Now, the "test.t1" --> "test" example was purely theory,
522 just to explain the concept, and does not lead very far.
523 Let's look at a more interesting example of data that can be derived
524 from the row event.
525
526 An event creates a transient object, PFS_wait_locker, per operation.
527 This object's life cycle is extremely short: it's created just
528 before the start_wait() instrumentation call, and is destroyed in
529 the end_wait() call.
530
531 The wait locker itself contains a pointer to the object instance
532 waited on.
533 That makes it possible to implement a wait_locker --> object instance projection,
534 with m_target.
535 The object instance life cycle depends on _init and _destroy calls
536 from the code, such as mysql_mutex_init()
537 and mysql_mutex_destroy() for a mutex.
538
539 The object instance waited on contains a pointer to the object class,
540 which is represented by the instrument name.
541 That makes it possible to implement an object instance --> object class projection.
542 The object class life cycle is permanent, as instruments are loaded in
543 the server and never removed.
544
545 The object class is named in such a way
546 (for example, "wait/sync/mutex/sql/LOCK_open",
547 "wait/io/file/maria/data_file") that the component ("sql", "maria")
548 that it belongs to can be inferred.
549 That makes it possible to implement an object class --> server component projection.
550
551 Back to math again, we have, for example for mutexes:
552
553 F1 (l) : PFS_wait_locker l --> PFS_mutex m = l->m_target.m_mutex
554
555 F1_to_2 (m) : PFS_mutex m --> PFS_mutex_class i = m->m_class
556
557 F2_to_3 (i) : PFS_mutex_class i --> const char *component =
558 substring(i->m_name, ...)
559
560 Per components aggregates are not implemented, this is just an illustration.
561
562 F1 alone defines this aggregate:
563
564 EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_INSTANCE
565 (or MUTEX_INSTANCE)
566
567 F1_to_2 alone could define this aggregate:
568
569 EVENTS_WAITS_SUMMARY_BY_INSTANCE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
570
571 Alternatively, using function composition, with
572 F2 = F1_to_2 o F1, F2 defines:
573
574 EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
575
576 Likewise, F_2_to_3 defines:
577
578 EVENTS_WAITS_SUMMARY_BY_EVENT_NAME --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
579
580 and F3 = F_2_to_3 o F_1_to_2 o F1 defines:
581
582 EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
583
584 What does all this have to do with the code?
585
586 Functions (or aggregates) such as F_3 are not implemented as is.
587 Instead, they are decomposed into F_2_to_3 o F_1_to_2 o F1,
588 and each intermediate aggregate is stored into an internal buffer.
589 This allows to support every F1, F2, F3 aggregates from shared
590 internal buffers, where computation already performed to compute F2
591 is reused when computing F3.
592
593 @section OBJECT_GRAPH Object graph
594
595 In terms of object instances, or records, pointers between
596 different buffers define an object instance graph.
597
598 For example, assuming the following scenario:
599 - A mutex class "M" is instrumented, the instrument name
600 is "wait/sync/mutex/sql/M"
601 - This mutex instrument has been instantiated twice,
602 mutex instances are noted M-1 and M-2
603 - Threads T-A and T-B are locking mutex instance M-1
604 - Threads T-C and T-D are locking mutex instance M-2
605
606 The performance schema will record the following data:
607 - EVENTS_WAITS_CURRENT has 4 rows, one for each mutex locker
608 - EVENTS_WAITS_SUMMARY_BY_INSTANCE shows 2 rows, for M-1 and M-2
609 - EVENTS_WAITS_SUMMARY_BY_EVENT_NAME shows 1 row, for M
610
611 The graph of structures will look like:
612
613 @verbatim
614 PFS_wait_locker (T-A, M-1) ----------
615 |
616 v
617 PFS_mutex (M-1)
618 - m_wait_stat ------------
619 ^ |
620 | |
621 PFS_wait_locker (T-B, M-1) ---------- |
622 v
623 PFS_mutex_class (M)
624 - m_wait_stat
625 PFS_wait_locker (T-C, M-2) ---------- ^
626 | |
627 v |
628 PFS_mutex (M-2) |
629 - m_wait_stat ------------
630 ^
631 |
632 PFS_wait_locker (T-D, M-2) ----------
633
634 || || ||
635 || || ||
636 vv vv vv
637
638 EVENTS_WAITS_CURRENT ..._SUMMARY_BY_INSTANCE ..._SUMMARY_BY_EVENT_NAME
639 @endverbatim
640
641 @section ON_THE_FLY On the fly aggregates
642
643 'On the fly' aggregates are computed during the code execution.
644 This is necessary because the data the aggregate is based on is volatile,
645 and can not be kept indefinitely.
646
647 With on the fly aggregates:
648 - the writer thread does all the computation
649 - the reader thread accesses the result directly
650
651 This model is to be avoided if possible, due to the overhead
652 caused when instrumenting code.
653
654 @section HIGHER_LEVEL Higher level aggregates
655
656 'Higher level' aggregates are implemented on demand only.
657 The code executing a SELECT from the aggregate table is
658 collecting data from multiple internal buffers to produce the result.
659
660 With higher level aggregates:
661 - the reader thread does all the computation
662 - the writer thread has no overhead.
663
664 @section MIXED Mixed level aggregates
665
666 The 'Mixed' model is a compromise between 'On the fly' and 'Higher level'
667 aggregates, for internal buffers that are not permanent.
668
669 While an object is present in a buffer, the higher level model is used.
670 When an object is about to be destroyed, statistics are saved into
671 a 'parent' buffer with a longer life cycle, to follow the on the fly model.
672
673 With mixed aggregates:
674 - the reader thread does a lot of complex computation,
675 - the writer thread has minimal overhead, on destroy events.
676
677 @section IMPL_WAIT Implementation for waits aggregates
678
679 For waits, the tables that contain aggregated wait data are:
680 - EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
681 - EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME
682 - EVENTS_WAITS_SUMMARY_BY_INSTANCE
683 - EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
684 - EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME
685 - EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME
686 - FILE_SUMMARY_BY_EVENT_NAME
687 - FILE_SUMMARY_BY_INSTANCE
688 - SOCKET_SUMMARY_BY_INSTANCE
689 - SOCKET_SUMMARY_BY_EVENT_NAME
690 - OBJECTS_SUMMARY_GLOBAL_BY_TYPE
691
692 The instrumented code that generates wait events consists of:
693 - mutexes (mysql_mutex_t)
694 - rwlocks (mysql_rwlock_t)
695 - conditions (mysql_cond_t)
696 - file io (MYSQL_FILE)
697 - socket io (MYSQL_SOCKET)
698 - table io
699 - table lock
700 - idle
701
702 The flow of data between aggregates tables varies for each instrumentation.
703
704 @subsection IMPL_WAIT_MUTEX Mutex waits
705
706 @verbatim
707 mutex_locker(T, M)
708 |
709 | [1]
710 |
711 |-> pfs_mutex(M) =====>> [B], [C]
712 | |
713 | | [2]
714 | |
715 | |-> pfs_mutex_class(M.class) =====>> [C]
716 |
717 |-> pfs_thread(T).event_name(M) =====>> [A], [D], [E], [F]
718 |
719 | [3]
720 |
721 3a |-> pfs_account(U, H).event_name(M) =====>> [D], [E], [F]
722 . |
723 . | [4-RESET]
724 . |
725 3b .....+-> pfs_user(U).event_name(M) =====>> [E]
726 . |
727 3c .....+-> pfs_host(H).event_name(M) =====>> [F]
728 @endverbatim
729
730 How to read this diagram:
731 - events that occur during the instrumented code execution are noted with numbers,
732 as in [1]. Code executed by these events has an impact on overhead.
733 - events that occur during TRUNCATE TABLE operations are noted with numbers,
734 followed by "-RESET", as in [4-RESET].
735 Code executed by these events has no impact on overhead,
736 since they are executed by independent monitoring sessions.
737 - events that occur when a reader extracts data from a performance schema table
738 are noted with letters, as in [A]. The name of the table involved,
739 and the method that builds a row are documented. Code executed by these events
740 has no impact on the instrumentation overhead. Note that the table
741 implementation may pull data from different buffers.
742 - nominal code paths are in plain lines. A "nominal" code path corresponds to
743 cases where the performance schema buffers are sized so that no records are lost.
744 - degenerated code paths are in dotted lines. A "degenerated" code path corresponds
745 to edge cases where parent buffers are full, which forces the code to aggregate to
746 grand parents directly.
747
748 Implemented as:
749 - [1] @c start_mutex_wait_v1(), @c end_mutex_wait_v1()
750 - [2] @c destroy_mutex_v1()
751 - [3] @c aggregate_thread_waits()
752 - [4] @c PFS_account::aggregate_waits()
753 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
754 @c table_ews_by_thread_by_event_name::make_row()
755 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
756 @c table_events_waits_summary_by_instance::make_mutex_row()
757 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
758 @c table_ews_global_by_event_name::make_mutex_row()
759 - [D] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
760 @c table_ews_by_account_by_event_name::make_row()
761 - [E] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
762 @c table_ews_by_user_by_event_name::make_row()
763 - [F] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
764 @c table_ews_by_host_by_event_name::make_row()
765
766 Table EVENTS_WAITS_SUMMARY_BY_INSTANCE is a 'on the fly' aggregate,
767 because the data is collected on the fly by (1) and stored into a buffer,
768 pfs_mutex. The table implementation [B] simply reads the results directly
769 from this buffer.
770
771 Table EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME is a 'mixed' aggregate,
772 because some data is collected on the fly (1),
773 some data is preserved with (2) at a later time in the life cycle,
774 and two different buffers pfs_mutex and pfs_mutex_class are used to store the
775 statistics collected. The table implementation [C] is more complex, since
776 it reads from two buffers pfs_mutex and pfs_mutex_class.
777
778 @subsection IMPL_WAIT_RWLOCK Rwlock waits
779
780 @verbatim
781 rwlock_locker(T, R)
782 |
783 | [1]
784 |
785 |-> pfs_rwlock(R) =====>> [B], [C]
786 | |
787 | | [2]
788 | |
789 | |-> pfs_rwlock_class(R.class) =====>> [C]
790 |
791 |-> pfs_thread(T).event_name(R) =====>> [A]
792 |
793 ...
794 @endverbatim
795
796 Implemented as:
797 - [1] @c start_rwlock_rdwait_v1(), @c end_rwlock_rdwait_v1(), ...
798 - [2] @c destroy_rwlock_v1()
799 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
800 @c table_ews_by_thread_by_event_name::make_row()
801 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
802 @c table_events_waits_summary_by_instance::make_rwlock_row()
803 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
804 @c table_ews_global_by_event_name::make_rwlock_row()
805
806 @subsection IMPL_WAIT_COND Cond waits
807
808 @verbatim
809 cond_locker(T, C)
810 |
811 | [1]
812 |
813 |-> pfs_cond(C) =====>> [B], [C]
814 | |
815 | | [2]
816 | |
817 | |-> pfs_cond_class(C.class) =====>> [C]
818 |
819 |-> pfs_thread(T).event_name(C) =====>> [A]
820 |
821 ...
822 @endverbatim
823
824 Implemented as:
825 - [1] @c start_cond_wait_v1(), @c end_cond_wait_v1()
826 - [2] @c destroy_cond_v1()
827 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
828 @c table_ews_by_thread_by_event_name::make_row()
829 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
830 @c table_events_waits_summary_by_instance::make_cond_row()
831 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
832 @c table_ews_global_by_event_name::make_cond_row()
833
834 @subsection IMPL_WAIT_FILE File waits
835
836 @verbatim
837 file_locker(T, F)
838 |
839 | [1]
840 |
841 |-> pfs_file(F) =====>> [B], [C], [D], [E]
842 | |
843 | | [2]
844 | |
845 | |-> pfs_file_class(F.class) =====>> [C], [D]
846 |
847 |-> pfs_thread(T).event_name(F) =====>> [A]
848 |
849 ...
850 @endverbatim
851
852 Implemented as:
853 - [1] @c get_thread_file_name_locker_v1(), @c start_file_wait_v1(),
854 @c end_file_wait_v1(), ...
855 - [2] @c close_file_v1()
856 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
857 @c table_ews_by_thread_by_event_name::make_row()
858 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
859 @c table_events_waits_summary_by_instance::make_file_row()
860 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
861 @c table_ews_global_by_event_name::make_file_row()
862 - [D] FILE_SUMMARY_BY_EVENT_NAME,
863 @c table_file_summary_by_event_name::make_row()
864 - [E] FILE_SUMMARY_BY_INSTANCE,
865 @c table_file_summary_by_instance::make_row()
866
867 @subsection IMPL_WAIT_SOCKET Socket waits
868
869 @verbatim
870 socket_locker(T, S)
871 |
872 | [1]
873 |
874 |-> pfs_socket(S) =====>> [A], [B], [C], [D], [E]
875 |
876 | [2]
877 |
878 |-> pfs_socket_class(S.class) =====>> [C], [D]
879 |
880 |-> pfs_thread(T).event_name(S) =====>> [A]
881 |
882 | [3]
883 |
884 3a |-> pfs_account(U, H).event_name(S) =====>> [F], [G], [H]
885 . |
886 . | [4-RESET]
887 . |
888 3b .....+-> pfs_user(U).event_name(S) =====>> [G]
889 . |
890 3c .....+-> pfs_host(H).event_name(S) =====>> [H]
891 @endverbatim
892
893 Implemented as:
894 - [1] @c start_socket_wait_v1(), @c end_socket_wait_v1().
895 - [2] @c close_socket_v1()
896 - [3] @c aggregate_thread_waits()
897 - [4] @c PFS_account::aggregate_waits()
898 - [5] @c PFS_host::aggregate_waits()
899 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
900 @c table_ews_by_thread_by_event_name::make_row()
901 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
902 @c table_events_waits_summary_by_instance::make_socket_row()
903 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
904 @c table_ews_global_by_event_name::make_socket_row()
905 - [D] SOCKET_SUMMARY_BY_EVENT_NAME,
906 @c table_socket_summary_by_event_name::make_row()
907 - [E] SOCKET_SUMMARY_BY_INSTANCE,
908 @c table_socket_summary_by_instance::make_row()
909 - [F] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
910 @c table_ews_by_account_by_event_name::make_row()
911 - [G] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
912 @c table_ews_by_user_by_event_name::make_row()
913 - [H] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
914 @c table_ews_by_host_by_event_name::make_row()
915
916 @subsection IMPL_WAIT_TABLE Table waits
917
918 @verbatim
919 table_locker(Thread Th, Table Tb, Event = io or lock)
920 |
921 | [1]
922 |
923 1a |-> pfs_table(Tb) =====>> [A], [B], [C]
924 | |
925 | | [2]
926 | |
927 | |-> pfs_table_share(Tb.share) =====>> [B], [C]
928 | |
929 | | [3]
930 | |
931 | |-> global_table_io_stat =====>> [C]
932 | |
933 | |-> global_table_lock_stat =====>> [C]
934 |
935 1b |-> pfs_thread(Th).event_name(E) =====>> [D], [E], [F], [G]
936 | |
937 | | [ 4-RESET]
938 | |
939 | |-> pfs_account(U, H).event_name(E) =====>> [E], [F], [G]
940 | . |
941 | . | [5-RESET]
942 | . |
943 | .....+-> pfs_user(U).event_name(E) =====>> [F]
944 | . |
945 | .....+-> pfs_host(H).event_name(E) =====>> [G]
946 |
947 1c |-> pfs_thread(Th).waits_current(W) =====>> [H]
948 |
949 1d |-> pfs_thread(Th).waits_history(W) =====>> [I]
950 |
951 1e |-> waits_history_long(W) =====>> [J]
952 @endverbatim
953
954 Implemented as:
955 - [1] @c start_table_io_wait_v1(), @c end_table_io_wait_v1()
956 - [2] @c close_table_v1()
957 - [3] @c drop_table_share_v1()
958 - [4] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
959 - [5] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
960 - [A] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
961 @c table_events_waits_summary_by_instance::make_table_row()
962 - [B] OBJECTS_SUMMARY_GLOBAL_BY_TYPE,
963 @c table_os_global_by_type::make_row()
964 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
965 @c table_ews_global_by_event_name::make_table_io_row(),
966 @c table_ews_global_by_event_name::make_table_lock_row()
967 - [D] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
968 @c table_ews_by_thread_by_event_name::make_row()
969 - [E] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
970         @c table_ews_by_account_by_event_name::make_row()
971 - [F] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
972 @c table_ews_by_user_by_event_name::make_row()
973 - [G] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
974 @c table_ews_by_host_by_event_name::make_row()
975 - [H] EVENTS_WAITS_CURRENT,
976 @c table_events_waits_common::make_row()
977 - [I] EVENTS_WAITS_HISTORY,
978 @c table_events_waits_common::make_row()
979 - [J] EVENTS_WAITS_HISTORY_LONG,
980 @c table_events_waits_common::make_row()
981
982 @section IMPL_STAGE Implementation for stages aggregates
983
984   For stages, the tables that contain aggregated data are:
985 - EVENTS_STAGES_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
986 - EVENTS_STAGES_SUMMARY_BY_HOST_BY_EVENT_NAME
987 - EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME
988 - EVENTS_STAGES_SUMMARY_BY_USER_BY_EVENT_NAME
989 - EVENTS_STAGES_SUMMARY_GLOBAL_BY_EVENT_NAME
990
991 @verbatim
992 start_stage(T, S)
993 |
994 | [1]
995 |
996 1a |-> pfs_thread(T).event_name(S) =====>> [A], [B], [C], [D], [E]
997 | |
998 | | [2]
999 | |
1000 | 2a |-> pfs_account(U, H).event_name(S) =====>> [B], [C], [D], [E]
1001 | . |
1002 | . | [3-RESET]
1003 | . |
1004 | 2b .....+-> pfs_user(U).event_name(S) =====>> [C]
1005 | . |
1006 | 2c .....+-> pfs_host(H).event_name(S) =====>> [D], [E]
1007 | . . |
1008 | . . | [4-RESET]
1009 | 2d . . |
1010 1b |----+----+----+-> pfs_stage_class(S) =====>> [E]
1011
1012 @endverbatim
1013
1014 Implemented as:
1015 - [1] @c start_stage_v1()
1016 - [2] @c delete_thread_v1(), @c aggregate_thread_stages()
1017 - [3] @c PFS_account::aggregate_stages()
1018 - [4] @c PFS_host::aggregate_stages()
1019 - [A] EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME,
1020 @c table_esgs_by_thread_by_event_name::make_row()
1021 - [B] EVENTS_STAGES_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
1022 @c table_esgs_by_account_by_event_name::make_row()
1023 - [C] EVENTS_STAGES_SUMMARY_BY_USER_BY_EVENT_NAME,
1024 @c table_esgs_by_user_by_event_name::make_row()
1025 - [D] EVENTS_STAGES_SUMMARY_BY_HOST_BY_EVENT_NAME,
1026 @c table_esgs_by_host_by_event_name::make_row()
1027 - [E] EVENTS_STAGES_SUMMARY_GLOBAL_BY_EVENT_NAME,
1028 @c table_esgs_global_by_event_name::make_row()
1029
1030 @section IMPL_STATEMENT Implementation for statements consumers
1031
1032   For statements, the tables that contain individual event data are:
1033 - EVENTS_STATEMENTS_CURRENT
1034 - EVENTS_STATEMENTS_HISTORY
1035 - EVENTS_STATEMENTS_HISTORY_LONG
1036
1037   For statements, the tables that contain aggregated data are:
1038 - EVENTS_STATEMENTS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
1039 - EVENTS_STATEMENTS_SUMMARY_BY_HOST_BY_EVENT_NAME
1040 - EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME
1041 - EVENTS_STATEMENTS_SUMMARY_BY_USER_BY_EVENT_NAME
1042 - EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME
1043 - EVENTS_STATEMENTS_SUMMARY_BY_DIGEST
1044
1045 @verbatim
1046 statement_locker(T, S)
1047 |
1048 | [1]
1049 |
1050 1a |-> pfs_thread(T).event_name(S) =====>> [A], [B], [C], [D], [E]
1051 | |
1052 | | [2]
1053 | |
1054 | 2a |-> pfs_account(U, H).event_name(S) =====>> [B], [C], [D], [E]
1055 | . |
1056 | . | [3-RESET]
1057 | . |
1058 | 2b .....+-> pfs_user(U).event_name(S) =====>> [C]
1059 | . |
1060 | 2c .....+-> pfs_host(H).event_name(S) =====>> [D], [E]
1061 | . . |
1062 | . . | [4-RESET]
1063 | 2d . . |
1064 1b |----+----+----+-> pfs_statement_class(S) =====>> [E]
1065 |
1066 1c |-> pfs_thread(T).statement_current(S) =====>> [F]
1067 |
1068 1d |-> pfs_thread(T).statement_history(S) =====>> [G]
1069 |
1070 1e |-> statement_history_long(S) =====>> [H]
1071 |
1072 1f |-> statement_digest(S) =====>> [I]
1073
1074 @endverbatim
1075
1076 Implemented as:
1077 - [1] @c start_statement_v1(), end_statement_v1()
1078 (1a, 1b) is an aggregation by EVENT_NAME,
1079 (1c, 1d, 1e) is an aggregation by TIME,
1080 (1f) is an aggregation by DIGEST
1081 all of these are orthogonal,
1082 and implemented in end_statement_v1().
1083 - [2] @c delete_thread_v1(), @c aggregate_thread_statements()
1084 - [3] @c PFS_account::aggregate_statements()
1085 - [4] @c PFS_host::aggregate_statements()
1086 - [A] EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
1087 @c table_esms_by_thread_by_event_name::make_row()
1088 - [B] EVENTS_STATEMENTS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
1089 @c table_esms_by_account_by_event_name::make_row()
1090 - [C] EVENTS_STATEMENTS_SUMMARY_BY_USER_BY_EVENT_NAME,
1091 @c table_esms_by_user_by_event_name::make_row()
1092 - [D] EVENTS_STATEMENTS_SUMMARY_BY_HOST_BY_EVENT_NAME,
1093 @c table_esms_by_host_by_event_name::make_row()
1094 - [E] EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME,
1095 @c table_esms_global_by_event_name::make_row()
1096 - [F] EVENTS_STATEMENTS_CURRENT,
1097 @c table_events_statements_current::rnd_next(),
1098 @c table_events_statements_common::make_row()
1099 - [G] EVENTS_STATEMENTS_HISTORY,
1100 @c table_events_statements_history::rnd_next(),
1101 @c table_events_statements_common::make_row()
1102 - [H] EVENTS_STATEMENTS_HISTORY_LONG,
1103 @c table_events_statements_history_long::rnd_next(),
1104 @c table_events_statements_common::make_row()
1105 - [I] EVENTS_STATEMENTS_SUMMARY_BY_DIGEST
1106 @c table_esms_by_digest::make_row()
1107 */
1108
1109 /**
1110 @defgroup Performance_schema Performance Schema
1111 The performance schema component.
1112 For details, see the
1113 @ref PAGE_PERFORMANCE_SCHEMA "performance schema main page".
1114
1115 @defgroup Performance_schema_implementation Performance Schema Implementation
1116 @ingroup Performance_schema
1117
1118 @defgroup Performance_schema_tables Performance Schema Tables
1119 @ingroup Performance_schema_implementation
1120 */
1121
/** Per-pthread key, holding the PFS_thread instrumentation of the current thread. */
pthread_key(PFS_thread*, THR_PFS);
/** True when @c THR_PFS is initialized. */
bool THR_PFS_initialized= false;

/**
  Conversion map from PSI_mutex_operation to enum_operation_type.
  Indexed by enum PSI_mutex_operation.
*/
static enum_operation_type mutex_operation_map[]=
{
  OPERATION_TYPE_LOCK,
  OPERATION_TYPE_TRYLOCK
};

/**
  Conversion map from PSI_rwlock_operation to enum_operation_type.
  Indexed by enum PSI_rwlock_operation.
*/
static enum_operation_type rwlock_operation_map[]=
{
  OPERATION_TYPE_READLOCK,
  OPERATION_TYPE_WRITELOCK,
  OPERATION_TYPE_TRYREADLOCK,
  OPERATION_TYPE_TRYWRITELOCK
};

/**
  Conversion map from PSI_cond_operation to enum_operation_type.
  Indexed by enum PSI_cond_operation.
*/
static enum_operation_type cond_operation_map[]=
{
  OPERATION_TYPE_WAIT,
  OPERATION_TYPE_TIMEDWAIT
};

/**
  Conversion map from PSI_file_operation to enum_operation_type.
  Indexed by enum PSI_file_operation.
*/
static enum_operation_type file_operation_map[]=
{
  OPERATION_TYPE_FILECREATE,
  OPERATION_TYPE_FILECREATETMP,
  OPERATION_TYPE_FILEOPEN,
  OPERATION_TYPE_FILESTREAMOPEN,
  OPERATION_TYPE_FILECLOSE,
  OPERATION_TYPE_FILESTREAMCLOSE,
  OPERATION_TYPE_FILEREAD,
  OPERATION_TYPE_FILEWRITE,
  OPERATION_TYPE_FILESEEK,
  OPERATION_TYPE_FILETELL,
  OPERATION_TYPE_FILEFLUSH,
  OPERATION_TYPE_FILESTAT,
  OPERATION_TYPE_FILEFSTAT,
  OPERATION_TYPE_FILECHSIZE,
  OPERATION_TYPE_FILEDELETE,
  OPERATION_TYPE_FILERENAME,
  OPERATION_TYPE_FILESYNC
};

/**
  Conversion map from PSI_table_io_operation to enum_operation_type.
  Indexed by enum PSI_table_io_operation.
*/
static enum_operation_type table_io_operation_map[]=
{
  OPERATION_TYPE_TABLE_FETCH,
  OPERATION_TYPE_TABLE_WRITE_ROW,
  OPERATION_TYPE_TABLE_UPDATE_ROW,
  OPERATION_TYPE_TABLE_DELETE_ROW
};

/**
  Conversion map from enum PFS_TL_LOCK_TYPE to enum_operation_type.
  Indexed by enum PFS_TL_LOCK_TYPE.
*/
static enum_operation_type table_lock_operation_map[]=
{
  OPERATION_TYPE_TL_READ_NORMAL, /* PFS_TL_READ */
  OPERATION_TYPE_TL_READ_WITH_SHARED_LOCKS, /* PFS_TL_READ_WITH_SHARED_LOCKS */
  OPERATION_TYPE_TL_READ_HIGH_PRIORITY, /* PFS_TL_READ_HIGH_PRIORITY */
  OPERATION_TYPE_TL_READ_NO_INSERTS, /* PFS_TL_READ_NO_INSERT */
  OPERATION_TYPE_TL_WRITE_ALLOW_WRITE, /* PFS_TL_WRITE_ALLOW_WRITE */
  OPERATION_TYPE_TL_WRITE_CONCURRENT_INSERT, /* PFS_TL_WRITE_CONCURRENT_INSERT */
  OPERATION_TYPE_TL_WRITE_DELAYED, /* PFS_TL_WRITE_DELAYED */
  OPERATION_TYPE_TL_WRITE_LOW_PRIORITY, /* PFS_TL_WRITE_LOW_PRIORITY */
  OPERATION_TYPE_TL_WRITE_NORMAL, /* PFS_TL_WRITE */
  OPERATION_TYPE_TL_READ_EXTERNAL, /* PFS_TL_READ_EXTERNAL */
  OPERATION_TYPE_TL_WRITE_EXTERNAL /* PFS_TL_WRITE_EXTERNAL */
};

/**
  Conversion map from PSI_socket_operation to enum_operation_type.
  Indexed by enum PSI_socket_operation.
*/
static enum_operation_type socket_operation_map[]=
{
  OPERATION_TYPE_SOCKETCREATE,
  OPERATION_TYPE_SOCKETCONNECT,
  OPERATION_TYPE_SOCKETBIND,
  OPERATION_TYPE_SOCKETCLOSE,
  OPERATION_TYPE_SOCKETSEND,
  OPERATION_TYPE_SOCKETRECV,
  OPERATION_TYPE_SOCKETSENDTO,
  OPERATION_TYPE_SOCKETRECVFROM,
  OPERATION_TYPE_SOCKETSENDMSG,
  OPERATION_TYPE_SOCKETRECVMSG,
  OPERATION_TYPE_SOCKETSEEK,
  OPERATION_TYPE_SOCKETOPT,
  OPERATION_TYPE_SOCKETSTAT,
  OPERATION_TYPE_SOCKETSHUTDOWN,
  OPERATION_TYPE_SOCKETSELECT
};
1235
1236 /**
1237 Build the prefix name of a class of instruments in a category.
1238 For example, this function builds the string 'wait/sync/mutex/sql/' from
1239 a prefix 'wait/sync/mutex' and a category 'sql'.
1240 This prefix is used later to build each instrument name, such as
1241 'wait/sync/mutex/sql/LOCK_open'.
1242 @param prefix Prefix for this class of instruments
1243 @param category Category name
1244 @param [out] output Buffer of length PFS_MAX_INFO_NAME_LENGTH.
1245 @param [out] output_length Length of the resulting output string.
1246 @return 0 for success, non zero for errors
1247 */
build_prefix(const LEX_STRING * prefix,const char * category,char * output,int * output_length)1248 static int build_prefix(const LEX_STRING *prefix, const char *category,
1249 char *output, int *output_length)
1250 {
1251 int len= strlen(category);
1252 char *out_ptr= output;
1253 int prefix_length= prefix->length;
1254
1255 if (unlikely((prefix_length + len + 1) >=
1256 PFS_MAX_FULL_PREFIX_NAME_LENGTH))
1257 {
1258 pfs_print_error("build_prefix: prefix+category is too long <%s> <%s>\n",
1259 prefix->str, category);
1260 return 1;
1261 }
1262
1263 if (unlikely(strchr(category, '/') != NULL))
1264 {
1265 pfs_print_error("build_prefix: invalid category <%s>\n",
1266 category);
1267 return 1;
1268 }
1269
1270 /* output = prefix + category + '/' */
1271 memcpy(out_ptr, prefix->str, prefix_length);
1272 out_ptr+= prefix_length;
1273 memcpy(out_ptr, category, len);
1274 out_ptr+= len;
1275 *out_ptr= '/';
1276 out_ptr++;
1277 *output_length= out_ptr - output;
1278
1279 return 0;
1280 }
1281
/**
  Common body for all the register_<instrument>_v1() implementations.
  Builds each full instrument name as prefix + category + '/' + info->m_name,
  registers it with REGISTER_FUNC, and stores the resulting key in
  *(info->m_key).  When the prefix/category is invalid or a name is too
  long, the key is set to 0 (not instrumented) instead.
*/
#define REGISTER_BODY_V1(KEY_T, PREFIX, REGISTER_FUNC) \
  KEY_T key; \
  char formatted_name[PFS_MAX_INFO_NAME_LENGTH]; \
  int prefix_length; \
  int len; \
  int full_length; \
  \
  DBUG_ASSERT(category != NULL); \
  DBUG_ASSERT(info != NULL); \
  if (unlikely(build_prefix(&PREFIX, category, \
                            formatted_name, &prefix_length))) \
  { \
    for (; count>0; count--, info++) \
      *(info->m_key)= 0; \
    return ; \
  } \
  \
  for (; count>0; count--, info++) \
  { \
    DBUG_ASSERT(info->m_key != NULL); \
    DBUG_ASSERT(info->m_name != NULL); \
    len= strlen(info->m_name); \
    full_length= prefix_length + len; \
    if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH)) \
    { \
      memcpy(formatted_name + prefix_length, info->m_name, len); \
      key= REGISTER_FUNC(formatted_name, full_length, info->m_flags); \
    } \
    else \
    { \
      pfs_print_error("REGISTER_BODY_V1: name too long <%s> <%s>\n", \
                      category, info->m_name); \
      key= 0; \
    } \
    \
    *(info->m_key)= key; \
  } \
  return;
1320
1321 /* Use C linkage for the interface functions. */
1322
1323 C_MODE_START
1324
1325 /**
1326 Implementation of the mutex instrumentation interface.
1327 @sa PSI_v1::register_mutex.
1328 */
static void register_mutex_v1(const char *category,
                              PSI_mutex_info_v1 *info,
                              int count)
{
  /* Register each mutex instrument; see REGISTER_BODY_V1. */
  REGISTER_BODY_V1(PSI_mutex_key,
                   mutex_instrument_prefix,
                   register_mutex_class)
}
1337
1338 /**
1339 Implementation of the rwlock instrumentation interface.
1340 @sa PSI_v1::register_rwlock.
1341 */
static void register_rwlock_v1(const char *category,
                               PSI_rwlock_info_v1 *info,
                               int count)
{
  /* Register each rwlock instrument; see REGISTER_BODY_V1. */
  REGISTER_BODY_V1(PSI_rwlock_key,
                   rwlock_instrument_prefix,
                   register_rwlock_class)
}
1350
1351 /**
1352 Implementation of the cond instrumentation interface.
1353 @sa PSI_v1::register_cond.
1354 */
static void register_cond_v1(const char *category,
                             PSI_cond_info_v1 *info,
                             int count)
{
  /* Register each condition instrument; see REGISTER_BODY_V1. */
  REGISTER_BODY_V1(PSI_cond_key,
                   cond_instrument_prefix,
                   register_cond_class)
}
1363
1364 /**
1365 Implementation of the thread instrumentation interface.
1366 @sa PSI_v1::register_thread.
1367 */
static void register_thread_v1(const char *category,
                               PSI_thread_info_v1 *info,
                               int count)
{
  /* Register each thread instrument; see REGISTER_BODY_V1. */
  REGISTER_BODY_V1(PSI_thread_key,
                   thread_instrument_prefix,
                   register_thread_class)
}
1376
1377 /**
1378 Implementation of the file instrumentation interface.
1379 @sa PSI_v1::register_file.
1380 */
static void register_file_v1(const char *category,
                             PSI_file_info_v1 *info,
                             int count)
{
  /* Register each file instrument; see REGISTER_BODY_V1. */
  REGISTER_BODY_V1(PSI_file_key,
                   file_instrument_prefix,
                   register_file_class)
}
1389
register_stage_v1(const char * category,PSI_stage_info_v1 ** info_array,int count)1390 static void register_stage_v1(const char *category,
1391 PSI_stage_info_v1 **info_array,
1392 int count)
1393 {
1394 char formatted_name[PFS_MAX_INFO_NAME_LENGTH];
1395 int prefix_length;
1396 int len;
1397 int full_length;
1398 PSI_stage_info_v1 *info;
1399
1400 DBUG_ASSERT(category != NULL);
1401 DBUG_ASSERT(info_array != NULL);
1402 if (unlikely(build_prefix(&stage_instrument_prefix, category,
1403 formatted_name, &prefix_length)))
1404 {
1405 for (; count>0; count--, info_array++)
1406 (*info_array)->m_key= 0;
1407 return ;
1408 }
1409
1410 for (; count>0; count--, info_array++)
1411 {
1412 info= *info_array;
1413 DBUG_ASSERT(info != NULL);
1414 DBUG_ASSERT(info->m_name != NULL);
1415 len= strlen(info->m_name);
1416 full_length= prefix_length + len;
1417 if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))
1418 {
1419 memcpy(formatted_name + prefix_length, info->m_name, len);
1420 info->m_key= register_stage_class(formatted_name,
1421 prefix_length,
1422 full_length,
1423 info->m_flags);
1424 }
1425 else
1426 {
1427 pfs_print_error("register_stage_v1: name too long <%s> <%s>\n",
1428 category, info->m_name);
1429 info->m_key= 0;
1430 }
1431 }
1432 return;
1433 }
1434
register_statement_v1(const char * category,PSI_statement_info_v1 * info,int count)1435 static void register_statement_v1(const char *category,
1436 PSI_statement_info_v1 *info,
1437 int count)
1438 {
1439 char formatted_name[PFS_MAX_INFO_NAME_LENGTH];
1440 int prefix_length;
1441 int len;
1442 int full_length;
1443
1444 DBUG_ASSERT(category != NULL);
1445 DBUG_ASSERT(info != NULL);
1446 if (unlikely(build_prefix(&statement_instrument_prefix,
1447 category, formatted_name, &prefix_length)))
1448 {
1449 for (; count>0; count--, info++)
1450 info->m_key= 0;
1451 return ;
1452 }
1453
1454 for (; count>0; count--, info++)
1455 {
1456 DBUG_ASSERT(info->m_name != NULL);
1457 len= strlen(info->m_name);
1458 full_length= prefix_length + len;
1459 if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))
1460 {
1461 memcpy(formatted_name + prefix_length, info->m_name, len);
1462 info->m_key= register_statement_class(formatted_name, full_length, info->m_flags);
1463 }
1464 else
1465 {
1466 pfs_print_error("register_statement_v1: name too long <%s>\n",
1467 info->m_name);
1468 info->m_key= 0;
1469 }
1470 }
1471 return;
1472 }
1473
/**
  Implementation of the socket instrumentation interface.
  @sa PSI_v1::register_socket.
*/
static void register_socket_v1(const char *category,
                               PSI_socket_info_v1 *info,
                               int count)
{
  /* Register each socket instrument; see REGISTER_BODY_V1. */
  REGISTER_BODY_V1(PSI_socket_key,
                   socket_instrument_prefix,
                   register_socket_class)
}
1482
/**
  Common body for the init_<instrument>_v1() implementations.
  Looks up the instrument class for KEY; returns NULL when the key is
  unknown or the class is disabled, otherwise creates and returns the
  instrumented object for ID.
*/
#define INIT_BODY_V1(T, KEY, ID) \
  PFS_##T##_class *klass; \
  PFS_##T *pfs; \
  klass= find_##T##_class(KEY); \
  if (unlikely(klass == NULL)) \
    return NULL; \
  if (! klass->m_enabled) \
    return NULL; \
  pfs= create_##T(klass, ID); \
  return reinterpret_cast<PSI_##T *> (pfs)
1493
1494 /**
1495 Implementation of the mutex instrumentation interface.
1496 @sa PSI_v1::init_mutex.
1497 */
static PSI_mutex*
init_mutex_v1(PSI_mutex_key key, const void *identity)
{
  /* Returns NULL when the key is unknown or the instrument is disabled. */
  INIT_BODY_V1(mutex, key, identity);
}
1503
1504 /**
1505 Implementation of the mutex instrumentation interface.
1506 @sa PSI_v1::destroy_mutex.
1507 */
destroy_mutex_v1(PSI_mutex * mutex)1508 static void destroy_mutex_v1(PSI_mutex* mutex)
1509 {
1510 PFS_mutex *pfs= reinterpret_cast<PFS_mutex*> (mutex);
1511
1512 DBUG_ASSERT(pfs != NULL);
1513
1514 destroy_mutex(pfs);
1515 }
1516
1517 /**
1518 Implementation of the rwlock instrumentation interface.
1519 @sa PSI_v1::init_rwlock.
1520 */
static PSI_rwlock*
init_rwlock_v1(PSI_rwlock_key key, const void *identity)
{
  /* Returns NULL when the key is unknown or the instrument is disabled. */
  INIT_BODY_V1(rwlock, key, identity);
}
1526
1527 /**
1528 Implementation of the rwlock instrumentation interface.
1529 @sa PSI_v1::destroy_rwlock.
1530 */
destroy_rwlock_v1(PSI_rwlock * rwlock)1531 static void destroy_rwlock_v1(PSI_rwlock* rwlock)
1532 {
1533 PFS_rwlock *pfs= reinterpret_cast<PFS_rwlock*> (rwlock);
1534
1535 DBUG_ASSERT(pfs != NULL);
1536
1537 destroy_rwlock(pfs);
1538 }
1539
1540 /**
1541 Implementation of the cond instrumentation interface.
1542 @sa PSI_v1::init_cond.
1543 */
static PSI_cond*
init_cond_v1(PSI_cond_key key, const void *identity)
{
  /* Returns NULL when the key is unknown or the instrument is disabled. */
  INIT_BODY_V1(cond, key, identity);
}
1549
1550 /**
1551 Implementation of the cond instrumentation interface.
1552 @sa PSI_v1::destroy_cond.
1553 */
destroy_cond_v1(PSI_cond * cond)1554 static void destroy_cond_v1(PSI_cond* cond)
1555 {
1556 PFS_cond *pfs= reinterpret_cast<PFS_cond*> (cond);
1557
1558 DBUG_ASSERT(pfs != NULL);
1559
1560 destroy_cond(pfs);
1561 }
1562
1563 /**
1564 Implementation of the table instrumentation interface.
1565 @sa PSI_v1::get_table_share.
1566 */
1567 static PSI_table_share*
get_table_share_v1(my_bool temporary,TABLE_SHARE * share)1568 get_table_share_v1(my_bool temporary, TABLE_SHARE *share)
1569 {
1570 /* Ignore temporary tables and views. */
1571 if (temporary || share->is_view)
1572 return NULL;
1573 /* An instrumented thread is required, for LF_PINS. */
1574 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1575 if (unlikely(pfs_thread == NULL))
1576 return NULL;
1577 PFS_table_share* pfs_share;
1578 pfs_share= find_or_create_table_share(pfs_thread, temporary, share);
1579 return reinterpret_cast<PSI_table_share*> (pfs_share);
1580 }
1581
1582 /**
1583 Implementation of the table instrumentation interface.
1584 @sa PSI_v1::release_table_share.
1585 */
release_table_share_v1(PSI_table_share * share)1586 static void release_table_share_v1(PSI_table_share* share)
1587 {
1588 PFS_table_share* pfs= reinterpret_cast<PFS_table_share*> (share);
1589
1590 if (unlikely(pfs == NULL))
1591 return;
1592
1593 release_table_share(pfs);
1594 }
1595
1596 /**
1597 Implementation of the table instrumentation interface.
1598 @sa PSI_v1::drop_table_share.
1599 */
1600 static void
drop_table_share_v1(my_bool temporary,const char * schema_name,int schema_name_length,const char * table_name,int table_name_length)1601 drop_table_share_v1(my_bool temporary,
1602 const char *schema_name, int schema_name_length,
1603 const char *table_name, int table_name_length)
1604 {
1605 /* Ignore temporary tables. */
1606 if (temporary)
1607 return;
1608 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1609 if (unlikely(pfs_thread == NULL))
1610 return;
1611 /* TODO: temporary tables */
1612 drop_table_share(pfs_thread, temporary, schema_name, schema_name_length,
1613 table_name, table_name_length);
1614 }
1615
1616 /**
1617 Implementation of the table instrumentation interface.
1618 @sa PSI_v1::open_table.
1619 */
1620 static PSI_table*
open_table_v1(PSI_table_share * share,const void * identity)1621 open_table_v1(PSI_table_share *share, const void *identity)
1622 {
1623 PFS_table_share *pfs_table_share= reinterpret_cast<PFS_table_share*> (share);
1624
1625 if (unlikely(pfs_table_share == NULL))
1626 return NULL;
1627
1628 /* This object is not to be instrumented. */
1629 if (! pfs_table_share->m_enabled)
1630 return NULL;
1631
1632 /* This object is instrumented, but all table instruments are disabled. */
1633 if (! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled)
1634 return NULL;
1635
1636 /*
1637 When the performance schema is off, do not instrument anything.
1638 Table handles have short life cycle, instrumentation will happen
1639 again if needed during the next open().
1640 */
1641 if (! flag_global_instrumentation)
1642 return NULL;
1643
1644 PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1645 if (unlikely(thread == NULL))
1646 return NULL;
1647
1648 PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
1649 return reinterpret_cast<PSI_table *> (pfs_table);
1650 }
1651
1652 /**
1653 Implementation of the table instrumentation interface.
1654 @sa PSI_v1::unbind_table.
1655 */
unbind_table_v1(PSI_table * table)1656 static void unbind_table_v1(PSI_table *table)
1657 {
1658 PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1659 if (likely(pfs != NULL))
1660 {
1661 pfs->m_thread_owner= NULL;
1662 }
1663 }
1664
1665 /**
1666 Implementation of the table instrumentation interface.
1667 @sa PSI_v1::rebind_table.
1668 */
static PSI_table *
rebind_table_v1(PSI_table_share *share, const void *identity, PSI_table *table)
{
  PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
  if (likely(pfs != NULL))
  {
    PFS_thread *thread;
    DBUG_ASSERT(pfs->m_thread_owner == NULL);

    /* The table handle was already instrumented, reuse it for this thread. */
    thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

    /* The share may have been disabled since unbind: drop the handle. */
    if (unlikely(! pfs->m_share->m_enabled))
    {
      destroy_table(pfs);
      return NULL;
    }

    /* All table instruments are disabled: drop the handle. */
    if (unlikely(! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled))
    {
      destroy_table(pfs);
      return NULL;
    }

    /* The performance schema is globally disabled: drop the handle. */
    if (unlikely(! flag_global_instrumentation))
    {
      destroy_table(pfs);
      return NULL;
    }

    pfs->m_thread_owner= thread;
    return table;
  }

  /* No existing handle: instrument a new one. See open_table_v1() */

  PFS_table_share *pfs_table_share= reinterpret_cast<PFS_table_share*> (share);

  if (unlikely(pfs_table_share == NULL))
    return NULL;

  /* This object is not to be instrumented. */
  if (! pfs_table_share->m_enabled)
    return NULL;

  /* All table instruments are disabled. */
  if (! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled)
    return NULL;

  /* The performance schema is globally disabled. */
  if (! flag_global_instrumentation)
    return NULL;

  PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(thread == NULL))
    return NULL;

  PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
  return reinterpret_cast<PSI_table *> (pfs_table);
}
1726
1727 /**
1728 Implementation of the table instrumentation interface.
1729 @sa PSI_v1::close_table.
1730 */
close_table_v1(PSI_table * table)1731 static void close_table_v1(PSI_table *table)
1732 {
1733 PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1734 if (unlikely(pfs == NULL))
1735 return;
1736 pfs->aggregate();
1737 destroy_table(pfs);
1738 }
1739
1740 static PSI_socket*
init_socket_v1(PSI_socket_key key,const my_socket * fd,const struct sockaddr * addr,socklen_t addr_len)1741 init_socket_v1(PSI_socket_key key, const my_socket *fd,
1742 const struct sockaddr *addr, socklen_t addr_len)
1743 {
1744 PFS_socket_class *klass;
1745 PFS_socket *pfs;
1746 klass= find_socket_class(key);
1747 if (unlikely(klass == NULL))
1748 return NULL;
1749 if (! klass->m_enabled)
1750 return NULL;
1751 pfs= create_socket(klass, fd, addr, addr_len);
1752 return reinterpret_cast<PSI_socket *> (pfs);
1753 }
1754
destroy_socket_v1(PSI_socket * socket)1755 static void destroy_socket_v1(PSI_socket *socket)
1756 {
1757 PFS_socket *pfs= reinterpret_cast<PFS_socket*> (socket);
1758
1759 DBUG_ASSERT(pfs != NULL);
1760
1761 destroy_socket(pfs);
1762 }
1763
1764 /**
1765 Implementation of the file instrumentation interface.
1766 @sa PSI_v1::create_file.
1767 */
static void create_file_v1(PSI_file_key key, const char *name, File file)
{
  /* The performance schema is globally disabled. */
  if (! flag_global_instrumentation)
    return;
  /* The file descriptor doubles as the index in file_handle_array. */
  int index= (int) file;
  if (unlikely(index < 0))
    return;
  PFS_file_class *klass= find_file_class(key);
  if (unlikely(klass == NULL))
    return;
  if (! klass->m_enabled)
    return;

  /* A thread is needed for LF_PINS */
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(pfs_thread == NULL))
    return;

  /* The current thread is not instrumented. */
  if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
    return;

  /*
    We want this check after pfs_thread->m_enabled,
    to avoid reporting false loss.
  */
  if (unlikely(index >= file_handle_max))
  {
    file_handle_lost++;
    return;
  }

  uint len= strlen(name);
  PFS_file *pfs_file= find_or_create_file(pfs_thread, klass, name, len, true);

  /* Remember the instrumented file, keyed by file descriptor. */
  file_handle_array[index]= pfs_file;
}
1804
1805 /**
1806 Arguments given from a parent to a child thread, packaged in one structure.
1807 This data is used when spawning a new instrumented thread.
1808 @sa pfs_spawn_thread.
1809 */
struct PFS_spawn_thread_arg
{
  /** Internal thread id of the parent (spawning) thread. */
  ulonglong m_thread_internal_id;
  /** Parent thread user name, inherited by the child instrumentation. */
  char m_username[USERNAME_LENGTH];
  /** Length in bytes of @c m_username. */
  uint m_username_length;
  /** Parent thread host name, inherited by the child instrumentation. */
  char m_hostname[HOSTNAME_LENGTH];
  /** Length in bytes of @c m_hostname. */
  uint m_hostname_length;

  /** Instrument key for the child thread. */
  PSI_thread_key m_child_key;
  /** Identity of the instrumented child thread. */
  const void *m_child_identity;
  /** User routine to execute in the child thread. */
  void *(*m_user_start_routine)(void*);
  /** Argument passed to the user routine. */
  void *m_user_arg;
};
1823
/**
  Thread entry point for all threads spawned through spawn_thread_v1().
  Attaches performance schema instrumentation to the newly created pthread,
  releases the spawn argument, then executes the user thread routine.
  @param arg  a heap allocated PFS_spawn_thread_arg, owned (and freed) here
  @return NULL
*/
void* pfs_spawn_thread(void *arg)
{
  PFS_spawn_thread_arg *typed_arg= (PFS_spawn_thread_arg*) arg;
  void *user_arg;
  void *(*user_start_routine)(void*);

  PFS_thread *pfs;

  /* First, attach instrumentation to this newly created pthread. */
  PFS_thread_class *klass= find_thread_class(typed_arg->m_child_key);
  if (likely(klass != NULL))
  {
    pfs= create_thread(klass, typed_arg->m_child_identity, 0);
    if (likely(pfs != NULL))
    {
      clear_thread_account(pfs);

      /* Inherit the account attributes captured from the parent. */
      pfs->m_parent_thread_internal_id= typed_arg->m_thread_internal_id;

      /* Full-array copies; only the first *_length bytes are meaningful. */
      memcpy(pfs->m_username, typed_arg->m_username, sizeof(pfs->m_username));
      pfs->m_username_length= typed_arg->m_username_length;

      memcpy(pfs->m_hostname, typed_arg->m_hostname, sizeof(pfs->m_hostname));
      pfs->m_hostname_length= typed_arg->m_hostname_length;

      set_thread_account(pfs);
    }
  }
  else
  {
    pfs= NULL;
  }
  /* Publish the instrumentation (possibly NULL) in thread local storage. */
  my_pthread_setspecific_ptr(THR_PFS, pfs);

  /*
    Secondly, free the memory allocated in spawn_thread_v1().
    It is preferable to do this before invoking the user
    routine, to avoid memory leaks at shutdown, in case
    the server exits without waiting for this thread.
  */
  user_start_routine= typed_arg->m_user_start_routine;
  user_arg= typed_arg->m_user_arg;
  my_free(typed_arg);

  /* Then, execute the user code for this thread. */
  (*user_start_routine)(user_arg);

  return NULL;
}
1873
1874 /**
1875 Implementation of the thread instrumentation interface.
1876 @sa PSI_v1::spawn_thread.
1877 */
spawn_thread_v1(PSI_thread_key key,pthread_t * thread,const pthread_attr_t * attr,void * (* start_routine)(void *),void * arg)1878 static int spawn_thread_v1(PSI_thread_key key,
1879 pthread_t *thread, const pthread_attr_t *attr,
1880 void *(*start_routine)(void*), void *arg)
1881 {
1882 PFS_spawn_thread_arg *psi_arg;
1883 PFS_thread *parent;
1884
1885 /* psi_arg can not be global, and can not be a local variable. */
1886 psi_arg= (PFS_spawn_thread_arg*) my_malloc(sizeof(PFS_spawn_thread_arg),
1887 MYF(MY_WME));
1888 if (unlikely(psi_arg == NULL))
1889 return EAGAIN;
1890
1891 psi_arg->m_child_key= key;
1892 psi_arg->m_child_identity= (arg ? arg : thread);
1893 psi_arg->m_user_start_routine= start_routine;
1894 psi_arg->m_user_arg= arg;
1895
1896 parent= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1897 if (parent != NULL)
1898 {
1899 /*
1900 Make a copy of the parent attributes.
1901 This is required, because instrumentation for this thread (the parent)
1902 may be destroyed before the child thread instrumentation is created.
1903 */
1904 psi_arg->m_thread_internal_id= parent->m_thread_internal_id;
1905
1906 memcpy(psi_arg->m_username, parent->m_username, sizeof(psi_arg->m_username));
1907 psi_arg->m_username_length= parent->m_username_length;
1908
1909 memcpy(psi_arg->m_hostname, parent->m_hostname, sizeof(psi_arg->m_hostname));
1910 psi_arg->m_hostname_length= parent->m_hostname_length;
1911 }
1912 else
1913 {
1914 psi_arg->m_thread_internal_id= 0;
1915 psi_arg->m_username_length= 0;
1916 psi_arg->m_hostname_length= 0;
1917 }
1918
1919 int result= pthread_create(thread, attr, pfs_spawn_thread, psi_arg);
1920 if (unlikely(result != 0))
1921 my_free(psi_arg);
1922 return result;
1923 }
1924
1925 /**
1926 Implementation of the thread instrumentation interface.
1927 @sa PSI_v1::new_thread.
1928 */
1929 static PSI_thread*
new_thread_v1(PSI_thread_key key,const void * identity,ulonglong processlist_id)1930 new_thread_v1(PSI_thread_key key, const void *identity, ulonglong processlist_id)
1931 {
1932 PFS_thread *pfs;
1933
1934 PFS_thread_class *klass= find_thread_class(key);
1935 if (likely(klass != NULL))
1936 pfs= create_thread(klass, identity, processlist_id);
1937 else
1938 pfs= NULL;
1939
1940 return reinterpret_cast<PSI_thread*> (pfs);
1941 }
1942
1943 /**
1944 Implementation of the thread instrumentation interface.
1945 @sa PSI_v1::set_thread_id.
1946 */
set_thread_id_v1(PSI_thread * thread,ulonglong processlist_id)1947 static void set_thread_id_v1(PSI_thread *thread, ulonglong processlist_id)
1948 {
1949 PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
1950 if (unlikely(pfs == NULL))
1951 return;
1952 pfs->m_processlist_id= processlist_id;
1953 }
1954
1955 /**
1956 Implementation of the thread instrumentation interface.
1957 @sa PSI_v1::get_thread_id.
1958 */
1959 static PSI_thread*
get_thread_v1(void)1960 get_thread_v1(void)
1961 {
1962 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1963 return reinterpret_cast<PSI_thread*> (pfs);
1964 }
1965
/**
  Implementation of the thread instrumentation interface.
  @sa PSI_v1::set_thread_user.
  Changes the user name of the current thread, flushes statistics to the
  previous account, and re-evaluates SETUP_ACTORS to decide whether the
  thread stays instrumented.
  @param user      new user name, may be NULL when user_len is 0
  @param user_len  length in bytes of user
*/
static void set_thread_user_v1(const char *user, int user_len)
{
  PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

  DBUG_ASSERT((user != NULL) || (user_len == 0));
  DBUG_ASSERT(user_len >= 0);
  /* sizeof does not evaluate its operand, so a NULL pfs is still safe. */
  DBUG_ASSERT((uint) user_len <= sizeof(pfs->m_username));

  if (unlikely(pfs == NULL))
    return;

  /* Flush statistics collected so far under the previous account. */
  aggregate_thread(pfs, pfs->m_account, pfs->m_user, pfs->m_host);

  /* Optimistic lock: mark session attributes dirty while modifying them. */
  pfs->m_session_lock.allocated_to_dirty();

  clear_thread_account(pfs);

  if (user_len > 0)
    memcpy(pfs->m_username, user, user_len);
  pfs->m_username_length= user_len;

  set_thread_account(pfs);

  /* Re-evaluate whether this thread is instrumented. */
  bool enabled= true;
  if (flag_thread_instrumentation)
  {
    if ((pfs->m_username_length > 0) && (pfs->m_hostname_length > 0))
    {
      /*
        TODO: performance improvement.
        Once performance_schema.USERS is exposed,
        we can use PFS_user::m_enabled instead of looking up
        SETUP_ACTORS every time.
      */
      lookup_setup_actor(pfs,
                         pfs->m_username, pfs->m_username_length,
                         pfs->m_hostname, pfs->m_hostname_length,
                         &enabled);
    }
  }

  pfs->m_enabled= enabled;

  /* Publish the new session attributes. */
  pfs->m_session_lock.dirty_to_allocated();
}
2015
/**
  Implementation of the thread instrumentation interface.
  @sa PSI_v1::set_thread_account.
  Changes both the user name and host name of the current thread, then
  re-evaluates SETUP_ACTORS to decide whether the thread stays instrumented.
  @param user      new user name, may be NULL when user_len is 0
  @param user_len  length in bytes of user
  @param host      new host name, may be NULL when host_len is 0
  @param host_len  length in bytes of host (clamped to the buffer size)
*/
static void set_thread_account_v1(const char *user, int user_len,
                                  const char *host, int host_len)
{
  PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

  DBUG_ASSERT((user != NULL) || (user_len == 0));
  DBUG_ASSERT(user_len >= 0);
  /* sizeof does not evaluate its operand, so a NULL pfs is still safe. */
  DBUG_ASSERT((uint) user_len <= sizeof(pfs->m_username));
  DBUG_ASSERT((host != NULL) || (host_len == 0));
  DBUG_ASSERT(host_len >= 0);

  /* Host names can legitimately exceed the buffer; truncate silently.
     NOTE(review): user_len is only asserted, not clamped — presumably the
     callers guarantee it; verify against the call sites. */
  host_len= min<size_t>(host_len, sizeof(pfs->m_hostname));

  if (unlikely(pfs == NULL))
    return;

  pfs->m_session_lock.allocated_to_dirty();

  clear_thread_account(pfs);

  if (acl_is_utility_user(user, host, NULL)) {
    /* We do not want the utility user to show up in any PFS statistics,
       so we keep this pfs session dirty. This fixes many, but not all tables.
       The remaining seems to honor m_enabled, so we also set that to false. */
    /* Deliberate early return: the session lock stays dirty on purpose. */
    pfs->m_enabled= false;
    pfs->m_disable_instrumentation = true;
    return;
  }

  if (host_len > 0)
    memcpy(pfs->m_hostname, host, host_len);
  pfs->m_hostname_length= host_len;

  if (user_len > 0)
    memcpy(pfs->m_username, user, user_len);
  pfs->m_username_length= user_len;

  set_thread_account(pfs);

  /* Re-evaluate whether this thread is instrumented. */
  bool enabled= true;
  if (flag_thread_instrumentation)
  {
    if ((pfs->m_username_length > 0) && (pfs->m_hostname_length > 0))
    {
      /*
        TODO: performance improvement.
        Once performance_schema.USERS is exposed,
        we can use PFS_user::m_enabled instead of looking up
        SETUP_ACTORS every time.
      */
      lookup_setup_actor(pfs,
                         pfs->m_username, pfs->m_username_length,
                         pfs->m_hostname, pfs->m_hostname_length,
                         &enabled);
    }
  }
  pfs->m_enabled= enabled;

  /* Publish the new session attributes. */
  pfs->m_session_lock.dirty_to_allocated();
}
2080
2081 /**
2082 Implementation of the thread instrumentation interface.
2083 @sa PSI_v1::set_thread_db.
2084 */
set_thread_db_v1(const char * db,int db_len)2085 static void set_thread_db_v1(const char* db, int db_len)
2086 {
2087 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2088
2089 DBUG_ASSERT((db != NULL) || (db_len == 0));
2090 DBUG_ASSERT(db_len >= 0);
2091 DBUG_ASSERT((uint) db_len <= sizeof(pfs->m_dbname));
2092
2093 if (likely(pfs != NULL))
2094 {
2095 pfs->m_stmt_lock.allocated_to_dirty();
2096 if (db_len > 0)
2097 memcpy(pfs->m_dbname, db, db_len);
2098 pfs->m_dbname_length= db_len;
2099 pfs->m_stmt_lock.dirty_to_allocated();
2100 }
2101 }
2102
2103 /**
2104 Implementation of the thread instrumentation interface.
2105 @sa PSI_v1::set_thread_command.
2106 */
set_thread_command_v1(int command)2107 static void set_thread_command_v1(int command)
2108 {
2109 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2110
2111 DBUG_ASSERT(command >= 0);
2112 DBUG_ASSERT(command <= (int) COM_END);
2113
2114 if (likely(pfs != NULL))
2115 {
2116 pfs->m_command= command;
2117 }
2118 }
2119
2120 /**
2121 Implementation of the thread instrumentation interface.
2122 @sa PSI_v1::set_thread_start_time.
2123 */
set_thread_start_time_v1(time_t start_time)2124 static void set_thread_start_time_v1(time_t start_time)
2125 {
2126 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2127
2128 if (likely(pfs != NULL))
2129 {
2130 pfs->m_start_time= start_time;
2131 }
2132 }
2133
/**
  Implementation of the thread instrumentation interface.
  @sa PSI_v1::set_thread_state.
  Intentionally a no-op: the thread state attribute is deprecated.
*/
static void set_thread_state_v1(const char* state)
{
  /* DEPRECATED. */
}
2142
2143 /**
2144 Implementation of the thread instrumentation interface.
2145 @sa PSI_v1::set_thread_info.
2146 */
set_thread_info_v1(const char * info,uint info_len)2147 static void set_thread_info_v1(const char* info, uint info_len)
2148 {
2149 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2150
2151 DBUG_ASSERT((info != NULL) || (info_len == 0));
2152
2153 if (likely(pfs != NULL))
2154 {
2155 if ((info != NULL) && (info_len > 0))
2156 {
2157 if (info_len > sizeof(pfs->m_processlist_info))
2158 info_len= sizeof(pfs->m_processlist_info);
2159
2160 pfs->m_stmt_lock.allocated_to_dirty();
2161 memcpy(pfs->m_processlist_info, info, info_len);
2162 pfs->m_processlist_info_length= info_len;
2163 pfs->m_stmt_lock.dirty_to_allocated();
2164 }
2165 else
2166 {
2167 pfs->m_stmt_lock.allocated_to_dirty();
2168 pfs->m_processlist_info_length= 0;
2169 pfs->m_stmt_lock.dirty_to_allocated();
2170 }
2171 }
2172 }
2173
2174 /**
2175 Implementation of the thread instrumentation interface.
2176 @sa PSI_v1::set_thread.
2177 */
set_thread_v1(PSI_thread * thread)2178 static void set_thread_v1(PSI_thread* thread)
2179 {
2180 PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
2181 my_pthread_setspecific_ptr(THR_PFS, pfs);
2182 }
2183
2184 /**
2185 Implementation of the thread instrumentation interface.
2186 @sa PSI_v1::delete_current_thread.
2187 */
delete_current_thread_v1(void)2188 static void delete_current_thread_v1(void)
2189 {
2190 PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2191 if (thread != NULL)
2192 {
2193 aggregate_thread(thread, thread->m_account, thread->m_user, thread->m_host);
2194 my_pthread_setspecific_ptr(THR_PFS, NULL);
2195 destroy_thread(thread);
2196 }
2197 }
2198
2199 /**
2200 Implementation of the thread instrumentation interface.
2201 @sa PSI_v1::delete_thread.
2202 */
delete_thread_v1(PSI_thread * thread)2203 static void delete_thread_v1(PSI_thread *thread)
2204 {
2205 PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
2206
2207 if (pfs != NULL)
2208 {
2209 aggregate_thread(pfs, pfs->m_account, pfs->m_user, pfs->m_host);
2210 destroy_thread(pfs);
2211 }
2212 }
2213
/**
  Implementation of the mutex instrumentation interface.
  @sa PSI_v1::start_mutex_wait.
  @param state     locker state to initialize, owned by the caller
  @param mutex     the instrumented mutex
  @param op        the mutex operation performed
  @param src_file  caller source file
  @param src_line  caller source line
  @return a locker, or NULL when the wait is not instrumented
*/
static PSI_mutex_locker*
start_mutex_wait_v1(PSI_mutex_locker_state *state,
                    PSI_mutex *mutex, PSI_mutex_operation op,
                    const char *src_file, uint src_line)
{
  PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
  DBUG_ASSERT((int) op >= 0);
  DBUG_ASSERT((uint) op < array_elements(mutex_operation_map));
  DBUG_ASSERT(state != NULL);

  DBUG_ASSERT(pfs_mutex != NULL);
  DBUG_ASSERT(pfs_mutex->m_class != NULL);

  if (! pfs_mutex->m_enabled)
    return NULL;

  uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_mutex->m_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* The per-thread wait stack is bounded; overflow counts as a loss. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The event below the top of the stack is the nesting event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      wait->m_thread= pfs_thread;
      wait->m_class= pfs_mutex->m_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_mutex->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= mutex_operation_map[(int) op];
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_MUTEX;

      /* Push the in-flight wait on the per-thread stack. */
      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_mutex->m_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
      state->m_thread= NULL;
    }
    else
    {
      /*
        Complete shortcut.
      */
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
      pfs_mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
      return NULL;
    }
  }

  state->m_flags= flags;
  state->m_mutex= mutex;
  return reinterpret_cast<PSI_mutex_locker*> (state);
}
2310
/**
  Implementation of the rwlock instrumentation interface.
  @sa PSI_v1::start_rwlock_rdwait
  @sa PSI_v1::start_rwlock_wrwait
  @param state     locker state to initialize, owned by the caller
  @param rwlock    the instrumented rwlock
  @param op        the rwlock operation performed
  @param src_file  caller source file
  @param src_line  caller source line
  @return a locker, or NULL when the wait is not instrumented
*/
static PSI_rwlock_locker*
start_rwlock_wait_v1(PSI_rwlock_locker_state *state,
                     PSI_rwlock *rwlock,
                     PSI_rwlock_operation op,
                     const char *src_file, uint src_line)
{
  PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(rwlock_operation_map));
  DBUG_ASSERT(state != NULL);
  DBUG_ASSERT(pfs_rwlock != NULL);
  DBUG_ASSERT(pfs_rwlock->m_class != NULL);

  if (! pfs_rwlock->m_enabled)
    return NULL;

  uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_rwlock->m_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* The per-thread wait stack is bounded; overflow counts as a loss. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The event below the top of the stack is the nesting event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      wait->m_thread= pfs_thread;
      wait->m_class= pfs_rwlock->m_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_rwlock->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= rwlock_operation_map[static_cast<int> (op)];
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_RWLOCK;

      /* Push the in-flight wait on the per-thread stack. */
      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_rwlock->m_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
      state->m_thread= NULL;
    }
    else
    {
      /*
        Complete shortcut.
      */
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
      pfs_rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
      return NULL;
    }
  }

  state->m_flags= flags;
  state->m_rwlock= rwlock;
  return reinterpret_cast<PSI_rwlock_locker*> (state);
}
2408
2409 /**
2410 Implementation of the cond instrumentation interface.
2411 @sa PSI_v1::start_cond_wait.
2412 */
2413 static PSI_cond_locker*
start_cond_wait_v1(PSI_cond_locker_state * state,PSI_cond * cond,PSI_mutex * mutex,PSI_cond_operation op,const char * src_file,uint src_line)2414 start_cond_wait_v1(PSI_cond_locker_state *state,
2415 PSI_cond *cond, PSI_mutex *mutex,
2416 PSI_cond_operation op,
2417 const char *src_file, uint src_line)
2418 {
2419 /*
2420 Note about the unused PSI_mutex *mutex parameter:
2421 In the pthread library, a call to pthread_cond_wait()
2422 causes an unlock() + lock() on the mutex associated with the condition.
2423 This mutex operation is not instrumented, so the mutex will still
2424 appear as locked when a thread is waiting on a condition.
2425 This has no impact now, as unlock_mutex() is not recording events.
2426 When unlock_mutex() is implemented by later work logs,
2427 this parameter here will be used to adjust the mutex state,
2428 in start_cond_wait_v1() and end_cond_wait_v1().
2429 */
2430 PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
2431 DBUG_ASSERT(static_cast<int> (op) >= 0);
2432 DBUG_ASSERT(static_cast<uint> (op) < array_elements(cond_operation_map));
2433 DBUG_ASSERT(state != NULL);
2434 DBUG_ASSERT(pfs_cond != NULL);
2435 DBUG_ASSERT(pfs_cond->m_class != NULL);
2436
2437 if (! pfs_cond->m_enabled)
2438 return NULL;
2439
2440 uint flags;
2441 ulonglong timer_start= 0;
2442
2443 if (flag_thread_instrumentation)
2444 {
2445 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2446 if (unlikely(pfs_thread == NULL))
2447 return NULL;
2448 if (! pfs_thread->m_enabled)
2449 return NULL;
2450 state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2451 flags= STATE_FLAG_THREAD;
2452
2453 if (pfs_cond->m_timed)
2454 {
2455 timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2456 state->m_timer_start= timer_start;
2457 flags|= STATE_FLAG_TIMED;
2458 }
2459
2460 if (flag_events_waits_current)
2461 {
2462 if (unlikely(pfs_thread->m_events_waits_current >=
2463 & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2464 {
2465 locker_lost++;
2466 return NULL;
2467 }
2468 PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2469 state->m_wait= wait;
2470 flags|= STATE_FLAG_EVENT;
2471
2472 PFS_events_waits *parent_event= wait - 1;
2473 wait->m_event_type= EVENT_TYPE_WAIT;
2474 wait->m_nesting_event_id= parent_event->m_event_id;
2475 wait->m_nesting_event_type= parent_event->m_event_type;
2476
2477 wait->m_thread= pfs_thread;
2478 wait->m_class= pfs_cond->m_class;
2479 wait->m_timer_start= timer_start;
2480 wait->m_timer_end= 0;
2481 wait->m_object_instance_addr= pfs_cond->m_identity;
2482 wait->m_event_id= pfs_thread->m_event_id++;
2483 wait->m_end_event_id= 0;
2484 wait->m_operation= cond_operation_map[static_cast<int> (op)];
2485 wait->m_source_file= src_file;
2486 wait->m_source_line= src_line;
2487 wait->m_wait_class= WAIT_CLASS_COND;
2488
2489 pfs_thread->m_events_waits_current++;
2490 }
2491 }
2492 else
2493 {
2494 if (pfs_cond->m_timed)
2495 {
2496 timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2497 state->m_timer_start= timer_start;
2498 flags= STATE_FLAG_TIMED;
2499 }
2500 else
2501 {
2502 /*
2503 Complete shortcut.
2504 */
2505 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
2506 pfs_cond->m_cond_stat.m_wait_stat.aggregate_counted();
2507 return NULL;
2508 }
2509 }
2510
2511 state->m_flags= flags;
2512 state->m_cond= cond;
2513 state->m_mutex= mutex;
2514 return reinterpret_cast<PSI_cond_locker*> (state);
2515 }
2516
lock_flags_to_lock_type(uint flags)2517 static inline PFS_TL_LOCK_TYPE lock_flags_to_lock_type(uint flags)
2518 {
2519 enum thr_lock_type value= static_cast<enum thr_lock_type> (flags);
2520
2521 switch (value)
2522 {
2523 case TL_READ:
2524 return PFS_TL_READ;
2525 case TL_READ_WITH_SHARED_LOCKS:
2526 return PFS_TL_READ_WITH_SHARED_LOCKS;
2527 case TL_READ_HIGH_PRIORITY:
2528 return PFS_TL_READ_HIGH_PRIORITY;
2529 case TL_READ_NO_INSERT:
2530 return PFS_TL_READ_NO_INSERT;
2531 case TL_WRITE_ALLOW_WRITE:
2532 return PFS_TL_WRITE_ALLOW_WRITE;
2533 case TL_WRITE_CONCURRENT_INSERT:
2534 return PFS_TL_WRITE_CONCURRENT_INSERT;
2535 case TL_WRITE_DELAYED:
2536 return PFS_TL_WRITE_DELAYED;
2537 case TL_WRITE_LOW_PRIORITY:
2538 return PFS_TL_WRITE_LOW_PRIORITY;
2539 case TL_WRITE:
2540 return PFS_TL_WRITE;
2541
2542 case TL_WRITE_ONLY:
2543 case TL_IGNORE:
2544 case TL_UNLOCK:
2545 case TL_READ_DEFAULT:
2546 case TL_WRITE_DEFAULT:
2547 default:
2548 DBUG_ASSERT(false);
2549 }
2550
2551 /* Dead code */
2552 return PFS_TL_READ;
2553 }
2554
external_lock_flags_to_lock_type(uint flags)2555 static inline PFS_TL_LOCK_TYPE external_lock_flags_to_lock_type(uint flags)
2556 {
2557 DBUG_ASSERT(flags == F_RDLCK || flags == F_WRLCK);
2558 return (flags == F_RDLCK ? PFS_TL_READ_EXTERNAL : PFS_TL_WRITE_EXTERNAL);
2559 }
2560
/**
  Implementation of the table instrumentation interface.
  @sa PSI_v1::start_table_io_wait_v1
  @param state     locker state to initialize, owned by the caller
  @param table     the instrumented table handle
  @param op        the table io operation performed
  @param index     the index used, or MAX_KEY
  @param src_file  caller source file
  @param src_line  caller source line
  @return a locker, or NULL when the wait is not instrumented
*/
static PSI_table_locker*
start_table_io_wait_v1(PSI_table_locker_state *state,
                       PSI_table *table,
                       PSI_table_io_operation op,
                       uint index,
                       const char *src_file, uint src_line)
{
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(table_io_operation_map));
  DBUG_ASSERT(state != NULL);
  PFS_table *pfs_table= reinterpret_cast<PFS_table*> (table);
  DBUG_ASSERT(pfs_table != NULL);
  DBUG_ASSERT(pfs_table->m_share != NULL);

  if (! pfs_table->m_io_enabled)
    return NULL;

  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

  uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    if (pfs_thread == NULL)
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_table->m_io_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* The per-thread wait stack is bounded; overflow counts as a loss. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The event below the top of the stack is the nesting event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      PFS_table_share *share= pfs_table->m_share;
      wait->m_thread= pfs_thread;
      wait->m_class= &global_table_io_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_table->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= table_io_operation_map[static_cast<int> (op)];
      wait->m_flags= 0;
      /* The table share may be dropped concurrently: record a weak link. */
      wait->m_object_type= share->get_object_type();
      wait->m_weak_table_share= share;
      wait->m_weak_version= share->get_version();
      wait->m_index= index;
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_TABLE;

      /* Push the in-flight wait on the per-thread stack. */
      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_table->m_io_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /* TODO: consider a shortcut here */
      flags= 0;
    }
  }

  state->m_flags= flags;
  state->m_table= table;
  state->m_io_operation= op;
  state->m_index= index;
  return reinterpret_cast<PSI_table_locker*> (state);
}
2662
/**
  Implementation of the table instrumentation interface.
  @sa PSI_v1::start_table_lock_wait.
  @param state     locker state to initialize, owned by the caller
  @param table     the instrumented table handle
  @param op        PSI_TABLE_LOCK or PSI_TABLE_EXTERNAL_LOCK
  @param op_flags  a thr_lock_type (for PSI_TABLE_LOCK) or an fcntl
                   lock flag (for PSI_TABLE_EXTERNAL_LOCK)
  @param src_file  caller source file
  @param src_line  caller source line
  @return a locker, or NULL when the wait is not instrumented
*/
static PSI_table_locker*
start_table_lock_wait_v1(PSI_table_locker_state *state,
                         PSI_table *table,
                         PSI_table_lock_operation op,
                         ulong op_flags,
                         const char *src_file, uint src_line)
{
  DBUG_ASSERT(state != NULL);
  DBUG_ASSERT((op == PSI_TABLE_LOCK) || (op == PSI_TABLE_EXTERNAL_LOCK));

  PFS_table *pfs_table= reinterpret_cast<PFS_table*> (table);

  DBUG_ASSERT(pfs_table != NULL);
  DBUG_ASSERT(pfs_table->m_share != NULL);

  if (! pfs_table->m_lock_enabled)
    return NULL;

  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

  PFS_TL_LOCK_TYPE lock_type;

  switch (op)
  {
  case PSI_TABLE_LOCK:
    lock_type= lock_flags_to_lock_type(op_flags);
    break;
  case PSI_TABLE_EXTERNAL_LOCK:
    /*
      See the handler::external_lock() API design,
      there is no handler::external_unlock().
    */
    if (op_flags == F_UNLCK)
      return NULL;
    lock_type= external_lock_flags_to_lock_type(op_flags);
    break;
  default:
    lock_type= PFS_TL_READ;
    DBUG_ASSERT(false);
  }

  DBUG_ASSERT((uint) lock_type < array_elements(table_lock_operation_map));

  uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    if (pfs_thread == NULL)
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_table->m_lock_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* The per-thread wait stack is bounded; overflow counts as a loss. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The event below the top of the stack is the nesting event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      PFS_table_share *share= pfs_table->m_share;
      wait->m_thread= pfs_thread;
      wait->m_class= &global_table_lock_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_table->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= table_lock_operation_map[lock_type];
      wait->m_flags= 0;
      /* The table share may be dropped concurrently: record a weak link. */
      wait->m_object_type= share->get_object_type();
      wait->m_weak_table_share= share;
      wait->m_weak_version= share->get_version();
      wait->m_index= 0;
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_TABLE;

      /* Push the in-flight wait on the per-thread stack. */
      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_table->m_lock_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /* TODO: consider a shortcut here */
      flags= 0;
    }
  }

  state->m_flags= flags;
  state->m_table= table;
  /* The lock type is smuggled through m_index for end_table_lock_wait. */
  state->m_index= lock_type;
  return reinterpret_cast<PSI_table_locker*> (state);
}
2787
/**
  Implementation of the file instrumentation interface.
  @sa PSI_v1::get_thread_file_name_locker.
  Builds a locker for a file operation identified by name (the file
  instance itself is resolved later, at start/end time).
  @param state     locker state to initialize, owned by the caller
  @param key       the file instrument key
  @param op        the file operation performed
  @param name      the file name
  @param identity  address used to identify the operation
  @return a locker, or NULL when the operation is not instrumented
*/
static PSI_file_locker*
get_thread_file_name_locker_v1(PSI_file_locker_state *state,
                               PSI_file_key key,
                               PSI_file_operation op,
                               const char *name, const void *identity)
{
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
  DBUG_ASSERT(state != NULL);

  if (! flag_global_instrumentation)
    return NULL;
  PFS_file_class *klass= find_file_class(key);
  if (unlikely(klass == NULL))
    return NULL;
  if (! klass->m_enabled)
    return NULL;

  /* Needed for the LF_HASH */
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(pfs_thread == NULL))
    return NULL;

  if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
    return NULL;

  uint flags;

  state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
  flags= STATE_FLAG_THREAD;

  if (klass->m_timed)
    flags|= STATE_FLAG_TIMED;

  if (flag_events_waits_current)
  {
    /* The per-thread wait stack is bounded; overflow counts as a loss. */
    if (unlikely(pfs_thread->m_events_waits_current >=
                 & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
    {
      locker_lost++;
      return NULL;
    }
    PFS_events_waits *wait= pfs_thread->m_events_waits_current;
    state->m_wait= wait;
    flags|= STATE_FLAG_EVENT;

    /* The event below the top of the stack is the nesting event. */
    PFS_events_waits *parent_event= wait - 1;
    wait->m_event_type= EVENT_TYPE_WAIT;
    wait->m_nesting_event_id= parent_event->m_event_id;
    wait->m_nesting_event_type= parent_event->m_event_type;

    wait->m_thread= pfs_thread;
    wait->m_class= klass;
    wait->m_timer_start= 0;
    wait->m_timer_end= 0;
    wait->m_object_instance_addr= NULL;
    /* The file instance is not known yet; filled in later. */
    wait->m_weak_file= NULL;
    wait->m_weak_version= 0;
    wait->m_event_id= pfs_thread->m_event_id++;
    wait->m_end_event_id= 0;
    wait->m_operation= file_operation_map[static_cast<int> (op)];
    wait->m_wait_class= WAIT_CLASS_FILE;

    /* Push the in-flight wait on the per-thread stack. */
    pfs_thread->m_events_waits_current++;
  }

  state->m_flags= flags;
  state->m_file= NULL;
  state->m_name= name;
  state->m_class= klass;
  state->m_operation= op;
  return reinterpret_cast<PSI_file_locker*> (state);
}
2865
2866 /**
2867 Implementation of the file instrumentation interface.
2868 @sa PSI_v1::get_thread_file_stream_locker.
2869 */
2870 static PSI_file_locker*
get_thread_file_stream_locker_v1(PSI_file_locker_state * state,PSI_file * file,PSI_file_operation op)2871 get_thread_file_stream_locker_v1(PSI_file_locker_state *state,
2872 PSI_file *file, PSI_file_operation op)
2873 {
2874 PFS_file *pfs_file= reinterpret_cast<PFS_file*> (file);
2875 DBUG_ASSERT(static_cast<int> (op) >= 0);
2876 DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
2877 DBUG_ASSERT(state != NULL);
2878
2879 if (unlikely(pfs_file == NULL))
2880 return NULL;
2881 DBUG_ASSERT(pfs_file->m_class != NULL);
2882 PFS_file_class *klass= pfs_file->m_class;
2883
2884 if (! pfs_file->m_enabled)
2885 return NULL;
2886
2887 uint flags;
2888
2889 if (flag_thread_instrumentation)
2890 {
2891 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2892 if (unlikely(pfs_thread == NULL))
2893 return NULL;
2894 if (! pfs_thread->m_enabled)
2895 return NULL;
2896 state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2897 flags= STATE_FLAG_THREAD;
2898
2899 if (pfs_file->m_timed)
2900 flags|= STATE_FLAG_TIMED;
2901
2902 if (flag_events_waits_current)
2903 {
2904 if (unlikely(pfs_thread->m_events_waits_current >=
2905 & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2906 {
2907 locker_lost++;
2908 return NULL;
2909 }
2910 PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2911 state->m_wait= wait;
2912 flags|= STATE_FLAG_EVENT;
2913
2914 PFS_events_waits *parent_event= wait - 1;
2915 wait->m_event_type= EVENT_TYPE_WAIT;
2916 wait->m_nesting_event_id= parent_event->m_event_id;
2917 wait->m_nesting_event_type= parent_event->m_event_type;
2918
2919 wait->m_thread= pfs_thread;
2920 wait->m_class= klass;
2921 wait->m_timer_start= 0;
2922 wait->m_timer_end= 0;
2923 wait->m_object_instance_addr= pfs_file;
2924 wait->m_weak_file= pfs_file;
2925 wait->m_weak_version= pfs_file->get_version();
2926 wait->m_event_id= pfs_thread->m_event_id++;
2927 wait->m_end_event_id= 0;
2928 wait->m_operation= file_operation_map[static_cast<int> (op)];
2929 wait->m_wait_class= WAIT_CLASS_FILE;
2930
2931 pfs_thread->m_events_waits_current++;
2932 }
2933 }
2934 else
2935 {
2936 state->m_thread= NULL;
2937 if (pfs_file->m_timed)
2938 {
2939 flags= STATE_FLAG_TIMED;
2940 }
2941 else
2942 {
2943 /* TODO: consider a shortcut. */
2944 flags= 0;
2945 }
2946 }
2947
2948 state->m_flags= flags;
2949 state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
2950 state->m_operation= op;
2951 state->m_name= NULL;
2952 state->m_class= klass;
2953 return reinterpret_cast<PSI_file_locker*> (state);
2954 }
2955
2956 /**
2957 Implementation of the file instrumentation interface.
2958 @sa PSI_v1::get_thread_file_descriptor_locker.
2959 */
2960 static PSI_file_locker*
get_thread_file_descriptor_locker_v1(PSI_file_locker_state * state,File file,PSI_file_operation op)2961 get_thread_file_descriptor_locker_v1(PSI_file_locker_state *state,
2962 File file, PSI_file_operation op)
2963 {
2964 int index= static_cast<int> (file);
2965 DBUG_ASSERT(static_cast<int> (op) >= 0);
2966 DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
2967 DBUG_ASSERT(state != NULL);
2968
2969 if (unlikely((index < 0) || (index >= file_handle_max)))
2970 return NULL;
2971
2972 PFS_file *pfs_file= file_handle_array[index];
2973 if (unlikely(pfs_file == NULL))
2974 return NULL;
2975
2976 /*
2977 We are about to close a file by descriptor number,
2978 and the calling code still holds the descriptor.
2979 Cleanup the file descriptor <--> file instrument association.
2980 Remove the instrumentation *before* the close to avoid race
2981 conditions with another thread opening a file
2982 (that could be given the same descriptor).
2983 */
2984 if (op == PSI_FILE_CLOSE)
2985 file_handle_array[index]= NULL;
2986
2987 if (! pfs_file->m_enabled)
2988 return NULL;
2989
2990 DBUG_ASSERT(pfs_file->m_class != NULL);
2991 PFS_file_class *klass= pfs_file->m_class;
2992
2993 uint flags;
2994
2995 if (flag_thread_instrumentation)
2996 {
2997 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2998 if (unlikely(pfs_thread == NULL))
2999 return NULL;
3000 if (! pfs_thread->m_enabled)
3001 return NULL;
3002 state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
3003 flags= STATE_FLAG_THREAD;
3004
3005 if (pfs_file->m_timed)
3006 flags|= STATE_FLAG_TIMED;
3007
3008 if (flag_events_waits_current)
3009 {
3010 if (unlikely(pfs_thread->m_events_waits_current >=
3011 & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
3012 {
3013 locker_lost++;
3014 return NULL;
3015 }
3016 PFS_events_waits *wait= pfs_thread->m_events_waits_current;
3017 state->m_wait= wait;
3018 flags|= STATE_FLAG_EVENT;
3019
3020 PFS_events_waits *parent_event= wait - 1;
3021 wait->m_event_type= EVENT_TYPE_WAIT;
3022 wait->m_nesting_event_id= parent_event->m_event_id;
3023 wait->m_nesting_event_type= parent_event->m_event_type;
3024
3025 wait->m_thread= pfs_thread;
3026 wait->m_class= klass;
3027 wait->m_timer_start= 0;
3028 wait->m_timer_end= 0;
3029 wait->m_object_instance_addr= pfs_file;
3030 wait->m_weak_file= pfs_file;
3031 wait->m_weak_version= pfs_file->get_version();
3032 wait->m_event_id= pfs_thread->m_event_id++;
3033 wait->m_end_event_id= 0;
3034 wait->m_operation= file_operation_map[static_cast<int> (op)];
3035 wait->m_wait_class= WAIT_CLASS_FILE;
3036
3037 pfs_thread->m_events_waits_current++;
3038 }
3039 }
3040 else
3041 {
3042 state->m_thread= NULL;
3043 if (pfs_file->m_timed)
3044 {
3045 flags= STATE_FLAG_TIMED;
3046 }
3047 else
3048 {
3049 /* TODO: consider a shortcut. */
3050 flags= 0;
3051 }
3052 }
3053
3054 state->m_flags= flags;
3055 state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
3056 state->m_operation= op;
3057 state->m_name= NULL;
3058 state->m_class= klass;
3059 return reinterpret_cast<PSI_file_locker*> (state);
3060 }
3061
/** Socket locker */

/**
  Implementation of the socket instrumentation interface.
  Build a locker and start a wait for a socket operation.
  @sa PSI_v1::start_socket_wait.
  @param state    locker state to initialize, owned by the caller
  @param socket   the instrumented socket
  @param op       socket operation about to be performed
  @param count    number of bytes requested by the operation
  @param src_file caller source file, for EVENTS_WAITS tables
  @param src_line caller source line, for EVENTS_WAITS tables
  @return an opaque locker, or NULL if instrumentation is disabled
*/
static PSI_socket_locker*
start_socket_wait_v1(PSI_socket_locker_state *state,
                     PSI_socket *socket,
                     PSI_socket_operation op,
                     size_t count,
                     const char *src_file, uint src_line)
{
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(socket_operation_map));
  DBUG_ASSERT(state != NULL);
  PFS_socket *pfs_socket= reinterpret_cast<PFS_socket*> (socket);

  DBUG_ASSERT(pfs_socket != NULL);
  DBUG_ASSERT(pfs_socket->m_class != NULL);

  /* Idle sockets are accounted by the idle instrumentation instead. */
  if (!pfs_socket->m_enabled || pfs_socket->m_idle)
    return NULL;

  uint flags= 0;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    /*
      Do not use pfs_socket->m_thread_owner here,
      as different threads may use concurrently the same socket,
      for example during a KILL.
    */
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

    if (unlikely(pfs_thread == NULL))
      return NULL;

    if (!pfs_thread->m_enabled)
      return NULL;

    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_socket->m_timed)
    {
      /* Capture both the raw start value and the timer function. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        /* Wait nesting is too deep: account the loss and do not instrument. */
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The slot below the current top of the wait stack is the parent wait. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id=   parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;
      wait->m_thread=       pfs_thread;
      wait->m_class=        pfs_socket->m_class;
      wait->m_timer_start=  timer_start;
      wait->m_timer_end=    0;
      wait->m_object_instance_addr= pfs_socket->m_identity;
      /* Pointer + version pair; presumably detects instance reuse later. */
      wait->m_weak_socket=  pfs_socket;
      wait->m_weak_version= pfs_socket->get_version();
      wait->m_event_id=     pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation=    socket_operation_map[static_cast<int>(op)];
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_number_of_bytes= count;
      wait->m_wait_class=   WAIT_CLASS_SOCKET;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_socket->m_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /*
        Even if timing is disabled, end_socket_wait() still needs a locker to
        capture the number of bytes sent or received by the socket operation.
        For operations that do not have a byte count, then just increment the
        event counter and return a NULL locker.
      */
      switch (op)
      {
        case PSI_SOCKET_CONNECT:
        case PSI_SOCKET_CREATE:
        case PSI_SOCKET_BIND:
        case PSI_SOCKET_SEEK:
        case PSI_SOCKET_OPT:
        case PSI_SOCKET_STAT:
        case PSI_SOCKET_SHUTDOWN:
        case PSI_SOCKET_CLOSE:
        case PSI_SOCKET_SELECT:
          pfs_socket->m_socket_stat.m_io_stat.m_misc.aggregate_counted();
          return NULL;
        default:
          break;
      }
    }
  }

  state->m_flags= flags;
  state->m_socket= socket;
  state->m_operation= op;
  return reinterpret_cast<PSI_socket_locker*> (state);
}
3184
3185 /**
3186 Implementation of the mutex instrumentation interface.
3187 @sa PSI_v1::unlock_mutex.
3188 */
unlock_mutex_v1(PSI_mutex * mutex)3189 static void unlock_mutex_v1(PSI_mutex *mutex)
3190 {
3191 PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
3192
3193 DBUG_ASSERT(pfs_mutex != NULL);
3194
3195 /*
3196 Note that this code is still protected by the instrumented mutex,
3197 and therefore is thread safe. See inline_mysql_mutex_unlock().
3198 */
3199
3200 /* Always update the instrumented state */
3201 pfs_mutex->m_owner= NULL;
3202 pfs_mutex->m_last_locked= 0;
3203
3204 #ifdef LATER_WL2333
3205 /*
3206 See WL#2333: SHOW ENGINE ... LOCK STATUS.
3207 PFS_mutex::m_lock_stat is not exposed in user visible tables
3208 currently, so there is no point spending time computing it.
3209 */
3210 if (! pfs_mutex->m_enabled)
3211 return;
3212
3213 if (! pfs_mutex->m_timed)
3214 return;
3215
3216 ulonglong locked_time;
3217 locked_time= get_timer_pico_value(wait_timer) - pfs_mutex->m_last_locked;
3218 pfs_mutex->m_mutex_stat.m_lock_stat.aggregate_value(locked_time);
3219 #endif
3220 }
3221
3222 /**
3223 Implementation of the rwlock instrumentation interface.
3224 @sa PSI_v1::unlock_rwlock.
3225 */
unlock_rwlock_v1(PSI_rwlock * rwlock)3226 static void unlock_rwlock_v1(PSI_rwlock *rwlock)
3227 {
3228 PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
3229 DBUG_ASSERT(pfs_rwlock != NULL);
3230 DBUG_ASSERT(pfs_rwlock == sanitize_rwlock(pfs_rwlock));
3231 DBUG_ASSERT(pfs_rwlock->m_class != NULL);
3232 DBUG_ASSERT(pfs_rwlock->m_lock.is_populated());
3233
3234 bool last_writer= false;
3235 bool last_reader= false;
3236
3237 /*
3238 Note that this code is still protected by the instrumented rwlock,
3239 and therefore is:
3240 - thread safe for write locks
3241 - almost thread safe for read locks (pfs_rwlock->m_readers is unsafe).
3242 See inline_mysql_rwlock_unlock()
3243 */
3244
3245 /* Always update the instrumented state */
3246 if (pfs_rwlock->m_writer != NULL)
3247 {
3248 /* Nominal case, a writer is unlocking. */
3249 last_writer= true;
3250 pfs_rwlock->m_writer= NULL;
3251 /* Reset the readers stats, they could be off */
3252 pfs_rwlock->m_readers= 0;
3253 }
3254 else if (likely(pfs_rwlock->m_readers > 0))
3255 {
3256 /* Nominal case, a reader is unlocking. */
3257 if (--(pfs_rwlock->m_readers) == 0)
3258 last_reader= true;
3259 }
3260 else
3261 {
3262 /*
3263 Edge case, we have no writer and no readers,
3264 on an unlock event.
3265 This is possible for:
3266 - partial instrumentation
3267 - instrumentation disabled at runtime,
3268 see when get_thread_rwlock_locker_v1() returns NULL
3269 No further action is taken here, the next
3270 write lock will put the statistics is a valid state.
3271 */
3272 }
3273
3274 #ifdef LATER_WL2333
3275 /* See WL#2333: SHOW ENGINE ... LOCK STATUS. */
3276
3277 if (! pfs_rwlock->m_enabled)
3278 return;
3279
3280 if (! pfs_rwlock->m_timed)
3281 return;
3282
3283 ulonglong locked_time;
3284 if (last_writer)
3285 {
3286 locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_written;
3287 pfs_rwlock->m_rwlock_stat.m_write_lock_stat.aggregate_value(locked_time);
3288 }
3289 else if (last_reader)
3290 {
3291 locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_read;
3292 pfs_rwlock->m_rwlock_stat.m_read_lock_stat.aggregate_value(locked_time);
3293 }
3294 #else
3295 (void) last_reader;
3296 (void) last_writer;
3297 #endif
3298 }
3299
3300 /**
3301 Implementation of the cond instrumentation interface.
3302 @sa PSI_v1::signal_cond.
3303 */
signal_cond_v1(PSI_cond * cond)3304 static void signal_cond_v1(PSI_cond* cond)
3305 {
3306 PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
3307
3308 DBUG_ASSERT(pfs_cond != NULL);
3309
3310 pfs_cond->m_cond_stat.m_signal_count++;
3311 }
3312
3313 /**
3314 Implementation of the cond instrumentation interface.
3315 @sa PSI_v1::broadcast_cond.
3316 */
broadcast_cond_v1(PSI_cond * cond)3317 static void broadcast_cond_v1(PSI_cond* cond)
3318 {
3319 PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
3320
3321 DBUG_ASSERT(pfs_cond != NULL);
3322
3323 pfs_cond->m_cond_stat.m_broadcast_count++;
3324 }
3325
3326 /**
3327 Implementation of the idle instrumentation interface.
3328 @sa PSI_v1::start_idle_wait.
3329 */
3330 static PSI_idle_locker*
start_idle_wait_v1(PSI_idle_locker_state * state,const char * src_file,uint src_line)3331 start_idle_wait_v1(PSI_idle_locker_state* state, const char *src_file, uint src_line)
3332 {
3333 DBUG_ASSERT(state != NULL);
3334
3335 if (!flag_global_instrumentation)
3336 return NULL;
3337
3338 if (!global_idle_class.m_enabled)
3339 return NULL;
3340
3341 uint flags= 0;
3342 ulonglong timer_start= 0;
3343
3344 if (flag_thread_instrumentation)
3345 {
3346 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
3347 if (unlikely(pfs_thread == NULL))
3348 return NULL;
3349 if (!pfs_thread->m_enabled)
3350 return NULL;
3351 state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
3352 flags= STATE_FLAG_THREAD;
3353
3354 DBUG_ASSERT(pfs_thread->m_events_statements_count == 0);
3355
3356 if (global_idle_class.m_timed)
3357 {
3358 timer_start= get_timer_raw_value_and_function(idle_timer, &state->m_timer);
3359 state->m_timer_start= timer_start;
3360 flags|= STATE_FLAG_TIMED;
3361 }
3362
3363 if (flag_events_waits_current)
3364 {
3365 if (unlikely(pfs_thread->m_events_waits_current >=
3366 & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
3367 {
3368 locker_lost++;
3369 return NULL;
3370 }
3371 PFS_events_waits *wait= pfs_thread->m_events_waits_current;
3372 state->m_wait= wait;
3373 flags|= STATE_FLAG_EVENT;
3374
3375 wait->m_event_type= EVENT_TYPE_WAIT;
3376 /*
3377 IDLE events are waits, but by definition we know that
3378 such waits happen outside of any STAGE and STATEMENT,
3379 so they have no parents.
3380 */
3381 wait->m_nesting_event_id= 0;
3382 /* no need to set wait->m_nesting_event_type */
3383
3384 wait->m_thread= pfs_thread;
3385 wait->m_class= &global_idle_class;
3386 wait->m_timer_start= timer_start;
3387 wait->m_timer_end= 0;
3388 wait->m_event_id= pfs_thread->m_event_id++;
3389 wait->m_end_event_id= 0;
3390 wait->m_operation= OPERATION_TYPE_IDLE;
3391 wait->m_source_file= src_file;
3392 wait->m_source_line= src_line;
3393 wait->m_wait_class= WAIT_CLASS_IDLE;
3394
3395 pfs_thread->m_events_waits_current++;
3396 }
3397 }
3398 else
3399 {
3400 if (global_idle_class.m_timed)
3401 {
3402 timer_start= get_timer_raw_value_and_function(idle_timer, &state->m_timer);
3403 state->m_timer_start= timer_start;
3404 flags= STATE_FLAG_TIMED;
3405 }
3406 }
3407
3408 state->m_flags= flags;
3409 return reinterpret_cast<PSI_idle_locker*> (state);
3410 }
3411
3412 /**
3413 Implementation of the mutex instrumentation interface.
3414 @sa PSI_v1::end_idle_wait.
3415 */
end_idle_wait_v1(PSI_idle_locker * locker)3416 static void end_idle_wait_v1(PSI_idle_locker* locker)
3417 {
3418 PSI_idle_locker_state *state= reinterpret_cast<PSI_idle_locker_state*> (locker);
3419 DBUG_ASSERT(state != NULL);
3420 ulonglong timer_end= 0;
3421 ulonglong wait_time= 0;
3422
3423 uint flags= state->m_flags;
3424
3425 if (flags & STATE_FLAG_TIMED)
3426 {
3427 timer_end= state->m_timer();
3428 wait_time= timer_end - state->m_timer_start;
3429 }
3430
3431 if (flags & STATE_FLAG_THREAD)
3432 {
3433 PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3434 PFS_single_stat *event_name_array;
3435 event_name_array= thread->m_instr_class_waits_stats;
3436
3437 if (flags & STATE_FLAG_TIMED)
3438 {
3439 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3440 event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_value(wait_time);
3441 }
3442 else
3443 {
3444 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3445 event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_counted();
3446 }
3447
3448 if (flags & STATE_FLAG_EVENT)
3449 {
3450 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3451 DBUG_ASSERT(wait != NULL);
3452
3453 wait->m_timer_end= timer_end;
3454 wait->m_end_event_id= thread->m_event_id;
3455 if (flag_events_waits_history)
3456 insert_events_waits_history(thread, wait);
3457 if (flag_events_waits_history_long)
3458 insert_events_waits_history_long(wait);
3459 thread->m_events_waits_current--;
3460
3461 DBUG_ASSERT(wait == thread->m_events_waits_current);
3462 }
3463 }
3464
3465 if (flags & STATE_FLAG_TIMED)
3466 {
3467 /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (timed) */
3468 global_idle_stat.aggregate_value(wait_time);
3469 }
3470 else
3471 {
3472 /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (counted) */
3473 global_idle_stat.aggregate_counted();
3474 }
3475 }
3476
3477 /**
3478 Implementation of the mutex instrumentation interface.
3479 @sa PSI_v1::end_mutex_wait.
3480 */
end_mutex_wait_v1(PSI_mutex_locker * locker,int rc)3481 static void end_mutex_wait_v1(PSI_mutex_locker* locker, int rc)
3482 {
3483 PSI_mutex_locker_state *state= reinterpret_cast<PSI_mutex_locker_state*> (locker);
3484 DBUG_ASSERT(state != NULL);
3485
3486 ulonglong timer_end= 0;
3487 ulonglong wait_time= 0;
3488
3489 PFS_mutex *mutex= reinterpret_cast<PFS_mutex *> (state->m_mutex);
3490 DBUG_ASSERT(mutex != NULL);
3491 PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3492
3493 uint flags= state->m_flags;
3494
3495 if (flags & STATE_FLAG_TIMED)
3496 {
3497 timer_end= state->m_timer();
3498 wait_time= timer_end - state->m_timer_start;
3499 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3500 mutex->m_mutex_stat.m_wait_stat.aggregate_value(wait_time);
3501 }
3502 else
3503 {
3504 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3505 mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
3506 }
3507
3508 if (likely(rc == 0))
3509 {
3510 mutex->m_owner= thread;
3511 mutex->m_last_locked= timer_end;
3512 }
3513
3514 if (flags & STATE_FLAG_THREAD)
3515 {
3516 PFS_single_stat *event_name_array;
3517 event_name_array= thread->m_instr_class_waits_stats;
3518 uint index= mutex->m_class->m_event_name_index;
3519
3520 if (flags & STATE_FLAG_TIMED)
3521 {
3522 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3523 event_name_array[index].aggregate_value(wait_time);
3524 }
3525 else
3526 {
3527 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3528 event_name_array[index].aggregate_counted();
3529 }
3530
3531 if (flags & STATE_FLAG_EVENT)
3532 {
3533 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3534 DBUG_ASSERT(wait != NULL);
3535
3536 wait->m_timer_end= timer_end;
3537 wait->m_end_event_id= thread->m_event_id;
3538 if (flag_events_waits_history)
3539 insert_events_waits_history(thread, wait);
3540 if (flag_events_waits_history_long)
3541 insert_events_waits_history_long(wait);
3542 thread->m_events_waits_current--;
3543
3544 DBUG_ASSERT(wait == thread->m_events_waits_current);
3545 }
3546 }
3547 }
3548
3549 /**
3550 Implementation of the rwlock instrumentation interface.
3551 @sa PSI_v1::end_rwlock_rdwait.
3552 */
end_rwlock_rdwait_v1(PSI_rwlock_locker * locker,int rc)3553 static void end_rwlock_rdwait_v1(PSI_rwlock_locker* locker, int rc)
3554 {
3555 PSI_rwlock_locker_state *state= reinterpret_cast<PSI_rwlock_locker_state*> (locker);
3556 DBUG_ASSERT(state != NULL);
3557
3558 ulonglong timer_end= 0;
3559 ulonglong wait_time= 0;
3560
3561 PFS_rwlock *rwlock= reinterpret_cast<PFS_rwlock *> (state->m_rwlock);
3562 DBUG_ASSERT(rwlock != NULL);
3563
3564 if (state->m_flags & STATE_FLAG_TIMED)
3565 {
3566 timer_end= state->m_timer();
3567 wait_time= timer_end - state->m_timer_start;
3568 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3569 rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
3570 }
3571 else
3572 {
3573 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3574 rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
3575 }
3576
3577 if (rc == 0)
3578 {
3579 /*
3580 Warning:
3581 Multiple threads can execute this section concurrently
3582 (since multiple readers can execute in parallel).
3583 The statistics generated are not safe, which is why they are
3584 just statistics, not facts.
3585 */
3586 if (rwlock->m_readers == 0)
3587 rwlock->m_last_read= timer_end;
3588 rwlock->m_writer= NULL;
3589 rwlock->m_readers++;
3590 }
3591
3592 if (state->m_flags & STATE_FLAG_THREAD)
3593 {
3594 PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3595 DBUG_ASSERT(thread != NULL);
3596
3597 PFS_single_stat *event_name_array;
3598 event_name_array= thread->m_instr_class_waits_stats;
3599 uint index= rwlock->m_class->m_event_name_index;
3600
3601 if (state->m_flags & STATE_FLAG_TIMED)
3602 {
3603 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3604 event_name_array[index].aggregate_value(wait_time);
3605 }
3606 else
3607 {
3608 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3609 event_name_array[index].aggregate_counted();
3610 }
3611
3612 if (state->m_flags & STATE_FLAG_EVENT)
3613 {
3614 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3615 DBUG_ASSERT(wait != NULL);
3616
3617 wait->m_timer_end= timer_end;
3618 wait->m_end_event_id= thread->m_event_id;
3619 if (flag_events_waits_history)
3620 insert_events_waits_history(thread, wait);
3621 if (flag_events_waits_history_long)
3622 insert_events_waits_history_long(wait);
3623 thread->m_events_waits_current--;
3624
3625 DBUG_ASSERT(wait == thread->m_events_waits_current);
3626 }
3627 }
3628 }
3629
3630 /**
3631 Implementation of the rwlock instrumentation interface.
3632 @sa PSI_v1::end_rwlock_wrwait.
3633 */
end_rwlock_wrwait_v1(PSI_rwlock_locker * locker,int rc)3634 static void end_rwlock_wrwait_v1(PSI_rwlock_locker* locker, int rc)
3635 {
3636 PSI_rwlock_locker_state *state= reinterpret_cast<PSI_rwlock_locker_state*> (locker);
3637 DBUG_ASSERT(state != NULL);
3638
3639 ulonglong timer_end= 0;
3640 ulonglong wait_time= 0;
3641
3642 PFS_rwlock *rwlock= reinterpret_cast<PFS_rwlock *> (state->m_rwlock);
3643 DBUG_ASSERT(rwlock != NULL);
3644 PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3645
3646 if (state->m_flags & STATE_FLAG_TIMED)
3647 {
3648 timer_end= state->m_timer();
3649 wait_time= timer_end - state->m_timer_start;
3650 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3651 rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
3652 }
3653 else
3654 {
3655 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3656 rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
3657 }
3658
3659 if (likely(rc == 0))
3660 {
3661 /* Thread safe : we are protected by the instrumented rwlock */
3662 rwlock->m_writer= thread;
3663 rwlock->m_last_written= timer_end;
3664 /* Reset the readers stats, they could be off */
3665 rwlock->m_readers= 0;
3666 rwlock->m_last_read= 0;
3667 }
3668
3669 if (state->m_flags & STATE_FLAG_THREAD)
3670 {
3671 PFS_single_stat *event_name_array;
3672 event_name_array= thread->m_instr_class_waits_stats;
3673 uint index= rwlock->m_class->m_event_name_index;
3674
3675 if (state->m_flags & STATE_FLAG_TIMED)
3676 {
3677 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3678 event_name_array[index].aggregate_value(wait_time);
3679 }
3680 else
3681 {
3682 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3683 event_name_array[index].aggregate_counted();
3684 }
3685
3686 if (state->m_flags & STATE_FLAG_EVENT)
3687 {
3688 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3689 DBUG_ASSERT(wait != NULL);
3690
3691 wait->m_timer_end= timer_end;
3692 wait->m_end_event_id= thread->m_event_id;
3693 if (flag_events_waits_history)
3694 insert_events_waits_history(thread, wait);
3695 if (flag_events_waits_history_long)
3696 insert_events_waits_history_long(wait);
3697 thread->m_events_waits_current--;
3698
3699 DBUG_ASSERT(wait == thread->m_events_waits_current);
3700 }
3701 }
3702 }
3703
3704 /**
3705 Implementation of the cond instrumentation interface.
3706 @sa PSI_v1::end_cond_wait.
3707 */
end_cond_wait_v1(PSI_cond_locker * locker,int rc)3708 static void end_cond_wait_v1(PSI_cond_locker* locker, int rc)
3709 {
3710 PSI_cond_locker_state *state= reinterpret_cast<PSI_cond_locker_state*> (locker);
3711 DBUG_ASSERT(state != NULL);
3712
3713 ulonglong timer_end= 0;
3714 ulonglong wait_time= 0;
3715
3716 PFS_cond *cond= reinterpret_cast<PFS_cond *> (state->m_cond);
3717 /* PFS_mutex *mutex= reinterpret_cast<PFS_mutex *> (state->m_mutex); */
3718
3719 if (state->m_flags & STATE_FLAG_TIMED)
3720 {
3721 timer_end= state->m_timer();
3722 wait_time= timer_end - state->m_timer_start;
3723 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3724 cond->m_cond_stat.m_wait_stat.aggregate_value(wait_time);
3725 }
3726 else
3727 {
3728 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3729 cond->m_cond_stat.m_wait_stat.aggregate_counted();
3730 }
3731
3732 if (state->m_flags & STATE_FLAG_THREAD)
3733 {
3734 PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3735 DBUG_ASSERT(thread != NULL);
3736
3737 PFS_single_stat *event_name_array;
3738 event_name_array= thread->m_instr_class_waits_stats;
3739 uint index= cond->m_class->m_event_name_index;
3740
3741 if (state->m_flags & STATE_FLAG_TIMED)
3742 {
3743 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3744 event_name_array[index].aggregate_value(wait_time);
3745 }
3746 else
3747 {
3748 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3749 event_name_array[index].aggregate_counted();
3750 }
3751
3752 if (state->m_flags & STATE_FLAG_EVENT)
3753 {
3754 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3755 DBUG_ASSERT(wait != NULL);
3756
3757 wait->m_timer_end= timer_end;
3758 wait->m_end_event_id= thread->m_event_id;
3759 if (flag_events_waits_history)
3760 insert_events_waits_history(thread, wait);
3761 if (flag_events_waits_history_long)
3762 insert_events_waits_history_long(wait);
3763 thread->m_events_waits_current--;
3764
3765 DBUG_ASSERT(wait == thread->m_events_waits_current);
3766 }
3767 }
3768 }
3769
3770 /**
3771 Implementation of the table instrumentation interface.
3772 @sa PSI_v1::end_table_io_wait.
3773 */
end_table_io_wait_v1(PSI_table_locker * locker)3774 static void end_table_io_wait_v1(PSI_table_locker* locker)
3775 {
3776 PSI_table_locker_state *state= reinterpret_cast<PSI_table_locker_state*> (locker);
3777 DBUG_ASSERT(state != NULL);
3778
3779 ulonglong timer_end= 0;
3780 ulonglong wait_time= 0;
3781
3782 PFS_table *table= reinterpret_cast<PFS_table *> (state->m_table);
3783 DBUG_ASSERT(table != NULL);
3784
3785 PFS_single_stat *stat;
3786 PFS_table_io_stat *table_io_stat;
3787
3788 DBUG_ASSERT((state->m_index < table->m_share->m_key_count) ||
3789 (state->m_index == MAX_INDEXES));
3790
3791 table_io_stat= & table->m_table_stat.m_index_stat[state->m_index];
3792 table_io_stat->m_has_data= true;
3793
3794 switch (state->m_io_operation)
3795 {
3796 case PSI_TABLE_FETCH_ROW:
3797 stat= & table_io_stat->m_fetch;
3798 break;
3799 case PSI_TABLE_WRITE_ROW:
3800 stat= & table_io_stat->m_insert;
3801 break;
3802 case PSI_TABLE_UPDATE_ROW:
3803 stat= & table_io_stat->m_update;
3804 break;
3805 case PSI_TABLE_DELETE_ROW:
3806 stat= & table_io_stat->m_delete;
3807 break;
3808 default:
3809 DBUG_ASSERT(false);
3810 stat= NULL;
3811 break;
3812 }
3813
3814 uint flags= state->m_flags;
3815
3816 if (flags & STATE_FLAG_TIMED)
3817 {
3818 timer_end= state->m_timer();
3819 wait_time= timer_end - state->m_timer_start;
3820 stat->aggregate_value(wait_time);
3821 }
3822 else
3823 {
3824 stat->aggregate_counted();
3825 }
3826
3827 if (flags & STATE_FLAG_THREAD)
3828 {
3829 PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3830 DBUG_ASSERT(thread != NULL);
3831
3832 PFS_single_stat *event_name_array;
3833 event_name_array= thread->m_instr_class_waits_stats;
3834
3835 /*
3836 Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
3837 (for wait/io/table/sql/handler)
3838 */
3839 if (flags & STATE_FLAG_TIMED)
3840 {
3841 event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_value(wait_time);
3842 }
3843 else
3844 {
3845 event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_counted();
3846 }
3847
3848 if (flags & STATE_FLAG_EVENT)
3849 {
3850 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3851 DBUG_ASSERT(wait != NULL);
3852
3853 wait->m_timer_end= timer_end;
3854 wait->m_end_event_id= thread->m_event_id;
3855 if (flag_events_waits_history)
3856 insert_events_waits_history(thread, wait);
3857 if (flag_events_waits_history_long)
3858 insert_events_waits_history_long(wait);
3859 thread->m_events_waits_current--;
3860
3861 DBUG_ASSERT(wait == thread->m_events_waits_current);
3862 }
3863 }
3864
3865 table->m_has_io_stats= true;
3866 }
3867
3868 /**
3869 Implementation of the table instrumentation interface.
3870 @sa PSI_v1::end_table_lock_wait.
3871 */
end_table_lock_wait_v1(PSI_table_locker * locker)3872 static void end_table_lock_wait_v1(PSI_table_locker* locker)
3873 {
3874 PSI_table_locker_state *state= reinterpret_cast<PSI_table_locker_state*> (locker);
3875 DBUG_ASSERT(state != NULL);
3876
3877 ulonglong timer_end= 0;
3878 ulonglong wait_time= 0;
3879
3880 PFS_table *table= reinterpret_cast<PFS_table *> (state->m_table);
3881 DBUG_ASSERT(table != NULL);
3882
3883 PFS_single_stat *stat= & table->m_table_stat.m_lock_stat.m_stat[state->m_index];
3884
3885 uint flags= state->m_flags;
3886
3887 if (flags & STATE_FLAG_TIMED)
3888 {
3889 timer_end= state->m_timer();
3890 wait_time= timer_end - state->m_timer_start;
3891 stat->aggregate_value(wait_time);
3892 }
3893 else
3894 {
3895 stat->aggregate_counted();
3896 }
3897
3898 if (flags & STATE_FLAG_THREAD)
3899 {
3900 PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3901 DBUG_ASSERT(thread != NULL);
3902
3903 PFS_single_stat *event_name_array;
3904 event_name_array= thread->m_instr_class_waits_stats;
3905
3906 /*
3907 Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
3908 (for wait/lock/table/sql/handler)
3909 */
3910 if (flags & STATE_FLAG_TIMED)
3911 {
3912 event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_value(wait_time);
3913 }
3914 else
3915 {
3916 event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_counted();
3917 }
3918
3919 if (flags & STATE_FLAG_EVENT)
3920 {
3921 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3922 DBUG_ASSERT(wait != NULL);
3923
3924 wait->m_timer_end= timer_end;
3925 wait->m_end_event_id= thread->m_event_id;
3926 if (flag_events_waits_history)
3927 insert_events_waits_history(thread, wait);
3928 if (flag_events_waits_history_long)
3929 insert_events_waits_history_long(wait);
3930 thread->m_events_waits_current--;
3931
3932 DBUG_ASSERT(wait == thread->m_events_waits_current);
3933 }
3934 }
3935
3936 table->m_has_lock_stats= true;
3937 }
3938
3939 static void start_file_wait_v1(PSI_file_locker *locker,
3940 size_t count,
3941 const char *src_file,
3942 uint src_line);
3943
3944 static void end_file_wait_v1(PSI_file_locker *locker,
3945 size_t count);
3946
3947 /**
3948 Implementation of the file instrumentation interface.
3949 @sa PSI_v1::start_file_open_wait.
3950 */
start_file_open_wait_v1(PSI_file_locker * locker,const char * src_file,uint src_line)3951 static void start_file_open_wait_v1(PSI_file_locker *locker,
3952 const char *src_file,
3953 uint src_line)
3954 {
3955 start_file_wait_v1(locker, 0, src_file, src_line);
3956
3957 return;
3958 }
3959
3960 /**
3961 Implementation of the file instrumentation interface.
3962 @sa PSI_v1::end_file_open_wait.
3963 */
end_file_open_wait_v1(PSI_file_locker * locker,void * result)3964 static PSI_file* end_file_open_wait_v1(PSI_file_locker *locker,
3965 void *result)
3966 {
3967 PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
3968 DBUG_ASSERT(state != NULL);
3969
3970 switch (state->m_operation)
3971 {
3972 case PSI_FILE_STAT:
3973 case PSI_FILE_RENAME:
3974 break;
3975 case PSI_FILE_STREAM_OPEN:
3976 case PSI_FILE_CREATE:
3977 case PSI_FILE_OPEN:
3978 if (result != NULL)
3979 {
3980 PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
3981 PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
3982 const char *name= state->m_name;
3983 uint len= strlen(name);
3984 PFS_file *pfs_file= find_or_create_file(thread, klass, name, len, true);
3985 state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
3986 }
3987 break;
3988 default:
3989 DBUG_ASSERT(false);
3990 break;
3991 }
3992
3993 end_file_wait_v1(locker, 0);
3994
3995 return state->m_file;
3996 }
3997
3998 /**
3999 Implementation of the file instrumentation interface.
4000 @sa PSI_v1::end_file_open_wait_and_bind_to_descriptor.
4001 */
end_file_open_wait_and_bind_to_descriptor_v1(PSI_file_locker * locker,File file)4002 static void end_file_open_wait_and_bind_to_descriptor_v1
4003 (PSI_file_locker *locker, File file)
4004 {
4005 PFS_file *pfs_file= NULL;
4006 int index= (int) file;
4007 PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4008 DBUG_ASSERT(state != NULL);
4009
4010 if (index >= 0)
4011 {
4012 PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
4013 PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4014 const char *name= state->m_name;
4015 uint len= strlen(name);
4016 pfs_file= find_or_create_file(thread, klass, name, len, true);
4017 state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
4018 }
4019
4020 end_file_wait_v1(locker, 0);
4021
4022 if (likely(index >= 0))
4023 {
4024 if (likely(index < file_handle_max))
4025 file_handle_array[index]= pfs_file;
4026 else
4027 {
4028 if (pfs_file != NULL)
4029 release_file(pfs_file);
4030 file_handle_lost++;
4031 }
4032 }
4033 }
4034
4035 /**
4036 Implementation of the file instrumentation interface.
4037 @sa PSI_v1::start_file_wait.
4038 */
start_file_wait_v1(PSI_file_locker * locker,size_t count,const char * src_file,uint src_line)4039 static void start_file_wait_v1(PSI_file_locker *locker,
4040 size_t count,
4041 const char *src_file,
4042 uint src_line)
4043 {
4044 ulonglong timer_start= 0;
4045 PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4046 DBUG_ASSERT(state != NULL);
4047
4048 uint flags= state->m_flags;
4049
4050 if (flags & STATE_FLAG_TIMED)
4051 {
4052 timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
4053 state->m_timer_start= timer_start;
4054 }
4055
4056 if (flags & STATE_FLAG_EVENT)
4057 {
4058 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
4059 DBUG_ASSERT(wait != NULL);
4060
4061 wait->m_timer_start= timer_start;
4062 wait->m_source_file= src_file;
4063 wait->m_source_line= src_line;
4064 wait->m_number_of_bytes= count;
4065 }
4066 }
4067
/**
  Implementation of the file instrumentation interface.
  @sa PSI_v1::end_file_wait.
  Record the end of a file wait, and aggregate the statistics:
  - per file instance when one is bound to the locker, otherwise
    per file class,
  - per thread and event name, when the thread is instrumented,
  - into the wait event record / history, when an event is attached.
  @param locker      the file locker
  @param byte_count  number of bytes actually transferred
*/
static void end_file_wait_v1(PSI_file_locker *locker,
                             size_t byte_count)
{
  PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);
  PFS_file *file= reinterpret_cast<PFS_file *> (state->m_file);
  PFS_file_class *klass= reinterpret_cast<PFS_file_class *> (state->m_class);
  PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);

  ulonglong timer_end= 0;
  ulonglong wait_time= 0;
  PFS_byte_stat *byte_stat;
  uint flags= state->m_flags;
  /*
    A "negative" byte count (e.g. (size_t)-1 from a failed operation)
    is clamped to zero.
    NOTE(review): the (int) cast truncates counts above INT_MAX --
    presumably no single instrumented call transfers that much; confirm.
  */
  size_t bytes= ((int)byte_count > -1 ? byte_count : 0);

  PFS_file_stat *file_stat;

  /* Aggregate to the file instance when known, else to the file class. */
  if (file != NULL)
  {
    file_stat= & file->m_file_stat;
  }
  else
  {
    file_stat= & klass->m_file_stat;
  }

  /* Select the I/O statistic bucket for this operation. */
  switch (state->m_operation)
  {
  /* Group read operations */
  case PSI_FILE_READ:
    byte_stat= &file_stat->m_io_stat.m_read;
    break;
  /* Group write operations */
  case PSI_FILE_WRITE:
    byte_stat= &file_stat->m_io_stat.m_write;
    break;
  /* Group remaining operations as miscellaneous */
  case PSI_FILE_CREATE:
  case PSI_FILE_CREATE_TMP:
  case PSI_FILE_OPEN:
  case PSI_FILE_STREAM_OPEN:
  case PSI_FILE_STREAM_CLOSE:
  case PSI_FILE_SEEK:
  case PSI_FILE_TELL:
  case PSI_FILE_FLUSH:
  case PSI_FILE_FSTAT:
  case PSI_FILE_CHSIZE:
  case PSI_FILE_DELETE:
  case PSI_FILE_RENAME:
  case PSI_FILE_SYNC:
  case PSI_FILE_STAT:
  case PSI_FILE_CLOSE:
    byte_stat= &file_stat->m_io_stat.m_misc;
    break;
  default:
    DBUG_ASSERT(false);
    byte_stat= NULL;
    break;
  }

  /* Aggregation for EVENTS_WAITS_SUMMARY_BY_INSTANCE */
  if (flags & STATE_FLAG_TIMED)
  {
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
    byte_stat->aggregate(wait_time, bytes);
  }
  else
  {
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
    byte_stat->aggregate_counted(bytes);
  }

  if (flags & STATE_FLAG_THREAD)
  {
    DBUG_ASSERT(thread != NULL);

    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;
    uint index= klass->m_event_name_index;

    if (flags & STATE_FLAG_TIMED)
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
      event_name_array[index].aggregate_value(wait_time);
    }
    else
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
      event_name_array[index].aggregate_counted();
    }

    if (state->m_flags & STATE_FLAG_EVENT)
    {
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      /* Complete the wait event record, then publish it to the history. */
      wait->m_timer_end= timer_end;
      wait->m_number_of_bytes= bytes;
      wait->m_end_event_id= thread->m_event_id;
      wait->m_object_instance_addr= file;
      wait->m_weak_file= file;
      /* Weak reference: the file instance may be recycled later. */
      wait->m_weak_version= (file ? file->get_version() : 0);

      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }
}
4187
4188 /**
4189 Implementation of the file instrumentation interface.
4190 @sa PSI_v1::start_file_close_wait.
4191 */
start_file_close_wait_v1(PSI_file_locker * locker,const char * src_file,uint src_line)4192 static void start_file_close_wait_v1(PSI_file_locker *locker,
4193 const char *src_file,
4194 uint src_line)
4195 {
4196 PFS_thread *thread;
4197 const char *name;
4198 uint len;
4199 PFS_file *pfs_file;
4200 PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4201 DBUG_ASSERT(state != NULL);
4202
4203 switch (state->m_operation)
4204 {
4205 case PSI_FILE_DELETE:
4206 thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4207 name= state->m_name;
4208 len= strlen(name);
4209 pfs_file= find_or_create_file(thread, NULL, name, len, false);
4210 state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
4211 break;
4212 case PSI_FILE_STREAM_CLOSE:
4213 case PSI_FILE_CLOSE:
4214 break;
4215 default:
4216 DBUG_ASSERT(false);
4217 break;
4218 }
4219
4220 start_file_wait_v1(locker, 0, src_file, src_line);
4221
4222 return;
4223 }
4224
4225 /**
4226 Implementation of the file instrumentation interface.
4227 @sa PSI_v1::end_file_close_wait.
4228 */
end_file_close_wait_v1(PSI_file_locker * locker,int rc)4229 static void end_file_close_wait_v1(PSI_file_locker *locker, int rc)
4230 {
4231 PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4232 DBUG_ASSERT(state != NULL);
4233
4234 end_file_wait_v1(locker, 0);
4235
4236 if (rc == 0)
4237 {
4238 PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4239 PFS_file *file= reinterpret_cast<PFS_file*> (state->m_file);
4240
4241 /* Release or destroy the file if necessary */
4242 switch(state->m_operation)
4243 {
4244 case PSI_FILE_CLOSE:
4245 case PSI_FILE_STREAM_CLOSE:
4246 if (file != NULL)
4247 release_file(file);
4248 break;
4249 case PSI_FILE_DELETE:
4250 if (file != NULL)
4251 destroy_file(thread, file);
4252 break;
4253 default:
4254 DBUG_ASSERT(false);
4255 break;
4256 }
4257 }
4258 return;
4259 }
4260
start_stage_v1(PSI_stage_key key,const char * src_file,int src_line)4261 static void start_stage_v1(PSI_stage_key key, const char *src_file, int src_line)
4262 {
4263 ulonglong timer_value= 0;
4264
4265 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4266 if (unlikely(pfs_thread == NULL))
4267 return;
4268
4269 /* Always update column threads.processlist_state. */
4270 pfs_thread->m_stage= key;
4271
4272 if (! flag_global_instrumentation)
4273 return;
4274
4275 if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
4276 return;
4277
4278 PFS_events_stages *pfs= & pfs_thread->m_stage_current;
4279 PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4280 PFS_events_statements *parent_statement= & pfs_thread->m_statement_stack[0];
4281
4282 PFS_instr_class *old_class= pfs->m_class;
4283 if (old_class != NULL)
4284 {
4285 PFS_stage_stat *event_name_array;
4286 event_name_array= pfs_thread->m_instr_class_stages_stats;
4287 uint index= old_class->m_event_name_index;
4288
4289 /* Finish old event */
4290 if (old_class->m_timed)
4291 {
4292 timer_value= get_timer_raw_value(stage_timer);;
4293 pfs->m_timer_end= timer_value;
4294
4295 /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4296 ulonglong stage_time= timer_value - pfs->m_timer_start;
4297 event_name_array[index].aggregate_value(stage_time);
4298 }
4299 else
4300 {
4301 /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4302 event_name_array[index].aggregate_counted();
4303 }
4304
4305 if (flag_events_stages_current)
4306 {
4307 pfs->m_end_event_id= pfs_thread->m_event_id;
4308 if (flag_events_stages_history)
4309 insert_events_stages_history(pfs_thread, pfs);
4310 if (flag_events_stages_history_long)
4311 insert_events_stages_history_long(pfs);
4312 }
4313
4314 /* This stage event is now complete. */
4315 pfs->m_class= NULL;
4316
4317 /* New waits will now be attached directly to the parent statement. */
4318 child_wait->m_event_id= parent_statement->m_event_id;
4319 child_wait->m_event_type= parent_statement->m_event_type;
4320 /* See below for new stages, that may overwrite this. */
4321 }
4322
4323 /* Start new event */
4324
4325 PFS_stage_class *new_klass= find_stage_class(key);
4326 if (unlikely(new_klass == NULL))
4327 return;
4328
4329 if (! new_klass->m_enabled)
4330 return;
4331
4332 pfs->m_class= new_klass;
4333 if (new_klass->m_timed)
4334 {
4335 /*
4336 Do not call the timer again if we have a
4337 TIMER_END for the previous stage already.
4338 */
4339 if (timer_value == 0)
4340 timer_value= get_timer_raw_value(stage_timer);
4341 pfs->m_timer_start= timer_value;
4342 }
4343 else
4344 pfs->m_timer_start= 0;
4345 pfs->m_timer_end= 0;
4346
4347 if (flag_events_stages_current)
4348 {
4349 /* m_thread_internal_id is immutable and already set */
4350 DBUG_ASSERT(pfs->m_thread_internal_id == pfs_thread->m_thread_internal_id);
4351 pfs->m_event_id= pfs_thread->m_event_id++;
4352 pfs->m_end_event_id= 0;
4353 pfs->m_source_file= src_file;
4354 pfs->m_source_line= src_line;
4355
4356 /* New wait events will have this new stage as parent. */
4357 child_wait->m_event_id= pfs->m_event_id;
4358 child_wait->m_event_type= EVENT_TYPE_STAGE;
4359 }
4360 }
4361
end_stage_v1()4362 static void end_stage_v1()
4363 {
4364 ulonglong timer_value= 0;
4365
4366 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4367 if (unlikely(pfs_thread == NULL))
4368 return;
4369
4370 pfs_thread->m_stage= 0;
4371
4372 if (! flag_global_instrumentation)
4373 return;
4374
4375 if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
4376 return;
4377
4378 PFS_events_stages *pfs= & pfs_thread->m_stage_current;
4379
4380 PFS_instr_class *old_class= pfs->m_class;
4381 if (old_class != NULL)
4382 {
4383 PFS_stage_stat *event_name_array;
4384 event_name_array= pfs_thread->m_instr_class_stages_stats;
4385 uint index= old_class->m_event_name_index;
4386
4387 /* Finish old event */
4388 if (old_class->m_timed)
4389 {
4390 timer_value= get_timer_raw_value(stage_timer);;
4391 pfs->m_timer_end= timer_value;
4392
4393 /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4394 ulonglong stage_time= timer_value - pfs->m_timer_start;
4395 event_name_array[index].aggregate_value(stage_time);
4396 }
4397 else
4398 {
4399 /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4400 event_name_array[index].aggregate_counted();
4401 }
4402
4403 if (flag_events_stages_current)
4404 {
4405 pfs->m_end_event_id= pfs_thread->m_event_id;
4406 if (flag_events_stages_history)
4407 insert_events_stages_history(pfs_thread, pfs);
4408 if (flag_events_stages_history_long)
4409 insert_events_stages_history_long(pfs);
4410 }
4411
4412 /* New waits will now be attached directly to the parent statement. */
4413 PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4414 PFS_events_statements *parent_statement= & pfs_thread->m_statement_stack[0];
4415 child_wait->m_event_id= parent_statement->m_event_id;
4416 child_wait->m_event_type= parent_statement->m_event_type;
4417
4418 /* This stage is completed */
4419 pfs->m_class= NULL;
4420 }
4421 }
4422
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::get_thread_statement_locker.
  Build a statement locker for instrument @c key, honoring the global,
  per thread and per event-class enabled flags, and optionally push a
  new row on the thread statement stack (EVENTS_STATEMENTS_CURRENT).
  @param state    the caller provided locker state to initialize
  @param key      the statement instrument key
  @param charset  the statement text character set (CHARSET_INFO*)
  @return the locker, or NULL when the statement is not instrumented
*/
static PSI_statement_locker*
get_thread_statement_locker_v1(PSI_statement_locker_state *state,
                               PSI_statement_key key,
                               const void *charset)
{
  DBUG_ASSERT(state != NULL);
  DBUG_ASSERT(charset != NULL);

  if (! flag_global_instrumentation)
    return NULL;
  PFS_statement_class *klass= find_statement_class(key);
  if (unlikely(klass == NULL))
    return NULL;
  if (! klass->m_enabled)
    return NULL;

  uint flags;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (klass->m_timed)
      flags|= STATE_FLAG_TIMED;

    if (flag_events_statements_current)
    {
      ulonglong event_id= pfs_thread->m_event_id++;

      /*
        No room on the statement stack: the statement is not instrumented.
        NOTE(review): m_event_id was already incremented above, so event
        ids may skip values in this case -- presumably harmless; confirm.
      */
      if (pfs_thread->m_events_statements_count >= statement_stack_max)
      {
        return NULL;
      }

      /*
        Mark the statement row dirty while it is (re)initialized,
        presumably an optimistic lock protecting concurrent readers
        of EVENTS_STATEMENTS_CURRENT.
      */
      pfs_thread->m_stmt_lock.allocated_to_dirty();
      PFS_events_statements *pfs= & pfs_thread->m_statement_stack[pfs_thread->m_events_statements_count];
      /* m_thread_internal_id is immutable and already set */
      DBUG_ASSERT(pfs->m_thread_internal_id == pfs_thread->m_thread_internal_id);
      pfs->m_event_id= event_id;
      pfs->m_end_event_id= 0;
      pfs->m_class= klass;
      pfs->m_timer_start= 0;
      pfs->m_timer_end= 0;
      pfs->m_lock_time= 0;
      pfs->m_current_schema_name_length= 0;
      pfs->m_sqltext_length= 0;
      pfs->m_sqltext_truncated= false;
      pfs->m_sqltext_cs_number= system_charset_info->number; /* default */

      /* Reset the diagnostics related columns. */
      pfs->m_message_text[0]= '\0';
      pfs->m_sql_errno= 0;
      pfs->m_sqlstate[0]= '\0';
      pfs->m_error_count= 0;
      pfs->m_warning_count= 0;
      pfs->m_rows_affected= 0;

      /* Reset the optimizer / execution statistics columns. */
      pfs->m_rows_sent= 0;
      pfs->m_rows_examined= 0;
      pfs->m_created_tmp_disk_tables= 0;
      pfs->m_created_tmp_tables= 0;
      pfs->m_select_full_join= 0;
      pfs->m_select_full_range_join= 0;
      pfs->m_select_range= 0;
      pfs->m_select_range_check= 0;
      pfs->m_select_scan= 0;
      pfs->m_sort_merge_passes= 0;
      pfs->m_sort_range= 0;
      pfs->m_sort_rows= 0;
      pfs->m_sort_scan= 0;
      pfs->m_no_index_used= 0;
      pfs->m_no_good_index_used= 0;
      pfs->m_digest_storage.reset();

      /* New stages will have this statement as parent */
      PFS_events_stages *child_stage= & pfs_thread->m_stage_current;
      child_stage->m_nesting_event_id= event_id;
      child_stage->m_nesting_event_type= EVENT_TYPE_STATEMENT;

      /* New waits will have this statement as parent, if no stage is instrumented */
      PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
      child_wait->m_nesting_event_id= event_id;
      child_wait->m_nesting_event_type= EVENT_TYPE_STATEMENT;

      state->m_statement= pfs;
      flags|= STATE_FLAG_EVENT;

      pfs_thread->m_events_statements_count++;
      pfs_thread->m_stmt_lock.dirty_to_allocated();
    }
  }
  else
  {
    /* No per thread instrumentation: only global aggregation applies. */
    if (klass->m_timed)
      flags= STATE_FLAG_TIMED;
    else
      flags= 0;
  }

  if (flag_statements_digest)
  {
    flags|= STATE_FLAG_DIGEST;
  }

  /* Initialize the locker state for end_statement_v1. */
  state->m_discarded= false;
  state->m_class= klass;
  state->m_flags= flags;

  state->m_lock_time= 0;
  state->m_rows_sent= 0;
  state->m_rows_examined= 0;
  state->m_created_tmp_disk_tables= 0;
  state->m_created_tmp_tables= 0;
  state->m_select_full_join= 0;
  state->m_select_full_range_join= 0;
  state->m_select_range= 0;
  state->m_select_range_check= 0;
  state->m_select_scan= 0;
  state->m_sort_merge_passes= 0;
  state->m_sort_range= 0;
  state->m_sort_rows= 0;
  state->m_sort_scan= 0;
  state->m_no_index_used= 0;
  state->m_no_good_index_used= 0;

  state->m_digest= NULL;

  state->m_schema_name_length= 0;
  state->m_cs_number= ((CHARSET_INFO *)charset)->number;

  return reinterpret_cast<PSI_statement_locker*> (state);
}
4560
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::refine_statement.
  Replace the (mutable) statement class attached to the locker with the
  class resolved from @c key, once the real statement type is known.
  When the refined class is unknown or disabled, the statement is
  discarded and its row is popped from the thread statement stack.
  @param locker  the statement locker, may be NULL
  @param key     the refined statement instrument key
  @return the locker, or NULL when the statement is discarded
*/
static PSI_statement_locker*
refine_statement_v1(PSI_statement_locker *locker,
                    PSI_statement_key key)
{
  PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
  if (state == NULL)
    return NULL;
  DBUG_ASSERT(state->m_class != NULL);
  PFS_statement_class *klass;
  /* Only refine statements for mutable instrumentation */
  klass= reinterpret_cast<PFS_statement_class*> (state->m_class);
  DBUG_ASSERT(klass->is_mutable());
  klass= find_statement_class(key);

  uint flags= state->m_flags;

  if (unlikely(klass == NULL) || !klass->m_enabled)
  {
    /* pop statement stack */
    if (flags & STATE_FLAG_THREAD)
    {
      PFS_thread *pfs_thread= reinterpret_cast<PFS_thread *> (state->m_thread);
      DBUG_ASSERT(pfs_thread != NULL);
      if (pfs_thread->m_events_statements_count > 0)
        pfs_thread->m_events_statements_count--;
    }

    state->m_discarded= true;
    return NULL;
  }

  /* Drop the timed flag when the refined class is not timed. */
  if ((flags & STATE_FLAG_TIMED) && ! klass->m_timed)
    flags= flags & ~STATE_FLAG_TIMED;

  if (flags & STATE_FLAG_EVENT)
  {
    PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
    DBUG_ASSERT(pfs != NULL);

    /* mutate EVENTS_STATEMENTS_CURRENT.EVENT_NAME */
    pfs->m_class= klass;
  }

  state->m_class= klass;
  state->m_flags= flags;
  return reinterpret_cast<PSI_statement_locker*> (state);
}
4608
start_statement_v1(PSI_statement_locker * locker,const char * db,uint db_len,const char * src_file,uint src_line)4609 static void start_statement_v1(PSI_statement_locker *locker,
4610 const char *db, uint db_len,
4611 const char *src_file, uint src_line)
4612 {
4613 PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4614 DBUG_ASSERT(state != NULL);
4615
4616 uint flags= state->m_flags;
4617 ulonglong timer_start= 0;
4618
4619 if (flags & STATE_FLAG_TIMED)
4620 {
4621 timer_start= get_timer_raw_value_and_function(statement_timer, & state->m_timer);
4622 state->m_timer_start= timer_start;
4623 }
4624
4625 compile_time_assert(PSI_SCHEMA_NAME_LEN == NAME_LEN);
4626 DBUG_ASSERT(db_len <= sizeof(state->m_schema_name));
4627
4628 if (db_len > 0)
4629 memcpy(state->m_schema_name, db, db_len);
4630 state->m_schema_name_length= db_len;
4631
4632 if (flags & STATE_FLAG_EVENT)
4633 {
4634 PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4635 DBUG_ASSERT(pfs != NULL);
4636
4637 pfs->m_timer_start= timer_start;
4638 pfs->m_source_file= src_file;
4639 pfs->m_source_line= src_line;
4640
4641 DBUG_ASSERT(db_len <= sizeof(pfs->m_current_schema_name));
4642 if (db_len > 0)
4643 memcpy(pfs->m_current_schema_name, db, db_len);
4644 pfs->m_current_schema_name_length= db_len;
4645 }
4646 }
4647
set_statement_text_v1(PSI_statement_locker * locker,const char * text,uint text_len)4648 static void set_statement_text_v1(PSI_statement_locker *locker,
4649 const char *text, uint text_len)
4650 {
4651 PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4652 DBUG_ASSERT(state != NULL);
4653
4654 if (state->m_discarded)
4655 return;
4656
4657 if (state->m_flags & STATE_FLAG_EVENT)
4658 {
4659 PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4660 DBUG_ASSERT(pfs != NULL);
4661 if (text_len > sizeof (pfs->m_sqltext))
4662 {
4663 text_len= sizeof(pfs->m_sqltext);
4664 pfs->m_sqltext_truncated= true;
4665 }
4666 if (text_len)
4667 memcpy(pfs->m_sqltext, text, text_len);
4668 pfs->m_sqltext_length= text_len;
4669 pfs->m_sqltext_cs_number= state->m_cs_number;
4670 }
4671
4672 return;
4673 }
4674
/*
  Shared body for the set_statement_*_v1 functions:
  store VALUE into attribute ATTR of the locker state, and mirror it
  into the attached EVENTS_STATEMENTS_CURRENT row when there is one.
  Does nothing for a NULL locker or a discarded statement.
*/
#define SET_STATEMENT_ATTR_BODY(LOCKER, ATTR, VALUE) \
  PSI_statement_locker_state *state; \
  state= reinterpret_cast<PSI_statement_locker_state*> (LOCKER); \
  if (unlikely(state == NULL)) \
    return; \
  if (state->m_discarded) \
    return; \
  state->ATTR= VALUE; \
  if (state->m_flags & STATE_FLAG_EVENT) \
  { \
    PFS_events_statements *pfs; \
    pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement); \
    DBUG_ASSERT(pfs != NULL); \
    pfs->ATTR= VALUE; \
  } \
  return;
4691
/*
  Shared body for the inc_statement_*_v1 functions:
  add VALUE to attribute ATTR of the locker state, and apply the same
  increment to the attached EVENTS_STATEMENTS_CURRENT row when there
  is one. Does nothing for a NULL locker or a discarded statement.
*/
#define INC_STATEMENT_ATTR_BODY(LOCKER, ATTR, VALUE) \
  PSI_statement_locker_state *state; \
  state= reinterpret_cast<PSI_statement_locker_state*> (LOCKER); \
  if (unlikely(state == NULL)) \
    return; \
  if (state->m_discarded) \
    return; \
  state->ATTR+= VALUE; \
  if (state->m_flags & STATE_FLAG_EVENT) \
  { \
    PFS_events_statements *pfs; \
    pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement); \
    DBUG_ASSERT(pfs != NULL); \
    pfs->ATTR+= VALUE; \
  } \
  return;
4708
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::set_statement_lock_time.
*/
static void set_statement_lock_time_v1(PSI_statement_locker *locker,
                                       ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_lock_time, count);
}
4714
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::set_statement_rows_sent.
*/
static void set_statement_rows_sent_v1(PSI_statement_locker *locker,
                                       ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_rows_sent, count);
}
4720
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::set_statement_rows_examined.
*/
static void set_statement_rows_examined_v1(PSI_statement_locker *locker,
                                           ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_rows_examined, count);
}
4726
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::inc_statement_created_tmp_disk_tables.
*/
static void inc_statement_created_tmp_disk_tables_v1(PSI_statement_locker *locker,
                                                     ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_created_tmp_disk_tables, count);
}
4732
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::inc_statement_created_tmp_tables.
*/
static void inc_statement_created_tmp_tables_v1(PSI_statement_locker *locker,
                                                ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_created_tmp_tables, count);
}
4738
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::inc_statement_select_full_join.
*/
static void inc_statement_select_full_join_v1(PSI_statement_locker *locker,
                                              ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_full_join, count);
}
4744
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::inc_statement_select_full_range_join.
*/
static void inc_statement_select_full_range_join_v1(PSI_statement_locker *locker,
                                                    ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_full_range_join, count);
}
4750
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::inc_statement_select_range.
*/
static void inc_statement_select_range_v1(PSI_statement_locker *locker,
                                          ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_range, count);
}
4756
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::inc_statement_select_range_check.
*/
static void inc_statement_select_range_check_v1(PSI_statement_locker *locker,
                                                ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_range_check, count);
}
4762
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::inc_statement_select_scan.
*/
static void inc_statement_select_scan_v1(PSI_statement_locker *locker,
                                         ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_scan, count);
}
4768
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::inc_statement_sort_merge_passes.
*/
static void inc_statement_sort_merge_passes_v1(PSI_statement_locker *locker,
                                               ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_merge_passes, count);
}
4774
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::inc_statement_sort_range.
*/
static void inc_statement_sort_range_v1(PSI_statement_locker *locker,
                                        ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_range, count);
}
4780
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::inc_statement_sort_rows.
*/
static void inc_statement_sort_rows_v1(PSI_statement_locker *locker,
                                       ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_rows, count);
}
4786
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::inc_statement_sort_scan.
*/
static void inc_statement_sort_scan_v1(PSI_statement_locker *locker,
                                       ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_scan, count);
}
4792
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::set_statement_no_index_used.
*/
static void set_statement_no_index_used_v1(PSI_statement_locker *locker)
{
  SET_STATEMENT_ATTR_BODY(locker, m_no_index_used, 1);
}
4797
/**
  Implementation of the statement instrumentation interface.
  @sa PSI_v1::set_statement_no_good_index_used.
*/
static void set_statement_no_good_index_used_v1(PSI_statement_locker *locker)
{
  SET_STATEMENT_ATTR_BODY(locker, m_no_good_index_used, 1);
}
4802
end_statement_v1(PSI_statement_locker * locker,void * stmt_da)4803 static void end_statement_v1(PSI_statement_locker *locker, void *stmt_da)
4804 {
4805 PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4806 Diagnostics_area *da= reinterpret_cast<Diagnostics_area*> (stmt_da);
4807 DBUG_ASSERT(state != NULL);
4808 DBUG_ASSERT(da != NULL);
4809
4810 if (state->m_discarded)
4811 return;
4812
4813 PFS_statement_class *klass= reinterpret_cast<PFS_statement_class *> (state->m_class);
4814 DBUG_ASSERT(klass != NULL);
4815
4816 ulonglong timer_end= 0;
4817 ulonglong wait_time= 0;
4818 uint flags= state->m_flags;
4819
4820 if (flags & STATE_FLAG_TIMED)
4821 {
4822 timer_end= state->m_timer();
4823 wait_time= timer_end - state->m_timer_start;
4824 }
4825
4826 PFS_statement_stat *event_name_array;
4827 uint index= klass->m_event_name_index;
4828 PFS_statement_stat *stat;
4829
4830 /*
4831 Capture statement stats by digest.
4832 */
4833 const sql_digest_storage *digest_storage= NULL;
4834 PFS_statement_stat *digest_stat= NULL;
4835
4836 if (flags & STATE_FLAG_THREAD)
4837 {
4838 PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
4839 DBUG_ASSERT(thread != NULL);
4840 event_name_array= thread->m_instr_class_statements_stats;
4841 /* Aggregate to EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME */
4842 stat= & event_name_array[index];
4843
4844 if (flags & STATE_FLAG_DIGEST)
4845 {
4846 digest_storage= state->m_digest;
4847
4848 if (digest_storage != NULL)
4849 {
4850 /* Populate PFS_statements_digest_stat with computed digest information.*/
4851 digest_stat= find_or_create_digest(thread, digest_storage,
4852 state->m_schema_name,
4853 state->m_schema_name_length);
4854 }
4855 }
4856
4857 if (flags & STATE_FLAG_EVENT)
4858 {
4859 PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4860 DBUG_ASSERT(pfs != NULL);
4861
4862 thread->m_stmt_lock.allocated_to_dirty();
4863
4864 switch(da->status())
4865 {
4866 case Diagnostics_area::DA_EMPTY:
4867 break;
4868 case Diagnostics_area::DA_OK:
4869 memcpy(pfs->m_message_text, da->message(), MYSQL_ERRMSG_SIZE);
4870 pfs->m_message_text[MYSQL_ERRMSG_SIZE]= 0;
4871 pfs->m_rows_affected= da->affected_rows();
4872 pfs->m_warning_count= da->statement_warn_count();
4873 memcpy(pfs->m_sqlstate, "00000", SQLSTATE_LENGTH);
4874 break;
4875 case Diagnostics_area::DA_EOF:
4876 pfs->m_warning_count= da->statement_warn_count();
4877 break;
4878 case Diagnostics_area::DA_ERROR:
4879 memcpy(pfs->m_message_text, da->message(), MYSQL_ERRMSG_SIZE);
4880 pfs->m_message_text[MYSQL_ERRMSG_SIZE]= 0;
4881 pfs->m_sql_errno= da->sql_errno();
4882 pfs->m_error_count++;
4883 memcpy(pfs->m_sqlstate, da->get_sqlstate(), SQLSTATE_LENGTH);
4884 break;
4885 case Diagnostics_area::DA_DISABLED:
4886 break;
4887 }
4888
4889 pfs->m_timer_end= timer_end;
4890 pfs->m_end_event_id= thread->m_event_id;
4891
4892 if (digest_storage != NULL)
4893 {
4894 /*
4895 The following columns in events_statement_current:
4896 - DIGEST,
4897 - DIGEST_TEXT
4898 are computed from the digest storage.
4899 */
4900 pfs->m_digest_storage.copy(digest_storage);
4901 }
4902
4903 if (flag_events_statements_history)
4904 insert_events_statements_history(thread, pfs);
4905 if (flag_events_statements_history_long)
4906 insert_events_statements_history_long(pfs);
4907
4908 DBUG_ASSERT(thread->m_events_statements_count > 0);
4909 thread->m_events_statements_count--;
4910 thread->m_stmt_lock.dirty_to_allocated();
4911 }
4912 }
4913 else
4914 {
4915 if (flags & STATE_FLAG_DIGEST)
4916 {
4917 PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4918
4919 /* An instrumented thread is required, for LF_PINS. */
4920 if (thread != NULL)
4921 {
4922 /* Set digest stat. */
4923 digest_storage= state->m_digest;
4924
4925 if (digest_storage != NULL)
4926 {
4927 /* Populate statements_digest_stat with computed digest information. */
4928 digest_stat= find_or_create_digest(thread, digest_storage,
4929 state->m_schema_name,
4930 state->m_schema_name_length);
4931 }
4932 }
4933 }
4934
4935 event_name_array= global_instr_class_statements_array;
4936 /* Aggregate to EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME */
4937 stat= & event_name_array[index];
4938 }
4939
4940 if (flags & STATE_FLAG_TIMED)
4941 {
4942 /* Aggregate to EVENTS_STATEMENTS_SUMMARY_..._BY_EVENT_NAME (timed) */
4943 stat->aggregate_value(wait_time);
4944 }
4945 else
4946 {
4947 /* Aggregate to EVENTS_STATEMENTS_SUMMARY_..._BY_EVENT_NAME (counted) */
4948 stat->aggregate_counted();
4949 }
4950
4951 stat->m_lock_time+= state->m_lock_time;
4952 stat->m_rows_sent+= state->m_rows_sent;
4953 stat->m_rows_examined+= state->m_rows_examined;
4954 stat->m_created_tmp_disk_tables+= state->m_created_tmp_disk_tables;
4955 stat->m_created_tmp_tables+= state->m_created_tmp_tables;
4956 stat->m_select_full_join+= state->m_select_full_join;
4957 stat->m_select_full_range_join+= state->m_select_full_range_join;
4958 stat->m_select_range+= state->m_select_range;
4959 stat->m_select_range_check+= state->m_select_range_check;
4960 stat->m_select_scan+= state->m_select_scan;
4961 stat->m_sort_merge_passes+= state->m_sort_merge_passes;
4962 stat->m_sort_range+= state->m_sort_range;
4963 stat->m_sort_rows+= state->m_sort_rows;
4964 stat->m_sort_scan+= state->m_sort_scan;
4965 stat->m_no_index_used+= state->m_no_index_used;
4966 stat->m_no_good_index_used+= state->m_no_good_index_used;
4967
4968 if (digest_stat != NULL)
4969 {
4970 if (flags & STATE_FLAG_TIMED)
4971 {
4972 digest_stat->aggregate_value(wait_time);
4973 }
4974 else
4975 {
4976 digest_stat->aggregate_counted();
4977 }
4978
4979 digest_stat->m_lock_time+= state->m_lock_time;
4980 digest_stat->m_rows_sent+= state->m_rows_sent;
4981 digest_stat->m_rows_examined+= state->m_rows_examined;
4982 digest_stat->m_created_tmp_disk_tables+= state->m_created_tmp_disk_tables;
4983 digest_stat->m_created_tmp_tables+= state->m_created_tmp_tables;
4984 digest_stat->m_select_full_join+= state->m_select_full_join;
4985 digest_stat->m_select_full_range_join+= state->m_select_full_range_join;
4986 digest_stat->m_select_range+= state->m_select_range;
4987 digest_stat->m_select_range_check+= state->m_select_range_check;
4988 digest_stat->m_select_scan+= state->m_select_scan;
4989 digest_stat->m_sort_merge_passes+= state->m_sort_merge_passes;
4990 digest_stat->m_sort_range+= state->m_sort_range;
4991 digest_stat->m_sort_rows+= state->m_sort_rows;
4992 digest_stat->m_sort_scan+= state->m_sort_scan;
4993 digest_stat->m_no_index_used+= state->m_no_index_used;
4994 digest_stat->m_no_good_index_used+= state->m_no_good_index_used;
4995 }
4996
4997 switch (da->status())
4998 {
4999 case Diagnostics_area::DA_EMPTY:
5000 break;
5001 case Diagnostics_area::DA_OK:
5002 stat->m_rows_affected+= da->affected_rows();
5003 stat->m_warning_count+= da->statement_warn_count();
5004 if (digest_stat != NULL)
5005 {
5006 digest_stat->m_rows_affected+= da->affected_rows();
5007 digest_stat->m_warning_count+= da->statement_warn_count();
5008 }
5009 break;
5010 case Diagnostics_area::DA_EOF:
5011 stat->m_warning_count+= da->statement_warn_count();
5012 if (digest_stat != NULL)
5013 {
5014 digest_stat->m_warning_count+= da->statement_warn_count();
5015 }
5016 break;
5017 case Diagnostics_area::DA_ERROR:
5018 stat->m_error_count++;
5019 if (digest_stat != NULL)
5020 {
5021 digest_stat->m_error_count++;
5022 }
5023 break;
5024 case Diagnostics_area::DA_DISABLED:
5025 break;
5026 }
5027 }
5028
5029 /**
5030 Implementation of the socket instrumentation interface.
5031 @sa PSI_v1::end_socket_wait.
5032 */
end_socket_wait_v1(PSI_socket_locker * locker,size_t byte_count)5033 static void end_socket_wait_v1(PSI_socket_locker *locker, size_t byte_count)
5034 {
5035 PSI_socket_locker_state *state= reinterpret_cast<PSI_socket_locker_state*> (locker);
5036 DBUG_ASSERT(state != NULL);
5037
5038 PFS_socket *socket= reinterpret_cast<PFS_socket *>(state->m_socket);
5039 DBUG_ASSERT(socket != NULL);
5040
5041 ulonglong timer_end= 0;
5042 ulonglong wait_time= 0;
5043 PFS_byte_stat *byte_stat;
5044 uint flags= state->m_flags;
5045 size_t bytes= ((int)byte_count > -1 ? byte_count : 0);
5046
5047 switch (state->m_operation)
5048 {
5049 /* Group read operations */
5050 case PSI_SOCKET_RECV:
5051 case PSI_SOCKET_RECVFROM:
5052 case PSI_SOCKET_RECVMSG:
5053 byte_stat= &socket->m_socket_stat.m_io_stat.m_read;
5054 break;
5055 /* Group write operations */
5056 case PSI_SOCKET_SEND:
5057 case PSI_SOCKET_SENDTO:
5058 case PSI_SOCKET_SENDMSG:
5059 byte_stat= &socket->m_socket_stat.m_io_stat.m_write;
5060 break;
5061 /* Group remaining operations as miscellaneous */
5062 case PSI_SOCKET_CONNECT:
5063 case PSI_SOCKET_CREATE:
5064 case PSI_SOCKET_BIND:
5065 case PSI_SOCKET_SEEK:
5066 case PSI_SOCKET_OPT:
5067 case PSI_SOCKET_STAT:
5068 case PSI_SOCKET_SHUTDOWN:
5069 case PSI_SOCKET_SELECT:
5070 case PSI_SOCKET_CLOSE:
5071 byte_stat= &socket->m_socket_stat.m_io_stat.m_misc;
5072 break;
5073 default:
5074 DBUG_ASSERT(false);
5075 byte_stat= NULL;
5076 break;
5077 }
5078
5079 /* Aggregation for EVENTS_WAITS_SUMMARY_BY_INSTANCE */
5080 if (flags & STATE_FLAG_TIMED)
5081 {
5082 timer_end= state->m_timer();
5083 wait_time= timer_end - state->m_timer_start;
5084
5085 /* Aggregate to the socket instrument for now (timed) */
5086 byte_stat->aggregate(wait_time, bytes);
5087 }
5088 else
5089 {
5090 /* Aggregate to the socket instrument (event count and byte count) */
5091 byte_stat->aggregate_counted(bytes);
5092 }
5093
5094 /* Aggregate to EVENTS_WAITS_HISTORY and EVENTS_WAITS_HISTORY_LONG */
5095 if (flags & STATE_FLAG_EVENT)
5096 {
5097 PFS_thread *thread= reinterpret_cast<PFS_thread *>(state->m_thread);
5098 DBUG_ASSERT(thread != NULL);
5099 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
5100 DBUG_ASSERT(wait != NULL);
5101
5102 wait->m_timer_end= timer_end;
5103 wait->m_end_event_id= thread->m_event_id;
5104 wait->m_number_of_bytes= bytes;
5105
5106 if (flag_events_waits_history)
5107 insert_events_waits_history(thread, wait);
5108 if (flag_events_waits_history_long)
5109 insert_events_waits_history_long(wait);
5110 thread->m_events_waits_current--;
5111
5112 DBUG_ASSERT(wait == thread->m_events_waits_current);
5113 }
5114 }
5115
set_socket_state_v1(PSI_socket * socket,PSI_socket_state state)5116 static void set_socket_state_v1(PSI_socket *socket, PSI_socket_state state)
5117 {
5118 DBUG_ASSERT((state == PSI_SOCKET_STATE_IDLE) || (state == PSI_SOCKET_STATE_ACTIVE));
5119 PFS_socket *pfs= reinterpret_cast<PFS_socket*>(socket);
5120 DBUG_ASSERT(pfs != NULL);
5121 DBUG_ASSERT(pfs->m_idle || (state == PSI_SOCKET_STATE_IDLE));
5122 DBUG_ASSERT(!pfs->m_idle || (state == PSI_SOCKET_STATE_ACTIVE));
5123 pfs->m_idle= (state == PSI_SOCKET_STATE_IDLE);
5124 }
5125
5126 /**
5127 Set socket descriptor and address info.
5128 */
set_socket_info_v1(PSI_socket * socket,const my_socket * fd,const struct sockaddr * addr,socklen_t addr_len)5129 static void set_socket_info_v1(PSI_socket *socket,
5130 const my_socket *fd,
5131 const struct sockaddr *addr,
5132 socklen_t addr_len)
5133 {
5134 PFS_socket *pfs= reinterpret_cast<PFS_socket*>(socket);
5135 DBUG_ASSERT(pfs != NULL);
5136
5137 /** Set socket descriptor */
5138 if (fd != NULL)
5139 pfs->m_fd= *fd;
5140
5141 /** Set raw socket address and length */
5142 if (likely(addr != NULL && addr_len > 0))
5143 {
5144 pfs->m_addr_len= addr_len;
5145
5146 /** Restrict address length to size of struct */
5147 if (unlikely(pfs->m_addr_len > sizeof(sockaddr_storage)))
5148 pfs->m_addr_len= sizeof(struct sockaddr_storage);
5149
5150 memcpy(&pfs->m_sock_addr, addr, pfs->m_addr_len);
5151 }
5152 }
5153
5154 /**
5155 Implementation of the socket instrumentation interface.
5156 @sa PSI_v1::set_socket_info.
5157 */
set_socket_thread_owner_v1(PSI_socket * socket)5158 static void set_socket_thread_owner_v1(PSI_socket *socket)
5159 {
5160 PFS_socket *pfs_socket= reinterpret_cast<PFS_socket*>(socket);
5161 DBUG_ASSERT(pfs_socket != NULL);
5162 pfs_socket->m_thread_owner= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
5163 }
5164
5165 struct PSI_digest_locker*
pfs_digest_start_v1(PSI_statement_locker * locker)5166 pfs_digest_start_v1(PSI_statement_locker *locker)
5167 {
5168 PSI_statement_locker_state *statement_state;
5169 statement_state= reinterpret_cast<PSI_statement_locker_state*> (locker);
5170 DBUG_ASSERT(statement_state != NULL);
5171
5172 if (statement_state->m_discarded)
5173 return NULL;
5174
5175 if (statement_state->m_flags & STATE_FLAG_DIGEST)
5176 {
5177 return reinterpret_cast<PSI_digest_locker*> (locker);
5178 }
5179
5180 return NULL;
5181 }
5182
pfs_digest_end_v1(PSI_digest_locker * locker,const sql_digest_storage * digest)5183 void pfs_digest_end_v1(PSI_digest_locker *locker, const sql_digest_storage *digest)
5184 {
5185 PSI_statement_locker_state *statement_state;
5186 statement_state= reinterpret_cast<PSI_statement_locker_state*> (locker);
5187 DBUG_ASSERT(statement_state != NULL);
5188 DBUG_ASSERT(digest != NULL);
5189
5190 if (statement_state->m_discarded)
5191 return;
5192
5193 if (statement_state->m_flags & STATE_FLAG_DIGEST)
5194 {
5195 statement_state->m_digest= digest;
5196 }
5197 }
5198
5199 /**
5200 Implementation of the thread attribute connection interface
5201 @sa PSI_v1::set_thread_connect_attr.
5202 */
set_thread_connect_attrs_v1(const char * buffer,uint length,const void * from_cs)5203 static int set_thread_connect_attrs_v1(const char *buffer, uint length,
5204 const void *from_cs)
5205 {
5206
5207 PFS_thread *thd= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
5208
5209 DBUG_ASSERT(buffer != NULL);
5210
5211 if (likely(thd != NULL) && session_connect_attrs_size_per_thread > 0)
5212 {
5213 const CHARSET_INFO *cs = static_cast<const CHARSET_INFO *> (from_cs);
5214
5215 /* copy from the input buffer as much as we can fit */
5216 uint copy_size= (uint)(length < session_connect_attrs_size_per_thread ?
5217 length : session_connect_attrs_size_per_thread);
5218 thd->m_session_lock.allocated_to_dirty();
5219 memcpy(thd->m_session_connect_attrs, buffer, copy_size);
5220 thd->m_session_connect_attrs_length= copy_size;
5221 thd->m_session_connect_attrs_cs_number= cs->number;
5222 thd->m_session_lock.dirty_to_allocated();
5223
5224 if (copy_size == length)
5225 return 0;
5226
5227 session_connect_attrs_lost++;
5228 return 1;
5229 }
5230 return 0;
5231 }
5232
5233
/**
  Implementation of the instrumentation interface.
  The order of entries must match the order of the members
  declared in struct PSI_v1.
  @sa PSI_v1.
*/
PSI_v1 PFS_v1=
{
  /* Instrument class registration. */
  register_mutex_v1,
  register_rwlock_v1,
  register_cond_v1,
  register_thread_v1,
  register_file_v1,
  register_stage_v1,
  register_statement_v1,
  register_socket_v1,
  /* Instrumented instance creation / destruction. */
  init_mutex_v1,
  destroy_mutex_v1,
  init_rwlock_v1,
  destroy_rwlock_v1,
  init_cond_v1,
  destroy_cond_v1,
  init_socket_v1,
  destroy_socket_v1,
  /* Table instrumentation. */
  get_table_share_v1,
  release_table_share_v1,
  drop_table_share_v1,
  open_table_v1,
  unbind_table_v1,
  rebind_table_v1,
  close_table_v1,
  /* File creation. */
  create_file_v1,
  /* Thread instrumentation. */
  spawn_thread_v1,
  new_thread_v1,
  set_thread_id_v1,
  get_thread_v1,
  set_thread_user_v1,
  set_thread_account_v1,
  set_thread_db_v1,
  set_thread_command_v1,
  set_thread_start_time_v1,
  set_thread_state_v1,
  set_thread_info_v1,
  set_thread_v1,
  delete_current_thread_v1,
  delete_thread_v1,
  /* File lockers. */
  get_thread_file_name_locker_v1,
  get_thread_file_stream_locker_v1,
  get_thread_file_descriptor_locker_v1,
  /* Mutex / rwlock / condition operations. */
  unlock_mutex_v1,
  unlock_rwlock_v1,
  signal_cond_v1,
  broadcast_cond_v1,
  /* Idle waits. */
  start_idle_wait_v1,
  end_idle_wait_v1,
  /* Mutex / rwlock / condition waits. */
  start_mutex_wait_v1,
  end_mutex_wait_v1,
  start_rwlock_wait_v1, /* read */
  end_rwlock_rdwait_v1,
  start_rwlock_wait_v1, /* write */
  end_rwlock_wrwait_v1,
  start_cond_wait_v1,
  end_cond_wait_v1,
  /* Table io and lock waits. */
  start_table_io_wait_v1,
  end_table_io_wait_v1,
  start_table_lock_wait_v1,
  end_table_lock_wait_v1,
  /* File waits. */
  start_file_open_wait_v1,
  end_file_open_wait_v1,
  end_file_open_wait_and_bind_to_descriptor_v1,
  start_file_wait_v1,
  end_file_wait_v1,
  start_file_close_wait_v1,
  end_file_close_wait_v1,
  /* Stage instrumentation. */
  start_stage_v1,
  end_stage_v1,
  /* Statement instrumentation. */
  get_thread_statement_locker_v1,
  refine_statement_v1,
  start_statement_v1,
  set_statement_text_v1,
  set_statement_lock_time_v1,
  set_statement_rows_sent_v1,
  set_statement_rows_examined_v1,
  inc_statement_created_tmp_disk_tables_v1,
  inc_statement_created_tmp_tables_v1,
  inc_statement_select_full_join_v1,
  inc_statement_select_full_range_join_v1,
  inc_statement_select_range_v1,
  inc_statement_select_range_check_v1,
  inc_statement_select_scan_v1,
  inc_statement_sort_merge_passes_v1,
  inc_statement_sort_range_v1,
  inc_statement_sort_rows_v1,
  inc_statement_sort_scan_v1,
  set_statement_no_index_used_v1,
  set_statement_no_good_index_used_v1,
  end_statement_v1,
  /* Socket instrumentation. */
  start_socket_wait_v1,
  end_socket_wait_v1,
  set_socket_state_v1,
  set_socket_info_v1,
  set_socket_thread_owner_v1,
  /* Statement digests. */
  pfs_digest_start_v1,
  pfs_digest_end_v1,
  /* Session connect attributes. */
  set_thread_connect_attrs_v1,
};
5338
get_interface(int version)5339 static void* get_interface(int version)
5340 {
5341 switch (version)
5342 {
5343 case PSI_VERSION_1:
5344 return &PFS_v1;
5345 default:
5346 return NULL;
5347 }
5348 }
5349
5350 C_MODE_END
5351
/**
  Performance schema bootstrap interface, exposing get_interface
  so the server can obtain the instrumentation interface.
*/
struct PSI_bootstrap PFS_bootstrap=
{
  get_interface
};
5356