1 /*
2 Copyright (c) 2000, 2010, Oracle and/or its affiliates.
3 Copyright (c) 2009, 2017, MariaDB Corporation
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; version 2 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
17
18 #ifdef USE_PRAGMA_INTERFACE
19 #pragma implementation /* gcc class implementation */
20 #endif
21
22 /**
23 @file
24
25 @brief
26 Functions for easy reading of records, possible through a cache
27 */
28
29 #include "mariadb.h"
30 #include "records.h"
31 #include "sql_priv.h"
32 #include "records.h"
33 #include "opt_range.h" // SQL_SELECT
34 #include "sql_class.h" // THD
35 #include "sql_base.h"
36 #include "sql_sort.h" // SORT_ADDON_FIELD
37
38 static int rr_quick(READ_RECORD *info);
39 int rr_sequential(READ_RECORD *info);
40 static int rr_from_tempfile(READ_RECORD *info);
41 static int rr_unpack_from_tempfile(READ_RECORD *info);
42 static int rr_unpack_from_buffer(READ_RECORD *info);
43 int rr_from_pointers(READ_RECORD *info);
44 static int rr_from_cache(READ_RECORD *info);
45 static int init_rr_cache(THD *thd, READ_RECORD *info);
46 static int rr_cmp(uchar *a,uchar *b);
47 static int rr_index_first(READ_RECORD *info);
48 static int rr_index_last(READ_RECORD *info);
49 static int rr_index(READ_RECORD *info);
50 static int rr_index_desc(READ_RECORD *info);
51
52
53 /**
54 Initialize READ_RECORD structure to perform full index scan in desired
55 direction using read_record.read_record() interface
56
57 This function has been added at late stage and is used only by
58 UPDATE/DELETE. Other statements perform index scans using
59 join_read_first/next functions.
60
61 @param info READ_RECORD structure to initialize.
62 @param thd Thread handle
63 @param table Table to be accessed
64 @param print_error If true, call table->file->print_error() if an error
65 occurs (except for end-of-records error)
66 @param idx index to scan
67 @param reverse Scan in the reverse direction
68 */
69
init_read_record_idx(READ_RECORD * info,THD * thd,TABLE * table,bool print_error,uint idx,bool reverse)70 bool init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
71 bool print_error, uint idx, bool reverse)
72 {
73 int error= 0;
74 DBUG_ENTER("init_read_record_idx");
75
76 empty_record(table);
77 bzero((char*) info,sizeof(*info));
78 info->thd= thd;
79 info->table= table;
80 info->print_error= print_error;
81 info->unlock_row= rr_unlock_row;
82
83 table->status=0; /* And it's always found */
84 if (!table->file->inited &&
85 unlikely(error= table->file->ha_index_init(idx, 1)))
86 {
87 if (print_error)
88 table->file->print_error(error, MYF(0));
89 }
90
91 /* read_record_func will be changed to rr_index in rr_index_first */
92 info->read_record_func= reverse ? rr_index_last : rr_index_first;
93 DBUG_RETURN(error != 0);
94 }
95
96
97 /*
98 init_read_record is used to scan by using a number of different methods.
99 Which method to use is set-up in this call so that later calls to
100 the info->read_record will call the appropriate method using a function
101 pointer.
102
103 There are five methods that relate completely to the sort function
104 filesort. The result of a filesort is retrieved using read_record
105 calls. The other two methods are used for normal table access.
106
107 The filesort will produce references to the records sorted, these
108 references can be stored in memory or in a temporary file.
109
110 The temporary file is normally used when the references doesn't fit into
111 a properly sized memory buffer. For most small queries the references
112 are stored in the memory buffer.
113 SYNOPSIS
114 init_read_record()
115 info OUT read structure
116 thd Thread handle
117 table Table the data [originally] comes from.
118 select SQL_SELECT structure. We may select->quick or
119 select->file as data source
120 use_record_cache Call file->extra_opt(HA_EXTRA_CACHE,...)
121 if we're going to do sequential read and some
122 additional conditions are satisfied.
123 print_error Copy this to info->print_error
124 disable_rr_cache Don't use rr_from_cache (used by sort-union
125 index-merge which produces rowid sequences that
126 are already ordered)
127
128 DESCRIPTION
129 This function sets up reading data via one of the methods:
130
131 The temporary file is also used when performing an update where a key is
132 modified.
133
134 Methods used when ref's are in memory (using rr_from_pointers):
135 rr_unpack_from_buffer:
136 ----------------------
137 This method is used when table->sort.addon_field is allocated.
138 This is allocated for most SELECT queries not involving any BLOB's.
139 In this case the records are fetched from a memory buffer.
140 rr_from_pointers:
141 -----------------
142 Used when the above is not true, UPDATE, DELETE and so forth and
143 SELECT's involving BLOB's. It is also used when the addon_field
144 buffer is not allocated due to that its size was bigger than the
145 session variable max_length_for_sort_data.
146 In this case the record data is fetched from the handler using the
147 saved reference using the rnd_pos handler call.
148
149 Methods used when ref's are in a temporary file (using rr_from_tempfile)
150 rr_unpack_from_tempfile:
151 ------------------------
152 Same as rr_unpack_from_buffer except that references are fetched from
153 temporary file. Should obviously not really happen other than in
154 strange configurations.
155
156 rr_from_tempfile:
157 -----------------
158 Same as rr_from_pointers except that references are fetched from
159 temporary file instead of from
160 rr_from_cache:
161 --------------
162 This is a special variant of rr_from_tempfile that can be used for
163 handlers that is not using the HA_FAST_KEY_READ table flag. Instead
164 of reading the references one by one from the temporary file it reads
165 a set of them, sorts them and reads all of them into a buffer which
166 is then used for a number of subsequent calls to rr_from_cache.
167 It is only used for SELECT queries and a number of other conditions
168 on table size.
169
170 All other accesses use either index access methods (rr_quick) or a full
171 table scan (rr_sequential).
172 rr_quick:
173 ---------
174 rr_quick uses one of the QUICK_SELECT classes in opt_range.cc to
175 perform an index scan. There are loads of functionality hidden
176 in these quick classes. It handles all index scans of various kinds.
177 rr_sequential:
178 --------------
179 This is the most basic access method of a table using rnd_init,
180 rnd_next and rnd_end. No indexes are used.
181 */
182
init_read_record(READ_RECORD * info,THD * thd,TABLE * table,SQL_SELECT * select,SORT_INFO * filesort,int use_record_cache,bool print_error,bool disable_rr_cache)183 bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
184 SQL_SELECT *select,
185 SORT_INFO *filesort,
186 int use_record_cache, bool print_error,
187 bool disable_rr_cache)
188 {
189 IO_CACHE *tempfile;
190 SORT_ADDON_FIELD *addon_field= filesort ? filesort->addon_field : 0;
191 DBUG_ENTER("init_read_record");
192
193 bzero((char*) info,sizeof(*info));
194 info->thd=thd;
195 info->table=table;
196 info->addon_field= addon_field;
197
198 if ((table->s->tmp_table == INTERNAL_TMP_TABLE) &&
199 !addon_field)
200 (void) table->file->extra(HA_EXTRA_MMAP);
201
202 if (addon_field)
203 {
204 info->rec_buf= (uchar*) filesort->addon_buf.str;
205 info->ref_length= (uint)filesort->addon_buf.length;
206 info->unpack= filesort->unpack;
207 }
208 else
209 {
210 empty_record(table);
211 info->ref_length= (uint)table->file->ref_length;
212 }
213 info->select=select;
214 info->print_error=print_error;
215 info->unlock_row= rr_unlock_row;
216 table->status= 0; /* Rows are always found */
217
218 tempfile= 0;
219 if (select && my_b_inited(&select->file))
220 tempfile= &select->file;
221 else if (filesort && my_b_inited(&filesort->io_cache))
222 tempfile= &filesort->io_cache;
223
224 if (tempfile && !(select && select->quick))
225 {
226 DBUG_PRINT("info",("using rr_from_tempfile"));
227 info->read_record_func=
228 addon_field ? rr_unpack_from_tempfile : rr_from_tempfile;
229 info->io_cache= tempfile;
230 reinit_io_cache(info->io_cache,READ_CACHE,0L,0,0);
231 info->ref_pos=table->file->ref;
232 if (!table->file->inited)
233 if (unlikely(table->file->ha_rnd_init_with_error(0)))
234 DBUG_RETURN(1);
235
236 /*
237 addon_field is checked because if we use addon fields,
238 it doesn't make sense to use cache - we don't read from the table
239 and filesort->io_cache is read sequentially
240 */
241 if (!disable_rr_cache &&
242 !addon_field &&
243 thd->variables.read_rnd_buff_size &&
244 !(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
245 (table->db_stat & HA_READ_ONLY ||
246 table->reginfo.lock_type <= TL_READ_NO_INSERT) &&
247 (ulonglong) table->s->reclength* (table->file->stats.records+
248 table->file->stats.deleted) >
249 (ulonglong) MIN_FILE_LENGTH_TO_USE_ROW_CACHE &&
250 info->io_cache->end_of_file/info->ref_length * table->s->reclength >
251 (my_off_t) MIN_ROWS_TO_USE_TABLE_CACHE &&
252 !table->s->blob_fields &&
253 info->ref_length <= MAX_REFLENGTH)
254 {
255 if (! init_rr_cache(thd, info))
256 {
257 DBUG_PRINT("info",("using rr_from_cache"));
258 info->read_record_func= rr_from_cache;
259 }
260 }
261 }
262 else if (select && select->quick)
263 {
264 DBUG_PRINT("info",("using rr_quick"));
265 info->read_record_func= rr_quick;
266 }
267 else if (filesort && filesort->record_pointers)
268 {
269 DBUG_PRINT("info",("using record_pointers"));
270 if (unlikely(table->file->ha_rnd_init_with_error(0)))
271 DBUG_RETURN(1);
272 info->cache_pos= filesort->record_pointers;
273 info->cache_end= (info->cache_pos+
274 filesort->return_rows * info->ref_length);
275 info->read_record_func=
276 addon_field ? rr_unpack_from_buffer : rr_from_pointers;
277 }
278 else if (table->file->keyread_enabled())
279 {
280 int error;
281 info->read_record_func= rr_index_first;
282 if (!table->file->inited &&
283 unlikely((error= table->file->ha_index_init(table->file->keyread, 1))))
284 {
285 if (print_error)
286 table->file->print_error(error, MYF(0));
287 DBUG_RETURN(1);
288 }
289 }
290 else
291 {
292 DBUG_PRINT("info",("using rr_sequential"));
293 info->read_record_func= rr_sequential;
294 if (unlikely(table->file->ha_rnd_init_with_error(1)))
295 DBUG_RETURN(1);
296 /* We can use record cache if we don't update dynamic length tables */
297 if (!table->no_cache &&
298 (use_record_cache > 0 ||
299 (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY ||
300 !(table->s->db_options_in_use & HA_OPTION_PACK_RECORD) ||
301 (use_record_cache < 0 &&
302 !(table->file->ha_table_flags() & HA_NOT_DELETE_WITH_CACHE))))
303 (void) table->file->extra_opt(HA_EXTRA_CACHE,
304 thd->variables.read_buff_size);
305 }
306 /* Condition pushdown to storage engine */
307 if ((table->file->ha_table_flags() & HA_CAN_TABLE_CONDITION_PUSHDOWN) &&
308 select && select->cond &&
309 (select->cond->used_tables() & table->map) &&
310 !table->file->pushed_cond)
311 table->file->cond_push(select->cond);
312
313 DBUG_RETURN(0);
314 } /* init_read_record */
315
316
317
end_read_record(READ_RECORD * info)318 void end_read_record(READ_RECORD *info)
319 {
320 /* free cache if used */
321 free_cache(info);
322 if (info->table)
323 {
324 if (info->table->db_stat) // if opened
325 (void) info->table->file->extra(HA_EXTRA_NO_CACHE);
326 if (info->read_record_func != rr_quick) // otherwise quick_range does it
327 (void) info->table->file->ha_index_or_rnd_end();
328 info->table=0;
329 }
330 }
331
332
free_cache(READ_RECORD * info)333 void free_cache(READ_RECORD *info)
334 {
335 if (info->cache)
336 {
337 my_free_lock(info->cache);
338 info->cache=0;
339 }
340 }
341
342
rr_handle_error(READ_RECORD * info,int error)343 static int rr_handle_error(READ_RECORD *info, int error)
344 {
345 if (info->thd->killed)
346 {
347 info->thd->send_kill_message();
348 return 1;
349 }
350
351 if (error == HA_ERR_END_OF_FILE)
352 error= -1;
353 else
354 {
355 if (info->print_error)
356 info->table->file->print_error(error, MYF(0));
357 if (error < 0) // Fix negative BDB errno
358 error= 1;
359 }
360 return error;
361 }
362
363
364 /** Read a record from head-database. */
365
rr_quick(READ_RECORD * info)366 static int rr_quick(READ_RECORD *info)
367 {
368 int tmp;
369 while ((tmp= info->select->quick->get_next()))
370 {
371 tmp= rr_handle_error(info, tmp);
372 break;
373 }
374 return tmp;
375 }
376
377
378 /**
379 Reads first row in an index scan.
380
381 @param info Scan info
382
383 @retval
384 0 Ok
385 @retval
386 -1 End of records
387 @retval
388 1 Error
389 */
390
rr_index_first(READ_RECORD * info)391 static int rr_index_first(READ_RECORD *info)
392 {
393 int tmp;
394 // tell handler that we are doing an index scan
395 if ((tmp = info->table->file->prepare_index_scan()))
396 {
397 tmp= rr_handle_error(info, tmp);
398 return tmp;
399 }
400
401 tmp= info->table->file->ha_index_first(info->record());
402 info->read_record_func= rr_index;
403 if (tmp)
404 tmp= rr_handle_error(info, tmp);
405 return tmp;
406 }
407
408
409 /**
410 Reads last row in an index scan.
411
412 @param info Scan info
413
414 @retval
415 0 Ok
416 @retval
417 -1 End of records
418 @retval
419 1 Error
420 */
421
rr_index_last(READ_RECORD * info)422 static int rr_index_last(READ_RECORD *info)
423 {
424 int tmp= info->table->file->ha_index_last(info->record());
425 info->read_record_func= rr_index_desc;
426 if (tmp)
427 tmp= rr_handle_error(info, tmp);
428 return tmp;
429 }
430
431
432 /**
433 Reads index sequentially after first row.
434
435 Read the next index record (in forward direction) and translate return
436 value.
437
438 @param info Scan info
439
440 @retval
441 0 Ok
442 @retval
443 -1 End of records
444 @retval
445 1 Error
446 */
447
rr_index(READ_RECORD * info)448 static int rr_index(READ_RECORD *info)
449 {
450 int tmp= info->table->file->ha_index_next(info->record());
451 if (tmp)
452 tmp= rr_handle_error(info, tmp);
453 return tmp;
454 }
455
456
457 /**
458 Reads index sequentially from the last row to the first.
459
460 Read the prev index record (in backward direction) and translate return
461 value.
462
463 @param info Scan info
464
465 @retval
466 0 Ok
467 @retval
468 -1 End of records
469 @retval
470 1 Error
471 */
472
rr_index_desc(READ_RECORD * info)473 static int rr_index_desc(READ_RECORD *info)
474 {
475 int tmp= info->table->file->ha_index_prev(info->record());
476 if (tmp)
477 tmp= rr_handle_error(info, tmp);
478 return tmp;
479 }
480
481
rr_sequential(READ_RECORD * info)482 int rr_sequential(READ_RECORD *info)
483 {
484 int tmp;
485 while ((tmp= info->table->file->ha_rnd_next(info->record())))
486 {
487 tmp= rr_handle_error(info, tmp);
488 break;
489 }
490 return tmp;
491 }
492
493
rr_from_tempfile(READ_RECORD * info)494 static int rr_from_tempfile(READ_RECORD *info)
495 {
496 int tmp;
497 for (;;)
498 {
499 if (my_b_read(info->io_cache,info->ref_pos,info->ref_length))
500 return -1; /* End of file */
501 if (!(tmp= info->table->file->ha_rnd_pos(info->record(), info->ref_pos)))
502 break;
503 /* The following is extremely unlikely to happen */
504 if (tmp == HA_ERR_KEY_NOT_FOUND)
505 continue;
506 tmp= rr_handle_error(info, tmp);
507 break;
508 }
509 return tmp;
510 } /* rr_from_tempfile */
511
512
513 /**
514 Read a result set record from a temporary file after sorting.
515
516 The function first reads the next sorted record from the temporary file.
517 into a buffer. If a success it calls a callback function that unpacks
518 the fields values use in the result set from this buffer into their
519 positions in the regular record buffer.
520
521 @param info Reference to the context including record descriptors
522
523 @retval
524 0 Record successfully read.
525 @retval
526 -1 There is no record to be read anymore.
527 */
528
rr_unpack_from_tempfile(READ_RECORD * info)529 static int rr_unpack_from_tempfile(READ_RECORD *info)
530 {
531 if (my_b_read(info->io_cache, info->rec_buf, info->ref_length))
532 return -1;
533 (*info->unpack)(info->addon_field, info->rec_buf,
534 info->rec_buf + info->ref_length);
535
536 return 0;
537 }
538
rr_from_pointers(READ_RECORD * info)539 int rr_from_pointers(READ_RECORD *info)
540 {
541 int tmp;
542 uchar *cache_pos;
543
544 for (;;)
545 {
546 if (info->cache_pos == info->cache_end)
547 return -1; /* End of file */
548 cache_pos= info->cache_pos;
549 info->cache_pos+= info->ref_length;
550
551 if (!(tmp= info->table->file->ha_rnd_pos(info->record(), cache_pos)))
552 break;
553
554 /* The following is extremely unlikely to happen */
555 if (tmp == HA_ERR_KEY_NOT_FOUND)
556 continue;
557 tmp= rr_handle_error(info, tmp);
558 break;
559 }
560 return tmp;
561 }
562
563 /**
564 Read a result set record from a buffer after sorting.
565
566 The function first reads the next sorted record from the sort buffer.
567 If a success it calls a callback function that unpacks
568 the fields values use in the result set from this buffer into their
569 positions in the regular record buffer.
570
571 @param info Reference to the context including record descriptors
572
573 @retval
574 0 Record successfully read.
575 @retval
576 -1 There is no record to be read anymore.
577 */
578
rr_unpack_from_buffer(READ_RECORD * info)579 static int rr_unpack_from_buffer(READ_RECORD *info)
580 {
581 if (info->cache_pos == info->cache_end)
582 return -1; /* End of buffer */
583 (*info->unpack)(info->addon_field, info->cache_pos,
584 info->cache_end);
585 info->cache_pos+= info->ref_length;
586 return 0;
587 }
588 /* cacheing of records from a database */
589
590 static const uint STRUCT_LENGTH= 3 + MAX_REFLENGTH;
591
init_rr_cache(THD * thd,READ_RECORD * info)592 static int init_rr_cache(THD *thd, READ_RECORD *info)
593 {
594 uint rec_cache_size, cache_records;
595 DBUG_ENTER("init_rr_cache");
596
597 info->reclength= ALIGN_SIZE(info->table->s->reclength+1);
598 if (info->reclength < STRUCT_LENGTH)
599 info->reclength= ALIGN_SIZE(STRUCT_LENGTH);
600
601 info->error_offset= info->table->s->reclength;
602 cache_records= thd->variables.read_rnd_buff_size /
603 (info->reclength + STRUCT_LENGTH);
604 rec_cache_size= cache_records * info->reclength;
605 info->rec_cache_size= cache_records * info->ref_length;
606
607 // We have to allocate one more byte to use uint3korr (see comments for it)
608 if (cache_records <= 2 ||
609 !(info->cache= (uchar*) my_malloc_lock(rec_cache_size + cache_records *
610 STRUCT_LENGTH + 1,
611 MYF(MY_THREAD_SPECIFIC))))
612 DBUG_RETURN(1);
613 #ifdef HAVE_valgrind
614 // Avoid warnings in qsort
615 bzero(info->cache, rec_cache_size + cache_records * STRUCT_LENGTH + 1);
616 #endif
617 DBUG_PRINT("info", ("Allocated buffer for %d records", cache_records));
618 info->read_positions=info->cache+rec_cache_size;
619 info->cache_pos=info->cache_end=info->cache;
620 DBUG_RETURN(0);
621 } /* init_rr_cache */
622
623
rr_from_cache(READ_RECORD * info)624 static int rr_from_cache(READ_RECORD *info)
625 {
626 uint i;
627 ulong length;
628 my_off_t rest_of_file;
629 int16 error;
630 uchar *position,*ref_position,*record_pos;
631 ulong record;
632
633 for (;;)
634 {
635 if (info->cache_pos != info->cache_end)
636 {
637 if (unlikely(info->cache_pos[info->error_offset]))
638 {
639 shortget(error,info->cache_pos);
640 if (info->print_error)
641 info->table->file->print_error(error,MYF(0));
642 }
643 else
644 {
645 error=0;
646 memcpy(info->record(), info->cache_pos,
647 (size_t) info->table->s->reclength);
648 }
649 info->cache_pos+=info->reclength;
650 return ((int) error);
651 }
652 length=info->rec_cache_size;
653 rest_of_file=info->io_cache->end_of_file - my_b_tell(info->io_cache);
654 if ((my_off_t) length > rest_of_file)
655 length= (ulong) rest_of_file;
656 if (!length || my_b_read(info->io_cache,info->cache,length))
657 {
658 DBUG_PRINT("info",("Found end of file"));
659 return -1; /* End of file */
660 }
661
662 length/=info->ref_length;
663 position=info->cache;
664 ref_position=info->read_positions;
665 for (i=0 ; i < length ; i++,position+=info->ref_length)
666 {
667 memcpy(ref_position,position,(size_t) info->ref_length);
668 ref_position+=MAX_REFLENGTH;
669 int3store(ref_position,(long) i);
670 ref_position+=3;
671 }
672 my_qsort(info->read_positions, length, STRUCT_LENGTH, (qsort_cmp) rr_cmp);
673
674 position=info->read_positions;
675 for (i=0 ; i < length ; i++)
676 {
677 memcpy(info->ref_pos,position,(size_t) info->ref_length);
678 position+=MAX_REFLENGTH;
679 record=uint3korr(position);
680 position+=3;
681 record_pos=info->cache+record*info->reclength;
682 if (unlikely((error= (int16) info->table->file->
683 ha_rnd_pos(record_pos,info->ref_pos))))
684 {
685 record_pos[info->error_offset]=1;
686 shortstore(record_pos,error);
687 DBUG_PRINT("error",("Got error: %d:%d when reading row",
688 my_errno, error));
689 }
690 else
691 record_pos[info->error_offset]=0;
692 }
693 info->cache_end=(info->cache_pos=info->cache)+length*info->reclength;
694 }
695 } /* rr_from_cache */
696
697
rr_cmp(uchar * a,uchar * b)698 static int rr_cmp(uchar *a,uchar *b)
699 {
700 if (a[0] != b[0])
701 return (int) a[0] - (int) b[0];
702 if (a[1] != b[1])
703 return (int) a[1] - (int) b[1];
704 if (a[2] != b[2])
705 return (int) a[2] - (int) b[2];
706 #if MAX_REFLENGTH == 4
707 return (int) a[3] - (int) b[3];
708 #else
709 if (a[3] != b[3])
710 return (int) a[3] - (int) b[3];
711 if (a[4] != b[4])
712 return (int) a[4] - (int) b[4];
713 if (a[5] != b[5])
714 return (int) a[5] - (int) b[5];
715 if (a[6] != b[6])
716 return (int) a[6] - (int) b[6];
717 return (int) a[7] - (int) b[7];
718 #endif
719 }
720