1 /*
2    Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
23 
24 /* Copy data from a text file to table */
25 
26 #include "sql/sql_load.h"
27 
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdio.h>
31 // Execute_load_query_log_event,
32 // LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F
33 #include <string.h>
34 #include <sys/types.h>
35 #include <algorithm>
36 #include <atomic>
37 
38 #include "libbinlogevents/include/load_data_events.h"
39 #include "m_ctype.h"
40 #include "m_string.h"
41 #include "my_base.h"
42 #include "my_bitmap.h"
43 #include "my_dbug.h"
44 #include "my_dir.h"
45 #include "my_inttypes.h"
46 #include "my_io.h"
47 #include "my_loglevel.h"
48 #include "my_macros.h"
49 #include "my_sys.h"
50 #include "my_thread_local.h"
51 #include "mysql/components/services/log_builtins.h"
52 #include "mysql/psi/mysql_file.h"
53 #include "mysql/service_mysql_alloc.h"
54 #include "mysql/thread_type.h"
55 #include "mysql_com.h"
56 #include "mysqld_error.h"
57 #include "sql/auth/auth_acls.h"
58 #include "sql/auth/auth_common.h"
59 #include "sql/binlog.h"
60 #include "sql/derror.h"
61 #include "sql/error_handler.h"  // Ignore_error_handler
62 #include "sql/field.h"
63 #include "sql/handler.h"
64 #include "sql/item.h"
65 #include "sql/item_func.h"
66 #include "sql/item_timefunc.h"  // Item_func_now_local
67 #include "sql/log.h"
68 #include "sql/log_event.h"  // Delete_file_log_event,
69 #include "sql/mysqld.h"     // mysql_real_data_home
70 #include "sql/protocol.h"
71 #include "sql/protocol_classic.h"
72 #include "sql/psi_memory_key.h"
73 #include "sql/query_result.h"
74 #include "sql/rpl_rli.h"  // Relay_log_info
75 #include "sql/rpl_slave.h"
76 #include "sql/sql_base.h"  // fill_record_n_invoke_before_triggers
77 #include "sql/sql_class.h"
78 #include "sql/sql_error.h"
79 #include "sql/sql_insert.h"  // check_that_all_fields_are_given_values,
80 #include "sql/sql_lex.h"
81 #include "sql/sql_list.h"
82 #include "sql/sql_show.h"
83 #include "sql/sql_view.h"  // check_key_in_view
84 #include "sql/system_variables.h"
85 #include "sql/table.h"
86 #include "sql/table_trigger_dispatcher.h"  // Table_trigger_dispatcher
87 #include "sql/thr_malloc.h"
88 #include "sql/transaction_info.h"
89 #include "sql/trigger_def.h"
90 #include "sql_string.h"
91 #include "thr_lock.h"
92 
93 class READ_INFO;
94 
95 using std::max;
96 using std::min;
97 
98 class XML_TAG {
99  public:
100   int level;
101   String field;
102   String value;
103   XML_TAG(int l, String f, String v);
104 };
105 
XML_TAG(int l,String f,String v)106 XML_TAG::XML_TAG(int l, String f, String v) {
107   level = l;
108   field.append(f);
109   value.append(v);
110 }
111 
/*
  Character-level input helpers. Both macros expand in place and expect
  `stack`, `stack_pos` and `cache` to be visible at the point of use.

  GET      Yields the most recently pushed-back value when the push-back
           stack is non-empty (stack_pos != stack); otherwise fetches the
           next value from `cache` through my_b_get().
  PUSH(A)  Stores A at *stack_pos and advances stack_pos. No bounds check
           is performed here.
*/
#define GET ((stack_pos == stack) ? my_b_get(&cache) : *--stack_pos)
#define PUSH(A) (*(stack_pos++) = (A))
114 
115 class READ_INFO {
116   File file;
117   uchar *buffer,    /* Buffer for read text */
118       *end_of_buff; /* Data in bufferts ends here */
119   uint buff_length; /* Length of buffer */
120   const uchar *field_term_ptr, *line_term_ptr;
121   const char *line_start_ptr, *line_start_end;
122   size_t field_term_length, line_term_length, enclosed_length;
123   int field_term_char, line_term_char, enclosed_char, escape_char;
124   int *stack, *stack_pos;
125   bool found_end_of_line, start_of_line, eof;
126   bool need_end_io_cache;
127   IO_CACHE cache;
128   int level; /* for load xml */
129 
130  public:
131   bool error, line_truncated, found_null, enclosed;
132   uchar *row_start, /* Found row starts here */
133       *row_end;     /* Found row ends here */
134   const CHARSET_INFO *read_charset;
135 
136   READ_INFO(File file, uint tot_length, const CHARSET_INFO *cs,
137             const String &field_term, const String &line_start,
138             const String &line_term, const String &enclosed, int escape,
139             bool get_it_from_net, bool is_fifo);
140   ~READ_INFO();
141   bool read_field();
142   bool read_fixed_length();
143   bool next_line();
144   char unescape(char chr);
145   bool terminator(const uchar *ptr, size_t length);
146   bool find_start_of_fields();
147   /* load xml */
148   List<XML_TAG> taglist;
149   int read_value(int delim, String *val);
150   int read_cdata(String *val, bool *have_cdata);
151   bool read_xml();
152   void clear_level(int level);
153 
154   /*
155     We need to force cache close before destructor is invoked to log
156     the last read block
157   */
end_io_cache()158   void end_io_cache() {
159     ::end_io_cache(&cache);
160     need_end_io_cache = false;
161   }
162 
163   /*
164     Either this method, or we need to make cache public
165     Arg must be set from Sql_cmd_load_table::execute_inner()
166     since constructor does not see either the table or THD value
167   */
set_io_cache_arg(void * arg)168   void set_io_cache_arg(void *arg) { cache.arg = arg; }
169 
170   /**
171     skip all data till the eof.
172   */
skip_data_till_eof()173   void skip_data_till_eof() {
174     while (GET != my_b_EOF)
175       ;
176   }
177 };
178 
/**
  Execute LOAD DATA query

  The function proceeds in these steps:
   - validate the ESCAPED BY / ENCLOSED BY strings and warn about
     non-ASCII separators;
   - open and lock the tables, and check that the target is an updatable,
     single-table, non-derived reference;
   - resolve the column/variable list and the SET clause;
   - estimate the row buffer length from the target columns;
   - obtain the input: request the file from the client for a local file,
     otherwise build, validate and open a server-side path;
   - read the rows through read_xml_field(), read_fixed_length() or
     read_sep_field();
   - on failure, write the compensating binary log event (when the binary
     log is open); on success, flush/write the binary log and send OK with
     the ER_LOAD_INFO message.

  @param thd                 Current thread.
  @param handle_duplicates   Indicates whenever we should emit error or
                             replace row if we will meet duplicates.

  @returns true if error
*/
bool Sql_cmd_load_table::execute_inner(THD *thd,
                                       enum enum_duplicates handle_duplicates) {
  // Used first for the resolved input file path and, at the end, reused as
  // the buffer for the ER_LOAD_INFO message passed to my_ok().
  char name[FN_REFLEN];
  File file;
  bool error = false;
  const String *field_term = m_exchange.field.field_term;
  const String *escaped = m_exchange.field.escaped;
  const String *enclosed = m_exchange.field.enclosed;
  bool is_fifo = false;
  SELECT_LEX *select = thd->lex->select_lex;
  LOAD_FILE_INFO lf_info;
  THD::killed_state killed_status = THD::NOT_KILLED;
  bool is_concurrent;
  bool transactional_table;
  TABLE_LIST *const table_list = thd->lex->query_tables;
  const char *db = table_list->db;  // This is never null
  /*
    If path for file is not defined, we will use the current database.
    If this is not set, we will use the directory where the table to be
    loaded is located
  */
  const char *tdb = thd->db().str ? thd->db().str : db;  // Result is never null
  // Local copy that is counted down while skipping; see the note further
  // below about m_exchange.skip_lines being preserved for logging.
  ulong skip_lines = m_exchange.skip_lines;
  DBUG_TRACE;

  /*
    Bug #34283
    mysqlbinlog leaves tmpfile after termination if binlog contains
    load data infile, so in mixed mode we go to row-based for
    avoiding the problem.
  */
  thd->set_current_stmt_binlog_format_row_if_mixed();

  // ESCAPED BY and ENCLOSED BY may be at most one character long.
  if (escaped->length() > 1 || enclosed->length() > 1) {
    my_error(ER_WRONG_FIELD_TERMINATORS, MYF(0));
    return true;
  }

  /* Report problems with non-ascii separators */
  if (!escaped->is_ascii() || !enclosed->is_ascii() ||
      !field_term->is_ascii() || !m_exchange.line.line_term->is_ascii() ||
      !m_exchange.line.line_start->is_ascii()) {
    push_warning(thd, Sql_condition::SL_WARNING,
                 WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED,
                 ER_THD(thd, WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED));
  }

  if (open_and_lock_tables(thd, table_list, 0)) return true;

  THD_STAGE_INFO(thd, stage_executing);
  if (select->setup_tables(thd, table_list, false)) return true;

  if (run_before_dml_hook(thd)) return true;

  if (table_list->is_view() && select->resolve_placeholder_tables(thd, false))
    return true; /* purecov: inspected */

  // The base table rows are inserted into, or nullptr when the target is
  // not an updatable single-table, non-derived reference.
  TABLE_LIST *const insert_table_ref =
      table_list->is_updatable() &&  // View must be updatable
              !table_list
                   ->is_multiple_tables() &&  // Multi-table view not allowed
              !table_list->is_derived()
          ?  // derived tables not allowed
          table_list->updatable_base_table()
          : nullptr;

  if (insert_table_ref == nullptr ||
      check_key_in_view(thd, table_list, insert_table_ref)) {
    my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "LOAD");
    return true;
  }
  if (select->derived_table_count &&
      select->check_view_privileges(thd, INSERT_ACL, SELECT_ACL))
    return true; /* purecov: inspected */

  if (table_list->is_merged()) {
    if (table_list->prepare_check_option(thd)) return true;

    if (handle_duplicates == DUP_REPLACE &&
        table_list->prepare_replace_filter(thd))
      return true;
  }

  // Pass the check option down to the underlying table:
  insert_table_ref->check_option = table_list->check_option;
  /*
    Let us emit an error if we are loading data to table which is used
    in subselect in SET clause like we do it for INSERT.

    The main thing to fix to remove this restriction is to ensure that the
    table is marked to be 'used for insert' in which case we should never
    mark this table as 'const table' (ie, one that has only one row).
  */
  if (unique_table(insert_table_ref, table_list->next_global, false)) {
    my_error(ER_UPDATE_TABLE_USED, MYF(0), table_list->table_name);
    return true;
  }

  TABLE *const table = insert_table_ref->table;

  // Clear the warning state of every column of the target table.
  for (Field **cur_field = table->field; *cur_field; ++cur_field)
    (*cur_field)->reset_warnings();

  // Both values are only consumed by the binary logging code below.
  transactional_table = table->file->has_transactions();
  is_concurrent =
      (table_list->lock_descriptor().type == TL_WRITE_CONCURRENT_INSERT);

  if (m_opt_fields_or_vars.is_empty()) {
    // No column list was given: build one from all fields of the table
    // reference, rejecting any column that is not updatable.
    Field_iterator_table_ref field_iterator;
    field_iterator.set(table_list);
    for (; !field_iterator.end_of_fields(); field_iterator.next()) {
      Item *item;
      if (!(item = field_iterator.create_item(thd))) return true;

      if (item->field_for_view_update() == nullptr) {
        my_error(ER_NONUPDATEABLE_COLUMN, MYF(0), item->item_name.ptr());
        return true;
      }
      m_opt_fields_or_vars.push_back(item->real_item());
    }
    bitmap_set_all(table->write_set);
    /*
      Let us also prepare SET clause, although it is probably empty
      in this case.
    */
    if (setup_fields(thd, Ref_item_array(), m_opt_set_fields, INSERT_ACL,
                     nullptr, false, true) ||
        setup_fields(thd, Ref_item_array(), m_opt_set_exprs, SELECT_ACL,
                     nullptr, false, false))
      return true;
  } else {  // Part field list
    /*
      Because m_opt_fields_or_vars may contain user variables,
      pass false for column_update in first call below.
    */
    if (setup_fields(thd, Ref_item_array(), m_opt_fields_or_vars, INSERT_ACL,
                     nullptr, false, false) ||
        setup_fields(thd, Ref_item_array(), m_opt_set_fields, INSERT_ACL,
                     nullptr, false, true))
      return true;

    /*
      Special updatability test is needed because m_opt_fields_or_vars may
      contain a mix of column references and user variables.
    */
    Item *item;
    List_iterator<Item> it(m_opt_fields_or_vars);
    while ((item = it++)) {
      if ((item->type() == Item::FIELD_ITEM ||
           item->type() == Item::REF_ITEM) &&
          item->field_for_view_update() == nullptr) {
        my_error(ER_NONUPDATEABLE_COLUMN, MYF(0), item->item_name.ptr());
        return true;
      }
      if (item->type() == Item::STRING_ITEM) {
        /*
          This item represents a user variable. Create a new item with the
          same name that can be added to LEX::set_var_list. This ensures
          that corresponding Item_func_get_user_var items are resolved as
          non-const items.
        */
        Item_func_set_user_var *user_var = new (thd->mem_root)
            Item_func_set_user_var(item->item_name, item, false);
        if (user_var == nullptr) return true;
        thd->lex->set_var_list.push_back(user_var);
      }
    }

    // Consider the following table:
    //
    //   CREATE TABLE t1 (x DOUBLE, y DOUBLE, g POINT SRID 4326 NOT NULL);
    //
    // If the user wants to load a file which only contains two values (x and y
    // coordinates), it is possible to do it by executing the following
    // statement:
    //
    //  LOAD DATA INFILE 'data' (@x, @y)
    //    SET x = @x, y = @y, g = ST_SRID(POINT(@x, @y));
    //
    // However, the columns that are specified in the SET clause are only marked
    // in the write set, and not in fields_set_during_insert. The latter is the
    // bitmap used during check_that_all_fields_are_given_values(), so we need
    // to copy the bits from the write set over to said bitmap. If not, the
    // server will return an error saying that column 'g' doesn't have a default
    // value.
    bitmap_union(table->fields_set_during_insert, table->write_set);

    if (check_that_all_fields_are_given_values(thd, table, table_list))
      return true;
    /* Fix the expressions in SET clause */
    if (setup_fields(thd, Ref_item_array(), m_opt_set_exprs, SELECT_ACL,
                     nullptr, false, false))
      return true;
  }

  // Effective escape character. INT_MAX is used when escaping is off: the
  // ESCAPED BY string is empty, or it was not given explicitly and
  // MODE_NO_BACKSLASH_ESCAPES is set in sql_mode.
  const int escape_char =
      (escaped->length() &&
       (m_exchange.escaped_given() ||
        !(thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES)))
          ? (*escaped)[0]
          : INT_MAX;

  /*
    * LOAD DATA INFILE fff INTO TABLE xxx SET columns2
    sets all columns, except if file's row lacks some: in that case,
    defaults are set by read_fixed_length() and read_sep_field(),
    not by COPY_INFO.
    * LOAD DATA INFILE fff INTO TABLE xxx (columns1) SET columns2=
    may need a default for columns other than columns1 and columns2.
  */
  const bool manage_defaults = m_opt_fields_or_vars.elements != 0;
  COPY_INFO info(COPY_INFO::INSERT_OPERATION, &m_opt_fields_or_vars,
                 &m_opt_set_fields, manage_defaults, handle_duplicates,
                 escape_char);

  if (info.add_function_default_columns(table, table->write_set)) return true;

  if (table->triggers) {
    if (table->triggers->mark_fields(TRG_EVENT_INSERT)) return true;
  }

  prepare_triggers_for_insert_stmt(thd, table);

  // Walk the target list to compute the buffer length passed to READ_INFO
  // and to detect whether BLOB columns or user variables are being loaded.
  uint tot_length = 0;
  bool use_blobs = false, use_vars = false;
  List_iterator_fast<Item> it(m_opt_fields_or_vars);
  Item *item;

  while ((item = it++)) {
    const Item *real_item = item->real_item();

    if (real_item->type() == Item::FIELD_ITEM) {
      const Field *field = down_cast<const Item_field *>(real_item)->field;
      if (field->is_flag_set(BLOB_FLAG)) {
        use_blobs = true;
        tot_length += 256;  // Will be extended if needed
      } else
        tot_length += field->field_length;
    } else if (item->type() == Item::STRING_ITEM)
      use_vars = true;
  }
  // BLOB columns are rejected when both the line and the field terminator
  // are empty.
  if (use_blobs && m_exchange.line.line_term->is_empty() &&
      field_term->is_empty()) {
    my_error(ER_BLOBS_AND_NO_TERMINATED, MYF(0));
    return true;
  }
  // User variables are rejected in the fixed-size row format (no field
  // terminator and no enclosure character).
  if (use_vars && !field_term->length() && !enclosed->length()) {
    my_error(ER_LOAD_FROM_FIXED_SIZE_ROWS_TO_VAR, MYF(0));
    return true;
  }

  if (m_is_local_file) {
    // Local file: ask the client to send the file; there is no server-side
    // file descriptor, and READ_INFO is told below to read from the net.
    (void)net_request_file(thd->get_protocol_classic()->get_net(),
                           m_exchange.file_name);
    file = -1;
  } else {
    // Server-side file: a name without a directory part is resolved against
    // mysql_real_data_home followed by tdb, anything else against
    // mysql_real_data_home with MY_RETURN_REAL_PATH.
    if (!dirname_length(m_exchange.file_name)) {
      strxnmov(name, FN_REFLEN - 1, mysql_real_data_home, tdb, NullS);
      (void)fn_format(name, m_exchange.file_name, name, "",
                      MY_RELATIVE_PATH | MY_UNPACK_FILENAME);
    } else {
      (void)fn_format(
          name, m_exchange.file_name, mysql_real_data_home, "",
          MY_RELATIVE_PATH | MY_UNPACK_FILENAME | MY_RETURN_REAL_PATH);
    }

    if ((thd->system_thread &
         (SYSTEM_THREAD_SLAVE_SQL | SYSTEM_THREAD_SLAVE_WORKER)) != 0) {
      Relay_log_info *rli = thd->rli_slave->get_c_rli();

      // The resolved path must start with the slave's load-file pattern.
      if (strncmp(rli->slave_patternload_file, name,
                  rli->slave_patternload_file_size)) {
        /*
          LOAD DATA INFILE in the slave SQL Thread can only read from
          --slave-load-tmpdir. This should never happen. Please, report a bug.
        */
        LogErr(ERROR_LEVEL, ER_LOAD_DATA_INFILE_FAILED_IN_UNEXPECTED_WAY);
        my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--slave-load-tmpdir");
        return true;
      }
    } else if (!is_secure_file_path(name)) {
      /* Read only allowed from within dir specified by secure_file_priv */
      my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--secure-file-priv");
      return true;
    }

#if !defined(_WIN32)
    MY_STAT stat_info;
    if (!my_stat(name, &stat_info, MYF(MY_WME))) return true;

    // if we are not in slave thread, the file must be:
    if (!thd->slave_thread &&
        !((stat_info.st_mode & S_IFLNK) != S_IFLNK &&   // symlink
          ((stat_info.st_mode & S_IFREG) == S_IFREG ||  // regular file
           (stat_info.st_mode & S_IFIFO) == S_IFIFO)))  // named pipe
    {
      my_error(ER_TEXTFILE_NOT_READABLE, MYF(0), name);
      return true;
    }
    // Remembered so that READ_INFO can be told the input is a named pipe.
    if ((stat_info.st_mode & S_IFIFO) == S_IFIFO) is_fifo = true;
#endif
    if ((file = mysql_file_open(key_file_load, name, O_RDONLY, MYF(MY_WME))) <
        0)

      return true;
  }

  // From here on `file` may be open; every exit path below closes it when
  // file >= 0.
  READ_INFO read_info(
      file, tot_length,
      m_exchange.cs ? m_exchange.cs : thd->variables.collation_database,
      *field_term, *m_exchange.line.line_start, *m_exchange.line.line_term,
      *enclosed, info.escape_char, m_is_local_file, is_fifo);
  if (read_info.error) {
    if (file >= 0) mysql_file_close(file, MYF(0));  // no files in net reading
    return true;                                    // Can't allocate buffers
  }

  if (mysql_bin_log.is_open()) {
    // Hand the logging state to the reader's IO cache; lf_info is only
    // initialized (and later consulted) when the binary log is open.
    lf_info.thd = thd;
    lf_info.logged_data_file = false;
    lf_info.last_pos_in_file = HA_POS_ERROR;
    lf_info.log_delayed = transactional_table;
    read_info.set_io_cache_arg((void *)&lf_info);
  }

  thd->check_for_truncated_fields = CHECK_FIELD_WARN;
  thd->num_truncated_fields = 0L;
  /* Skip lines if there is a line terminator */
  if (m_exchange.line.line_term->length() &&
      m_exchange.filetype != FILETYPE_XML) {
    /* m_exchange.skip_lines needs to be preserved for logging */
    while (skip_lines > 0) {
      skip_lines--;
      if (read_info.next_line()) break;
    }
  }

  if (!(error = read_info.error)) {
    table->next_number_field = table->found_next_number_field;
    if (thd->lex->is_ignore() || handle_duplicates == DUP_REPLACE)
      table->file->ha_extra(HA_EXTRA_IGNORE_DUP_KEY);
    if (handle_duplicates == DUP_REPLACE &&
        (!table->triggers || !table->triggers->has_delete_triggers()))
      table->file->ha_extra(HA_EXTRA_WRITE_CAN_REPLACE);
    if (thd->locked_tables_mode <= LTM_LOCK_TABLES)
      table->file->ha_start_bulk_insert((ha_rows)0);
    table->copy_blobs = true;

    // Dispatch on the input format: XML, fixed-size rows (no field
    // terminator and no enclosure), or separated fields.
    if (m_exchange.filetype == FILETYPE_XML) /* load xml */
      error =
          read_xml_field(thd, info, insert_table_ref, read_info, skip_lines);
    else if (!field_term->length() && !enclosed->length())
      error =
          read_fixed_length(thd, info, insert_table_ref, read_info, skip_lines);
    else
      error = read_sep_field(thd, info, insert_table_ref, read_info, *enclosed,
                             skip_lines);
    // ha_end_bulk_insert() is always called in this mode; its failure is
    // reported only if no earlier error occurred.
    if (thd->locked_tables_mode <= LTM_LOCK_TABLES &&
        table->file->ha_end_bulk_insert() && !error) {
      table->file->print_error(my_errno(), MYF(0));
      error = true;
    }
    table->next_number_field = nullptr;
  }
  if (file >= 0) mysql_file_close(file, MYF(0));
  free_blobs(table); /* if pack_blob was used */
  table->copy_blobs = false;
  thd->check_for_truncated_fields = CHECK_FIELD_IGNORE;
  /*
     simulated killing in the middle of per-row loop
     must be effective for binlogging
  */
  DBUG_EXECUTE_IF("simulate_kill_bug27571", {
    error = true;
    thd->killed = THD::KILL_QUERY;
  };);

  // The kill state is sampled only when the load failed.
  killed_status = error ? thd->killed.load() : THD::NOT_KILLED;

  if (error) {
    // For a local file, drain whatever input is still pending.
    if (m_is_local_file) read_info.skip_data_till_eof();

    if (mysql_bin_log.is_open()) {
      {
        /*
          Make sure last block (the one which caused the error) gets
          logged.  This is needed because otherwise after write of (to
          the binlog, not to read_info (which is a cache))
          Delete_file_log_event the bad block will remain in read_info
          (because pre_read is not called at the end of the last
          block; remember pre_read is called whenever a new block is
          read from disk).  At the end of Sql_cmd_load_table::execute_inner(),
          the destructor of read_info will call end_io_cache() which will flush
          read_info, so we will finally have this in the binlog:

          Append_block # The last successful block
          Delete_file
          Append_block # The failing block
          which is nonsense.
          Or could also be (for a small file)
          Create_file  # The failing block
          which is nonsense (Delete_file is not written in this case, because:
          Create_file has not been written, so Delete_file is not written, then
          when read_info is destroyed end_io_cache() is called which writes
          Create_file.
        */
        read_info.end_io_cache();
        /* If the file was not empty, lf_info.logged_data_file is true */
        if (lf_info.logged_data_file) {
          int errcode = query_error_code(thd, killed_status == THD::NOT_KILLED);

          /* since there is already an error, the possible error of
             writing binary log will be ignored */
          if (thd->get_transaction()->cannot_safely_rollback(
                  Transaction_ctx::STMT))
            (void)write_execute_load_query_log_event(
                thd, table_list->db, table_list->table_name, is_concurrent,
                handle_duplicates, transactional_table, errcode);
          else {
            Delete_file_log_event d(thd, db, transactional_table);
            (void)mysql_bin_log.write_event(&d);
          }
        }
      }
    }
    error = true;  // Error on read
    goto err;
  }

  // Success: `name` is no longer needed as a path and is reused for the
  // informational message sent with OK.
  snprintf(name, sizeof(name), ER_THD(thd, ER_LOAD_INFO),
           (long)info.stats.records, (long)info.stats.deleted,
           (long)(info.stats.records - info.stats.copied),
           (long)thd->get_stmt_da()->current_statement_cond_count());

  if (mysql_bin_log.is_open()) {
    /*
      We need to do the job that is normally done inside
      binlog_query() here, which is to ensure that the pending event
      is written before tables are unlocked and before any other
      events are written.  We also need to update the table map
      version for the binary log to mark that table maps are invalid
      after this point.
     */
    if (thd->is_current_stmt_binlog_format_row())
      error = thd->binlog_flush_pending_rows_event(true, transactional_table);
    else {
      /*
        As already explained above, we need to call end_io_cache() or the last
        block will be logged only after Execute_load_query_log_event (which is
        wrong), when read_info is destroyed.
      */
      read_info.end_io_cache();
      if (lf_info.logged_data_file) {
        int errcode = query_error_code(thd, killed_status == THD::NOT_KILLED);
        error = write_execute_load_query_log_event(
            thd, table_list->db, table_list->table_name, is_concurrent,
            handle_duplicates, transactional_table, errcode);
      }
    }
    if (error) goto err;
  }

  /* ok to client sent only after binlog write and engine commit */
  my_ok(thd, info.stats.copied + info.stats.deleted, 0L, name);
err:
  // Common exit for the failure paths above and for success. Debug builds
  // check that rows were only changed if the engine is transactional or the
  // statement cannot be safely rolled back.
  DBUG_ASSERT(
      table->file->has_transactions() ||
      !(info.stats.copied || info.stats.deleted) ||
      thd->get_transaction()->cannot_safely_rollback(Transaction_ctx::STMT));
  table->file->ha_release_auto_increment();
  return error;
}
660 
661 /**
662   @note Not a very useful function; just to avoid duplication of code
663 
664   @returns true if error
665 */
write_execute_load_query_log_event(THD * thd,const char * db_arg,const char * table_name_arg,bool is_concurrent,enum enum_duplicates duplicates,bool transactional_table,int errcode)666 bool Sql_cmd_load_table::write_execute_load_query_log_event(
667     THD *thd, const char *db_arg, const char *table_name_arg,
668     bool is_concurrent, enum enum_duplicates duplicates,
669     bool transactional_table, int errcode) {
670   const char *tbl = table_name_arg;
671   const char *tdb = (thd->db().str != nullptr ? thd->db().str : db_arg);
672   const String *query = nullptr;
673   String string_buf;
674   size_t fname_start = 0;
675   size_t fname_end = 0;
676 
677   if (thd->db().str == nullptr || strcmp(db_arg, thd->db().str)) {
678     /*
679       If used database differs from table's database,
680       prefix table name with database name so that it
681       becomes a FQ name.
682      */
683     string_buf.set_charset(system_charset_info);
684     append_identifier(thd, &string_buf, db_arg, strlen(db_arg));
685     string_buf.append(".");
686   }
687   append_identifier(thd, &string_buf, table_name_arg, strlen(table_name_arg));
688   tbl = string_buf.c_ptr_safe();
689   Load_query_generator gen(thd, &m_exchange, tdb, tbl, is_concurrent,
690                            duplicates == DUP_REPLACE, thd->lex->is_ignore());
691   query = gen.generate(&fname_start, &fname_end);
692 
693   Execute_load_query_log_event e(
694       thd, query->ptr(), query->length(), fname_start, fname_end,
695       (duplicates == DUP_REPLACE)
696           ? binary_log::LOAD_DUP_REPLACE
697           : (thd->lex->is_ignore() ? binary_log::LOAD_DUP_IGNORE
698                                    : binary_log::LOAD_DUP_ERROR),
699       transactional_table, false, false, errcode);
700 
701   return mysql_bin_log.write_event(&e);
702 }
703 
704 namespace {
705 /**
706   Checks if an item is a hidden generated column.
707 
708   @param table       Pointer to TABLE object
709   @param item        Item to check
710 
711   @returns true if checked item is a hidden generated column.
712 */
is_hidden_generated_column(TABLE * table,Item * item)713 inline bool is_hidden_generated_column(TABLE *table, Item *item) {
714   Item *real_item = item->real_item();
715   if (table->has_gcol() && real_item->type() == Item::FIELD_ITEM) {
716     const Field *field = down_cast<Item_field *>(real_item)->field;
717     if (bitmap_is_set(&table->fields_for_functional_indexes,
718                       field->field_index()))
719       return true;
720   }
721   return false;
722 }
723 }  // namespace
724 
725 /**
726   Read of rows of fixed size + optional garbage + optional newline
727 
728   @param thd         Pointer to THD object
729   @param info        Pointer to COPY_INFO object
730   @param table_list  Pointer to TABLE_LIST object
731   @param read_info   Pointer to READ_INFO object
732   @param skip_lines  Number of ignored lines
733                      at the start of the file.
734 
735   @returns true if error
736 */
read_fixed_length(THD * thd,COPY_INFO & info,TABLE_LIST * table_list,READ_INFO & read_info,ulong skip_lines)737 bool Sql_cmd_load_table::read_fixed_length(THD *thd, COPY_INFO &info,
738                                            TABLE_LIST *table_list,
739                                            READ_INFO &read_info,
740                                            ulong skip_lines) {
741   List_iterator_fast<Item> it(m_opt_fields_or_vars);
742   TABLE *table = table_list->table;
743   bool err;
744   DBUG_TRACE;
745 
746   while (!read_info.read_fixed_length()) {
747     if (thd->killed) {
748       thd->send_kill_message();
749       return true;
750     }
751     if (skip_lines) {
752       /*
753         We could implement this with a simple seek if:
754         - We are not using DATA INFILE LOCAL
755         - escape character is  ""
756         - line starting prefix is ""
757       */
758       skip_lines--;
759       continue;
760     }
761     it.rewind();
762     uchar *pos = read_info.row_start;
763 
764     restore_record(table, s->default_values);
765     /*
766       Check whether default values of the fields not specified in column list
767       are correct or not.
768     */
769     if (validate_default_values_of_unset_fields(thd, table)) {
770       read_info.error = true;
771       break;
772     }
773 
774     Autoinc_field_has_explicit_non_null_value_reset_guard after_each_row(table);
775 
776     Item *item;
777     while ((item = it++)) {
778       // Skip hidden generated columns.
779       if (is_hidden_generated_column(table, item)) continue;
780       /*
781         There is no variables in fields_vars list in this format so
782         this conversion is safe (no need to check for STRING_ITEM).
783       */
784       DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM);
785       Item_field *sql_field = static_cast<Item_field *>(item->real_item());
786       Field *field = sql_field->field;
787       if (field == table->next_number_field)
788         table->autoinc_field_has_explicit_non_null_value = true;
789       /*
790         No fields specified in fields_vars list can be null in this format.
791         Mark field as not null, we should do this for each row because of
792         restore_record...
793       */
794       field->set_notnull();
795 
796       if (pos == read_info.row_end) {
797         thd->num_truncated_fields++; /* Not enough fields */
798         push_warning_printf(thd, Sql_condition::SL_WARNING,
799                             ER_WARN_TOO_FEW_RECORDS,
800                             ER_THD(thd, ER_WARN_TOO_FEW_RECORDS),
801                             thd->get_stmt_da()->current_row_for_condition());
802         if (field->type() == FIELD_TYPE_TIMESTAMP && !field->is_nullable()) {
803           // Specific of TIMESTAMP NOT NULL: set to CURRENT_TIMESTAMP.
804           Item_func_now_local::store_in(field);
805         }
806       } else {
807         uint length;
808         uchar save_chr;
809         if ((length = (uint)(read_info.row_end - pos)) > field->field_length)
810           length = field->field_length;
811         save_chr = pos[length];
812         pos[length] = '\0';  // Safeguard aganst malloc
813         field->store((char *)pos, length, read_info.read_charset);
814         pos[length] = save_chr;
815         if ((pos += length) > read_info.row_end)
816           pos = read_info.row_end; /* Fills rest with space */
817       }
818     }
819     if (pos != read_info.row_end) {
820       thd->num_truncated_fields++; /* Too long row */
821       push_warning_printf(thd, Sql_condition::SL_WARNING,
822                           ER_WARN_TOO_MANY_RECORDS,
823                           ER_THD(thd, ER_WARN_TOO_MANY_RECORDS),
824                           thd->get_stmt_da()->current_row_for_condition());
825     }
826 
827     if (thd->killed || fill_record_n_invoke_before_triggers(
828                            thd, &info, m_opt_set_fields, m_opt_set_exprs, table,
829                            TRG_EVENT_INSERT, table->s->fields, true, nullptr))
830       return true;
831 
832     switch (table_list->view_check_option(thd)) {
833       case VIEW_CHECK_SKIP:
834         read_info.next_line();
835         goto continue_loop;
836       case VIEW_CHECK_ERROR:
837         return true;
838     }
839 
840     if (invoke_table_check_constraints(thd, table)) {
841       if (thd->is_error()) return true;
842       // continue when IGNORE clause is used.
843       read_info.next_line();
844       goto continue_loop;
845     }
846 
847     err = write_record(thd, table, &info, nullptr);
848     if (err) return true;
849 
850     /*
851       We don't need to reset auto-increment field since we are restoring
852       its default value at the beginning of each loop iteration.
853     */
854     if (read_info.next_line())  // Skip to next line
855       break;
856     if (read_info.line_truncated) {
857       thd->num_truncated_fields++; /* Too long row */
858       push_warning_printf(thd, Sql_condition::SL_WARNING,
859                           ER_WARN_TOO_MANY_RECORDS,
860                           ER_THD(thd, ER_WARN_TOO_MANY_RECORDS),
861                           thd->get_stmt_da()->current_row_for_condition());
862     }
863     thd->get_stmt_da()->inc_current_row_for_condition();
864   continue_loop:;
865   }
866   return read_info.error;
867 }
868 
869 class Field_tmp_nullability_guard {
870  public:
Field_tmp_nullability_guard(Item * item)871   explicit Field_tmp_nullability_guard(Item *item) : m_field(nullptr) {
872     if (item->type() == Item::FIELD_ITEM) {
873       m_field = ((Item_field *)item)->field;
874       /*
875         Enable temporary nullability for items that corresponds
876         to table fields.
877       */
878       m_field->set_tmp_nullable();
879     }
880   }
881 
~Field_tmp_nullability_guard()882   ~Field_tmp_nullability_guard() {
883     if (m_field) m_field->reset_tmp_nullable();
884   }
885 
886  private:
887   Field *m_field;
888 };
889 
890 /**
891   Read rows in delimiter-separated formats.
892 
893   @param thd         Pointer to THD object
894   @param info        Pointer to COPY_INFO object
895   @param table_list  Pointer to TABLE_LIST object
896   @param read_info   Pointer to READ_INFO object
897   @param enclosed    ENCLOSED BY character
898   @param skip_lines  Number of ignored lines
899                      at the start of the file.
900 
901   @returns true if error
902 */
read_sep_field(THD * thd,COPY_INFO & info,TABLE_LIST * table_list,READ_INFO & read_info,const String & enclosed,ulong skip_lines)903 bool Sql_cmd_load_table::read_sep_field(THD *thd, COPY_INFO &info,
904                                         TABLE_LIST *table_list,
905                                         READ_INFO &read_info,
906                                         const String &enclosed,
907                                         ulong skip_lines) {
908   List_iterator_fast<Item> it(m_opt_fields_or_vars);
909   Item *item;
910   TABLE *table = table_list->table;
911   size_t enclosed_length;
912   bool err;
913   DBUG_TRACE;
914 
915   enclosed_length = enclosed.length();
916 
917   for (;; it.rewind()) {
918     if (thd->killed) {
919       thd->send_kill_message();
920       return true;
921     }
922 
923     restore_record(table, s->default_values);
924     /*
925       Check whether default values of the fields not specified in column list
926       are correct or not.
927     */
928     if (validate_default_values_of_unset_fields(thd, table)) {
929       read_info.error = true;
930       break;
931     }
932 
933     Autoinc_field_has_explicit_non_null_value_reset_guard after_each_row(table);
934 
935     while ((item = it++)) {
936       uint length;
937       uchar *pos;
938       Item *real_item;
939 
940       // Skip hidden generated columns.
941       if (is_hidden_generated_column(table, item)) continue;
942 
943       if (read_info.read_field()) break;
944 
945       /* If this line is to be skipped we don't want to fill field or var */
946       if (skip_lines) continue;
947 
948       pos = read_info.row_start;
949       length = (uint)(read_info.row_end - pos);
950 
951       real_item = item->real_item();
952 
953       Field_tmp_nullability_guard fld_tmp_nullability_guard(real_item);
954 
955       if ((!read_info.enclosed && (enclosed_length && length == 4 &&
956                                    !memcmp(pos, STRING_WITH_LEN("NULL")))) ||
957           (length == 1 && read_info.found_null)) {
958         if (real_item->type() == Item::FIELD_ITEM) {
959           Field *field = ((Item_field *)real_item)->field;
960           if (field->reset())  // Set to 0
961           {
962             my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0), field->field_name,
963                      thd->get_stmt_da()->current_row_for_condition());
964             return true;
965           }
966           if (!field->is_nullable() && field->type() == FIELD_TYPE_TIMESTAMP) {
967             // Specific of TIMESTAMP NOT NULL: set to CURRENT_TIMESTAMP.
968             Item_func_now_local::store_in(field);
969           } else {
970             /*
971               Set field to NULL. Later we will clear temporary nullability flag
972               and check NOT NULL constraint.
973             */
974             field->set_null();
975           }
976         } else if (item->type() == Item::STRING_ITEM) {
977           DBUG_ASSERT(nullptr !=
978                       dynamic_cast<Item_user_var_as_out_param *>(item));
979           ((Item_user_var_as_out_param *)item)
980               ->set_null_value(read_info.read_charset);
981         }
982 
983         continue;
984       }
985 
986       if (real_item->type() == Item::FIELD_ITEM) {
987         Field *field = ((Item_field *)real_item)->field;
988         field->set_notnull();
989         read_info.row_end[0] = 0;  // Safe to change end marker
990         if (field == table->next_number_field)
991           table->autoinc_field_has_explicit_non_null_value = true;
992         field->store((char *)pos, length, read_info.read_charset);
993       } else if (item->type() == Item::STRING_ITEM) {
994         DBUG_ASSERT(nullptr !=
995                     dynamic_cast<Item_user_var_as_out_param *>(item));
996         ((Item_user_var_as_out_param *)item)
997             ->set_value((char *)pos, length, read_info.read_charset);
998       }
999     }
1000 
1001     if (thd->is_error()) read_info.error = true;
1002 
1003     if (read_info.error) break;
1004     if (skip_lines) {
1005       skip_lines--;
1006       continue;
1007     }
1008     if (item) {
1009       /* Have not read any field, thus input file is simply ended */
1010       if (item == m_opt_fields_or_vars.head()) break;
1011       for (; item; item = it++) {
1012         Item *real_item = item->real_item();
1013         if (real_item->type() == Item::FIELD_ITEM) {
1014           Field *field = ((Item_field *)real_item)->field;
1015           /*
1016             We set to 0. But if the field is DEFAULT NULL, the "null bit"
1017             turned on by restore_record() above remains so field will be NULL.
1018           */
1019           if (field->reset()) {
1020             my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0), field->field_name,
1021                      thd->get_stmt_da()->current_row_for_condition());
1022             return true;
1023           }
1024           if (field->type() == FIELD_TYPE_TIMESTAMP && !field->is_nullable())
1025             // Specific of TIMESTAMP NOT NULL: set to CURRENT_TIMESTAMP.
1026             Item_func_now_local::store_in(field);
1027           /*
1028             QQ: We probably should not throw warning for each field.
1029             But how about intention to always have the same number
1030             of warnings in THD::num_truncated_fields (and get rid of
1031             num_truncated_fields in the end?)
1032           */
1033           thd->num_truncated_fields++;
1034           push_warning_printf(thd, Sql_condition::SL_WARNING,
1035                               ER_WARN_TOO_FEW_RECORDS,
1036                               ER_THD(thd, ER_WARN_TOO_FEW_RECORDS),
1037                               thd->get_stmt_da()->current_row_for_condition());
1038         } else if (item->type() == Item::STRING_ITEM) {
1039           DBUG_ASSERT(nullptr !=
1040                       dynamic_cast<Item_user_var_as_out_param *>(item));
1041           ((Item_user_var_as_out_param *)item)
1042               ->set_null_value(read_info.read_charset);
1043         }
1044       }
1045     }
1046 
1047     if (thd->killed || fill_record_n_invoke_before_triggers(
1048                            thd, &info, m_opt_set_fields, m_opt_set_exprs, table,
1049                            TRG_EVENT_INSERT, table->s->fields, true, nullptr))
1050       return true;
1051 
1052     if (!table->triggers) {
1053       /*
1054         If there is no trigger for the table then check the NOT NULL constraint
1055         for every table field.
1056 
1057         For the table that has BEFORE-INSERT trigger installed checking for
1058         NOT NULL constraint is done inside function
1059         fill_record_n_invoke_before_triggers() after all trigger instructions
1060         has been executed.
1061       */
1062       it.rewind();
1063 
1064       while ((item = it++)) {
1065         Item *real_item = item->real_item();
1066         if (real_item->type() == Item::FIELD_ITEM)
1067           ((Item_field *)real_item)
1068               ->field->check_constraints(ER_WARN_NULL_TO_NOTNULL);
1069       }
1070     }
1071 
1072     if (thd->is_error()) return true;
1073 
1074     switch (table_list->view_check_option(thd)) {
1075       case VIEW_CHECK_SKIP:
1076         read_info.next_line();
1077         goto continue_loop;
1078       case VIEW_CHECK_ERROR:
1079         return true;
1080     }
1081 
1082     if (invoke_table_check_constraints(thd, table)) {
1083       if (thd->is_error()) return true;
1084       // continue when IGNORE clause is used.
1085       read_info.next_line();
1086       goto continue_loop;
1087     }
1088 
1089     err = write_record(thd, table, &info, nullptr);
1090     if (err) return true;
1091     /*
1092       We don't need to reset auto-increment field since we are restoring
1093       its default value at the beginning of each loop iteration.
1094     */
1095     if (read_info.next_line())  // Skip to next line
1096       break;
1097     if (read_info.line_truncated) {
1098       thd->num_truncated_fields++; /* Too long row */
1099       push_warning_printf(thd, Sql_condition::SL_WARNING,
1100                           ER_WARN_TOO_MANY_RECORDS,
1101                           ER_THD(thd, ER_WARN_TOO_MANY_RECORDS),
1102                           thd->get_stmt_da()->current_row_for_condition());
1103       if (thd->killed) return true;
1104     }
1105     thd->get_stmt_da()->inc_current_row_for_condition();
1106   continue_loop:;
1107   }
1108   return read_info.error;
1109 }
1110 
1111 /**
1112   Read rows in xml format
1113 
1114   @param thd         Pointer to THD object
1115   @param info        Pointer to COPY_INFO object
1116   @param table_list  Pointer to TABLE_LIST object
1117   @param read_info   Pointer to READ_INFO object
1118   @param skip_lines  Number of ignored lines
1119                      at the start of the file.
1120 
1121   @returns true if error
1122 */
read_xml_field(THD * thd,COPY_INFO & info,TABLE_LIST * table_list,READ_INFO & read_info,ulong skip_lines)1123 bool Sql_cmd_load_table::read_xml_field(THD *thd, COPY_INFO &info,
1124                                         TABLE_LIST *table_list,
1125                                         READ_INFO &read_info,
1126                                         ulong skip_lines) {
1127   List_iterator_fast<Item> it(m_opt_fields_or_vars);
1128   Item *item;
1129   TABLE *table = table_list->table;
1130   const CHARSET_INFO *cs = read_info.read_charset;
1131   DBUG_TRACE;
1132 
1133   for (;; it.rewind()) {
1134     if (thd->killed) {
1135       thd->send_kill_message();
1136       return true;
1137     }
1138 
1139     // read row tag and save values into tag list
1140     if (read_info.read_xml()) break;
1141 
1142     List_iterator_fast<XML_TAG> xmlit(read_info.taglist);
1143     xmlit.rewind();
1144     XML_TAG *tag = nullptr;
1145 
1146 #ifndef DBUG_OFF
1147     DBUG_PRINT("read_xml_field", ("skip_lines=%d", (int)skip_lines));
1148     while ((tag = xmlit++)) {
1149       DBUG_PRINT("read_xml_field", ("got tag:%i '%s' '%s'", tag->level,
1150                                     tag->field.c_ptr(), tag->value.c_ptr()));
1151     }
1152 #endif
1153 
1154     restore_record(table, s->default_values);
1155     /*
1156       Check whether default values of the fields not specified in column list
1157       are correct or not.
1158     */
1159     if (validate_default_values_of_unset_fields(thd, table)) {
1160       read_info.error = true;
1161       break;
1162     }
1163 
1164     Autoinc_field_has_explicit_non_null_value_reset_guard after_each_row(table);
1165 
1166     while ((item = it++)) {
1167       /* If this line is to be skipped we don't want to fill field or var */
1168       if (skip_lines) continue;
1169 
1170       // Skip hidden generated columns.
1171       if (is_hidden_generated_column(table, item)) continue;
1172 
1173       /* find field in tag list */
1174       xmlit.rewind();
1175       tag = xmlit++;
1176 
1177       while (tag && strcmp(tag->field.c_ptr(), item->item_name.ptr()) != 0)
1178         tag = xmlit++;
1179 
1180       item = item->real_item();
1181 
1182       if (!tag)  // found null
1183       {
1184         if (item->type() == Item::FIELD_ITEM) {
1185           Field *field = (static_cast<Item_field *>(item))->field;
1186           field->reset();
1187           field->set_null();
1188           if (field == table->next_number_field)
1189             table->autoinc_field_has_explicit_non_null_value = true;
1190           if (!field->is_nullable()) {
1191             if (field->type() == FIELD_TYPE_TIMESTAMP)
1192               // Specific of TIMESTAMP NOT NULL: set to CURRENT_TIMESTAMP.
1193               Item_func_now_local::store_in(field);
1194             else if (field != table->next_number_field)
1195               field->set_warning(Sql_condition::SL_WARNING,
1196                                  ER_WARN_NULL_TO_NOTNULL, 1);
1197           }
1198         } else {
1199           DBUG_ASSERT(nullptr !=
1200                       dynamic_cast<Item_user_var_as_out_param *>(item));
1201           ((Item_user_var_as_out_param *)item)->set_null_value(cs);
1202         }
1203         continue;
1204       }
1205 
1206       if (item->type() == Item::FIELD_ITEM) {
1207         Field *field = ((Item_field *)item)->field;
1208         field->set_notnull();
1209         if (field == table->next_number_field)
1210           table->autoinc_field_has_explicit_non_null_value = true;
1211         field->store(tag->value.ptr(), tag->value.length(), cs);
1212       } else {
1213         DBUG_ASSERT(nullptr !=
1214                     dynamic_cast<Item_user_var_as_out_param *>(item));
1215         ((Item_user_var_as_out_param *)item)
1216             ->set_value(tag->value.ptr(), tag->value.length(), cs);
1217       }
1218     }
1219 
1220     if (read_info.error) break;
1221 
1222     if (skip_lines) {
1223       skip_lines--;
1224       continue;
1225     }
1226 
1227     if (item) {
1228       /* Have not read any field, thus input file is simply ended */
1229       if (item == m_opt_fields_or_vars.head()) break;
1230 
1231       for (; item; item = it++) {
1232         if (item->type() == Item::FIELD_ITEM) {
1233           /*
1234             QQ: We probably should not throw warning for each field.
1235             But how about intention to always have the same number
1236             of warnings in THD::num_truncated_fields (and get rid of
1237             num_truncated_fields in the end?)
1238           */
1239           thd->num_truncated_fields++;
1240           push_warning_printf(thd, Sql_condition::SL_WARNING,
1241                               ER_WARN_TOO_FEW_RECORDS,
1242                               ER_THD(thd, ER_WARN_TOO_FEW_RECORDS),
1243                               thd->get_stmt_da()->current_row_for_condition());
1244         } else {
1245           DBUG_ASSERT(nullptr !=
1246                       dynamic_cast<Item_user_var_as_out_param *>(item));
1247           ((Item_user_var_as_out_param *)item)->set_null_value(cs);
1248         }
1249       }
1250     }
1251 
1252     if (thd->killed || fill_record_n_invoke_before_triggers(
1253                            thd, &info, m_opt_set_fields, m_opt_set_exprs, table,
1254                            TRG_EVENT_INSERT, table->s->fields, true, nullptr))
1255       return true;
1256 
1257     switch (table_list->view_check_option(thd)) {
1258       case VIEW_CHECK_SKIP:
1259         goto continue_loop;
1260       case VIEW_CHECK_ERROR:
1261         return true;
1262     }
1263 
1264     if (invoke_table_check_constraints(thd, table)) {
1265       if (thd->is_error()) return true;
1266       // continue when IGNORE clause is used.
1267       goto continue_loop;
1268     }
1269 
1270     if (write_record(thd, table, &info, nullptr)) return true;
1271 
1272     /*
1273       We don't need to reset auto-increment field since we are restoring
1274       its default value at the beginning of each loop iteration.
1275     */
1276     thd->get_stmt_da()->inc_current_row_for_condition();
1277   continue_loop:;
1278   }
1279   return read_info.error || thd->is_error();
1280 } /* load xml end */
1281 
1282 /* Unescape all escape characters, mark \N as null */
1283 
unescape(char chr)1284 char READ_INFO::unescape(char chr) {
1285   /* keep this switch synchornous with the ESCAPE_CHARS macro */
1286   switch (chr) {
1287     case 'n':
1288       return '\n';
1289     case 't':
1290       return '\t';
1291     case 'r':
1292       return '\r';
1293     case 'b':
1294       return '\b';
1295     case '0':
1296       return 0;  // Ascii null
1297     case 'Z':
1298       return '\032';  // Win32 end of file
1299     case 'N':
1300       found_null = true;
1301 
1302       /* fall through */
1303     default:
1304       return chr;
1305   }
1306 }
1307 
1308 /*
1309   Read a line using buffering
1310   If last line is empty (in line mode) then it isn't outputed
1311 */
1312 
READ_INFO(File file_par,uint tot_length,const CHARSET_INFO * cs,const String & field_term,const String & line_start,const String & line_term,const String & enclosed_par,int escape,bool get_it_from_net,bool is_fifo)1313 READ_INFO::READ_INFO(File file_par, uint tot_length, const CHARSET_INFO *cs,
1314                      const String &field_term, const String &line_start,
1315                      const String &line_term, const String &enclosed_par,
1316                      int escape, bool get_it_from_net, bool is_fifo)
1317     : file(file_par),
1318       buff_length(tot_length),
1319       escape_char(escape),
1320       found_end_of_line(false),
1321       eof(false),
1322       need_end_io_cache(false),
1323       error(false),
1324       line_truncated(false),
1325       found_null(false),
1326       read_charset(cs) {
1327   /*
1328     Field and line terminators must be interpreted as sequence of unsigned char.
1329     Otherwise, non-ascii terminators will be negative on some platforms,
1330     and positive on others (depending on the implementation of char).
1331   */
1332   field_term_ptr =
1333       static_cast<const uchar *>(static_cast<const void *>(field_term.ptr()));
1334   field_term_length = field_term.length();
1335   line_term_ptr =
1336       static_cast<const uchar *>(static_cast<const void *>(line_term.ptr()));
1337   line_term_length = line_term.length();
1338 
1339   level = 0; /* for load xml */
1340   if (line_start.length() == 0) {
1341     line_start_ptr = nullptr;
1342     start_of_line = false;
1343   } else {
1344     line_start_ptr = line_start.ptr();
1345     line_start_end = line_start_ptr + line_start.length();
1346     start_of_line = true;
1347   }
1348   /* If field_terminator == line_terminator, don't use line_terminator */
1349   if (field_term_length == line_term_length &&
1350       !memcmp(field_term_ptr, line_term_ptr, field_term_length)) {
1351     line_term_length = 0;
1352     line_term_ptr = nullptr;
1353   }
1354   enclosed_char = (enclosed_length = enclosed_par.length())
1355                       ? (uchar)enclosed_par[0]
1356                       : INT_MAX;
1357   field_term_char = field_term_length ? field_term_ptr[0] : INT_MAX;
1358   line_term_char = line_term_length ? line_term_ptr[0] : INT_MAX;
1359 
1360   /* Set of a stack for unget if long terminators */
1361   size_t length =
1362       max<size_t>(cs->mbmaxlen, max(field_term_length, line_term_length)) + 1;
1363   length = std::max(length, line_start.length());
1364   stack = stack_pos = (int *)(*THR_MALLOC)->Alloc(sizeof(int) * length);
1365 
1366   if (!(buffer = (uchar *)my_malloc(key_memory_READ_INFO, buff_length + 1,
1367                                     MYF(MY_WME))))
1368     error = true; /* purecov: inspected */
1369   else {
1370     end_of_buff = buffer + buff_length;
1371     if (init_io_cache(
1372             &cache, (get_it_from_net) ? -1 : file, 0,
1373             (get_it_from_net) ? READ_NET : (is_fifo ? READ_FIFO : READ_CACHE),
1374             0L, true, MYF(MY_WME))) {
1375       my_free(buffer); /* purecov: inspected */
1376       buffer = nullptr;
1377       error = true;
1378     } else {
1379       /*
1380         init_io_cache() will not initialize read_function member
1381         if the cache is READ_NET. So we work around the problem with a
1382         manual assignment
1383       */
1384       need_end_io_cache = true;
1385 
1386       if (get_it_from_net) cache.read_function = _my_b_net_read;
1387 
1388       if (mysql_bin_log.is_open())
1389         cache.pre_read = cache.pre_close = (IO_CACHE_CALLBACK)log_loaded_block;
1390     }
1391   }
1392 }
1393 
~READ_INFO()1394 READ_INFO::~READ_INFO() {
1395   if (need_end_io_cache) ::end_io_cache(&cache);
1396 
1397   if (buffer != nullptr) my_free(buffer);
1398   List_iterator<XML_TAG> xmlit(taglist);
1399   XML_TAG *t;
1400   while ((t = xmlit++)) delete (t);
1401 }
1402 
/**
  Find the length of a multi-byte character from its leading byte(s).

  The logic is similar to my_mbcharlen, except that a second byte is fetched
  with GET when the first byte alone is not enough, and is returned to the
  input with PUSH when the resulting length is non-zero.

  @param[in]  cs  charset info
  @param[in]  chr the first char of sequence
  @param[out] len the length of multi-byte char
*/
#define GET_MBCHARLEN(cs, chr, len)                            \
  do {                                                         \
    (len) = my_mbcharlen((cs), (chr));                         \
    if ((len) == 0 && my_mbmaxlenlen((cs)) == 2) {             \
      const int mb_following_byte = GET;                       \
      if (mb_following_byte != my_b_EOF) {                     \
        (len) = my_mbcharlen_2((cs), (chr), mb_following_byte); \
        /* Character is gb18030 or invalid (len = 0) */        \
        DBUG_ASSERT((len) == 0 || (len) == 2 || (len) == 4);   \
      }                                                        \
      if ((len) != 0) PUSH(mb_following_byte);                 \
    }                                                          \
  } while (0)
1423 
1424 /**
1425   Skip the terminator string (if any) in the input stream.
1426 
1427   @param ptr    Terminator string.
1428   @param length Terminator string length.
1429 
1430   @returns false if terminator was found and skipped,
1431            true if terminator was not found
1432 */
terminator(const uchar * ptr,size_t length)1433 inline bool READ_INFO::terminator(const uchar *ptr, size_t length) {
1434   int chr = 0;
1435   size_t i;
1436   for (i = 1; i < length; i++) {
1437     chr = GET;
1438     if (chr != *++ptr) {
1439       break;
1440     }
1441   }
1442   if (i == length) return true;
1443   PUSH(chr);
1444   while (i-- > 1) PUSH(*--ptr);
1445   return false;
1446 }
1447 
1448 /**
1449   @returns true if error. If READ_INFO::error is true, then error is fatal (OOM
1450            or charset error). Otherwise see READ_INFO::found_end_of_line for
1451            unexpected EOL error or READ_INFO::eof for EOF error respectively.
1452 */
read_field()1453 bool READ_INFO::read_field() {
1454   int chr, found_enclosed_char;
1455   uchar *to, *new_buffer;
1456 
1457   found_null = false;
1458   if (found_end_of_line) return true;  // One have to call next_line
1459 
1460   /* Skip until we find 'line_start' */
1461 
1462   if (start_of_line) {  // Skip until line_start
1463     start_of_line = false;
1464     if (find_start_of_fields()) return true;
1465   }
1466   if ((chr = GET) == my_b_EOF) {
1467     found_end_of_line = eof = true;
1468     return true;
1469   }
1470   to = buffer;
1471   if (chr == enclosed_char) {
1472     found_enclosed_char = enclosed_char;
1473     *to++ = (uchar)chr;  // If error
1474   } else {
1475     found_enclosed_char = INT_MAX;
1476     PUSH(chr);
1477   }
1478 
1479   for (;;) {
1480     bool escaped_mb = false;
1481     while (to < end_of_buff) {
1482       chr = GET;
1483       if (chr == my_b_EOF) goto found_eof;
1484       if (chr == escape_char) {
1485         if ((chr = GET) == my_b_EOF) {
1486           *to++ = (uchar)escape_char;
1487           goto found_eof;
1488         }
1489         /*
1490           When escape_char == enclosed_char, we treat it like we do for
1491           handling quotes in SQL parsing -- you can double-up the
1492           escape_char to include it literally, but it doesn't do escapes
1493           like \n. This allows: LOAD DATA ... ENCLOSED BY '"' ESCAPED BY '"'
1494           with data like: "fie""ld1", "field2"
1495          */
1496         if (escape_char != enclosed_char || chr == escape_char) {
1497           uint ml;
1498           GET_MBCHARLEN(read_charset, chr, ml);
1499           /*
1500             For escaped multibyte character, push back the first byte,
1501             and will handle it below.
1502             Because multibyte character's second byte is possible to be
1503             0x5C, per Query_result_export::send_data, both head byte and
1504             tail byte are escaped for such characters. So mark it if the
1505             head byte is escaped and will handle it below.
1506           */
1507           if (ml == 1)
1508             *to++ = (uchar)unescape((char)chr);
1509           else {
1510             escaped_mb = true;
1511             PUSH(chr);
1512           }
1513           continue;
1514         }
1515         PUSH(chr);
1516         chr = escape_char;
1517       }
1518       if (chr == line_term_char && found_enclosed_char == INT_MAX) {
1519         if (terminator(line_term_ptr,
1520                        line_term_length)) {  // Maybe unexpected linefeed
1521           enclosed = false;
1522           found_end_of_line = true;
1523           row_start = buffer;
1524           row_end = to;
1525           return false;
1526         }
1527       }
1528       if (chr == found_enclosed_char) {
1529         if ((chr = GET) == found_enclosed_char) {  // Remove dupplicated
1530           *to++ = (uchar)chr;
1531           continue;
1532         }
1533         // End of enclosed field if followed by field_term or line_term
1534         if (chr == my_b_EOF ||
1535             (chr == line_term_char &&
1536              terminator(line_term_ptr,
1537                         line_term_length))) {  // Maybe unexpected linefeed
1538           enclosed = true;
1539           found_end_of_line = true;
1540           row_start = buffer + 1;
1541           row_end = to;
1542           return false;
1543         }
1544         if (chr == field_term_char &&
1545             terminator(field_term_ptr, field_term_length)) {
1546           enclosed = true;
1547           row_start = buffer + 1;
1548           row_end = to;
1549           return false;
1550         }
1551         /*
1552           The string didn't terminate yet.
1553           Store back next character for the loop
1554         */
1555         PUSH(chr);
1556         /* copy the found term character to 'to' */
1557         chr = found_enclosed_char;
1558       } else if (chr == field_term_char && found_enclosed_char == INT_MAX) {
1559         if (terminator(field_term_ptr, field_term_length)) {
1560           enclosed = false;
1561           row_start = buffer;
1562           row_end = to;
1563           return false;
1564         }
1565       }
1566 
1567       uint ml;
1568       GET_MBCHARLEN(read_charset, chr, ml);
1569       if (ml == 0) {
1570         *to = '\0';
1571         my_error(ER_INVALID_CHARACTER_STRING, MYF(0), read_charset->csname,
1572                  buffer);
1573         error = true;
1574         return true;
1575       }
1576 
1577       if (ml > 1 && to + ml <= end_of_buff) {
1578         uchar *p = to;
1579         *to++ = chr;
1580 
1581         for (uint i = 1; i < ml; i++) {
1582           chr = GET;
1583           if (chr == my_b_EOF) {
1584             /*
1585              Need to back up the bytes already ready from illformed
1586              multi-byte char
1587             */
1588             to -= i;
1589             goto found_eof;
1590           } else if (chr == escape_char && escaped_mb) {
1591             // Unescape the second byte if it is escaped.
1592             chr = GET;
1593             chr = (uchar)unescape((char)chr);
1594           }
1595           *to++ = chr;
1596         }
1597         if (escaped_mb) escaped_mb = false;
1598         if (my_ismbchar(read_charset, (const char *)p, (const char *)to))
1599           continue;
1600         for (uint i = 0; i < ml; i++) PUSH(*--to);
1601         chr = GET;
1602       } else if (ml > 1) {
1603         // Buffer is too small, exit while loop, and reallocate.
1604         PUSH(chr);
1605         break;
1606       }
1607       *to++ = (uchar)chr;
1608     }
1609     /*
1610     ** We come here if buffer is too small. Enlarge it and continue
1611     */
1612     if (!(new_buffer =
1613               (uchar *)my_realloc(key_memory_READ_INFO, (char *)buffer,
1614                                   buff_length + 1 + IO_SIZE, MYF(MY_WME)))) {
1615       error = true;
1616       return true;
1617     }
1618     to = new_buffer + (to - buffer);
1619     buffer = new_buffer;
1620     buff_length += IO_SIZE;
1621     end_of_buff = buffer + buff_length;
1622   }
1623 
1624 found_eof:
1625   enclosed = false;
1626   found_end_of_line = eof = true;
1627   row_start = buffer;
1628   row_end = to;
1629   return false;
1630 }
1631 
1632 /**
1633   Read a row with fixed length.
1634 
1635   @note
1636     The row may not be fixed size on disk if there are escape
1637     characters in the file.
1638 
1639   @note
1640     One can't use fixed length with multi-byte charset **
1641 
1642   @returns true if error (unexpected end of file/line)
1643 */
read_fixed_length()1644 bool READ_INFO::read_fixed_length() {
1645   int chr;
1646   uchar *to;
1647   if (found_end_of_line) return true;  // One have to call next_line
1648 
1649   if (start_of_line) {  // Skip until line_start
1650     start_of_line = false;
1651     if (find_start_of_fields()) return true;
1652   }
1653 
1654   to = row_start = buffer;
1655   while (to < end_of_buff) {
1656     if ((chr = GET) == my_b_EOF) goto found_eof;
1657     if (chr == escape_char) {
1658       if ((chr = GET) == my_b_EOF) {
1659         *to++ = (uchar)escape_char;
1660         goto found_eof;
1661       }
1662       *to++ = (uchar)unescape((char)chr);
1663       continue;
1664     }
1665     if (chr == line_term_char) {
1666       if (terminator(line_term_ptr,
1667                      line_term_length)) {  // Maybe unexpected linefeed
1668         found_end_of_line = true;
1669         row_end = to;
1670         return false;
1671       }
1672     }
1673     *to++ = (uchar)chr;
1674   }
1675   row_end = to;  // Found full line
1676   return false;
1677 
1678 found_eof:
1679   found_end_of_line = eof = true;
1680   row_start = buffer;
1681   row_end = to;
1682   return to == buffer;
1683 }
1684 
1685 /**
1686   @returns true if error (unexpected end of file/line)
1687 */
next_line()1688 bool READ_INFO::next_line() {
1689   line_truncated = false;
1690   start_of_line = line_start_ptr != nullptr;
1691   if (found_end_of_line || eof) {
1692     found_end_of_line = false;
1693     return eof;
1694   }
1695   found_end_of_line = false;
1696   if (!line_term_length) return false;  // No lines
1697   for (;;) {
1698     int chr = GET;
1699     uint ml;
1700     if (chr == my_b_EOF) {
1701       eof = true;
1702       return true;
1703     }
1704     GET_MBCHARLEN(read_charset, chr, ml);
1705     if (ml > 1) {
1706       for (uint i = 1; chr != my_b_EOF && i < ml; i++) chr = GET;
1707       if (chr == escape_char) continue;
1708     }
1709     if (chr == my_b_EOF) {
1710       eof = true;
1711       return true;
1712     }
1713     if (chr == escape_char) {
1714       line_truncated = true;
1715       if (GET == my_b_EOF) return true;
1716       continue;
1717     }
1718     if (chr == line_term_char && terminator(line_term_ptr, line_term_length))
1719       return false;
1720     line_truncated = true;
1721   }
1722 }
1723 
1724 /**
1725   @returns true if error (unexpected end of file/line)
1726 */
find_start_of_fields()1727 bool READ_INFO::find_start_of_fields() {
1728   int chr;
1729 try_again:
1730   do {
1731     if ((chr = GET) == my_b_EOF) {
1732       found_end_of_line = eof = true;
1733       return true;
1734     }
1735   } while ((char)chr != line_start_ptr[0]);
1736   for (const char *ptr = line_start_ptr + 1; ptr != line_start_end; ptr++) {
1737     chr = GET;                // Eof will be checked later
1738     if ((char)chr != *ptr) {  // Can't be line_start
1739       PUSH(chr);
1740       while (--ptr != line_start_ptr) {  // Restart with next char
1741         PUSH(*ptr);
1742       }
1743       goto try_again;
1744     }
1745   }
1746   return false;
1747 }
1748 
1749 /*
1750   Clear taglist from tags with a specified level
1751 */
clear_level(int level_arg)1752 void READ_INFO::clear_level(int level_arg) {
1753   DBUG_TRACE;
1754   List_iterator<XML_TAG> xmlit(taglist);
1755   xmlit.rewind();
1756   XML_TAG *tag;
1757 
1758   while ((tag = xmlit++)) {
1759     if (tag->level >= level_arg) {
1760       xmlit.remove();
1761       delete tag;
1762     }
1763   }
1764 }
1765 
/*
  Convert the name of an XML entity (the text between '&' and ';')
  to the character it stands for.

  Recognised names are "gt", "lt", "amp", "quot" and "apos"; the
  comparison is case sensitive and 'name' need not be null-terminated.

  Returns the character, or -1 if the name is not recognised.
*/
static int my_xml_entity_to_char(const char *name, size_t length) {
  struct Known_entity {
    const char *text;
    size_t text_length;
    int value;
  };
  static const Known_entity known_entities[] = {{"gt", 2, '>'},
                                                {"lt", 2, '<'},
                                                {"amp", 3, '&'},
                                                {"quot", 4, '"'},
                                                {"apos", 4, '\''}};

  for (const Known_entity &entity : known_entities) {
    if (entity.text_length == length && memcmp(name, entity.text, length) == 0)
      return entity.value;
  }
  return -1;
}
1782 
/**
  @brief Convert tab, carriage return and line feed to space

  @param chr    character

  @details According to the "XML 1.0" standard,
           only space (@#x20) characters, carriage returns,
           line feeds or tabs are considered as spaces.
           Convert all of them to space (@#x20) for parsing simplicity.

  @returns ' ' for tab, carriage return and line feed,
           otherwise chr unchanged.
*/
static int my_tospace(int chr) {
  switch (chr) {
    case '\t':
    case '\r':
    case '\n':
      return ' ';
    default:
      return chr;
  }
}
1796 
1797 /*
1798   Read an xml value: handle multibyte and xml escape
1799 
1800   @param      delim  Delimiter character.
1801   @param[out] val    Resulting value string.
1802 
1803   @returns next character after delim
1804            or
1805            my_b_EOF in case of charset error/unexpected EOF.
1806 */
read_value(int delim,String * val)1807 int READ_INFO::read_value(int delim, String *val) {
1808   int chr;
1809   String tmp;
1810 
1811   for (chr = GET; my_tospace(chr) != delim && chr != my_b_EOF;) {
1812     uint ml;
1813     GET_MBCHARLEN(read_charset, chr, ml);
1814     if (ml == 0) {
1815       chr = my_b_EOF;
1816       val->length(0);
1817       return chr;
1818     }
1819 
1820     if (ml > 1) {
1821       DBUG_PRINT("read_xml", ("multi byte"));
1822 
1823       for (uint i = 1; i < ml; i++) {
1824         val->append(chr);
1825         /*
1826           Don't use my_tospace() in the middle of a multi-byte character
1827           TODO: check that the multi-byte sequence is valid.
1828         */
1829         chr = GET;
1830         if (chr == my_b_EOF) return chr;
1831       }
1832     }
1833     if (chr == '&') {
1834       tmp.length(0);
1835       for (chr = my_tospace(GET); chr != ';'; chr = my_tospace(GET)) {
1836         if (chr == my_b_EOF) return chr;
1837         tmp.append(chr);
1838       }
1839       if ((chr = my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0)
1840         val->append(chr);
1841       else {
1842         val->append('&');
1843         val->append(tmp);
1844         val->append(';');
1845       }
1846     } else
1847       val->append(chr);
1848     chr = GET;
1849   }
1850   return my_tospace(chr);
1851 }
1852 
1853 /*
1854   Read CDATA value if any.
1855   Ignore multibyte and XML escape.
1856   Note: the last character read must be '<' before calling this function.
1857 
1858   @param[out] val           Resulting CDATA string.
1859   @param[out] have_cdata    Set if really read CDATA.
1860 
1861   @returns    Last character read or
1862               my_b_EOF in case of unexpected EOF.
1863 */
read_cdata(String * val,bool * have_cdata)1864 int READ_INFO::read_cdata(String *val, bool *have_cdata) {
1865   const char cdata_head[] = "![CDATA[";
1866   const char *head_ptr = cdata_head;
1867 
1868   /* Check for CDATA head "![CDATA[" */
1869   for (size_t i = 0; i < strlen(cdata_head); i++) {
1870     int chr = GET;
1871 
1872     if (chr != *head_ptr++) {
1873       /*
1874         Didn't find "![CDATA[" head,
1875         push back the last (unmatched) character
1876       */
1877       PUSH(chr);
1878       /* and all matched from the head. */
1879       while (i--) PUSH(*--head_ptr);
1880 
1881       *have_cdata = false;
1882       return '<';
1883     }
1884   }
1885 
1886   int tail[3]{0};
1887   for (tail[2] = GET; tail[2] != my_b_EOF; tail[2] = GET) {
1888     /* Check for CDATA tail "]]>" */
1889     if (tail[0] == ']' && tail[1] == ']' && tail[2] == '>') {
1890       /* Cut last two characters ("]]") which were appended to val. */
1891       DBUG_ASSERT(val->length() >= 2);
1892       val->length(val->length() - 2);
1893 
1894       *have_cdata = true;
1895       return '>';
1896     }
1897     /* Shift the tail */
1898     tail[0] = tail[1];
1899     tail[1] = tail[2];
1900 
1901     val->append(tail[2]);
1902   }
1903 
1904   /* Didn't find CDATA tail "]]>", the last character read must be my_b_EOF. */
1905   DBUG_ASSERT(tail[2] == my_b_EOF);
1906   *have_cdata = false;
1907   return my_b_EOF;
1908 }
1909 
/*
  Read a record in xml format
  tags and attributes are stored in taglist
  when tag set in ROWS IDENTIFIED BY is closed, we are ready and return

  The input is scanned one character at a time; tab, CR and LF are folded
  to a space by my_tospace(). The switch below dispatches on the current
  character, and each case leaves the next character to dispatch on in
  'chr'. The nesting depth is tracked in 'level'. Every non-empty element
  value is recorded as XML_TAG(level, tag, value) and every non-empty
  attribute value as XML_TAG(level + 1, attribute, value).

  @returns false when the closing tag of a row was found,
           true if error (unexpected end of file); eof is set in that case
*/
bool READ_INFO::read_xml() {
  DBUG_TRACE;
  int chr, chr2, chr3;
  /*
    Quote character taken from the last '<field name=' construct (or the
    delimiter of the last attribute value); it is filtered out of tag names.
  */
  int delim = 0;
  String tag, attribute, value;
  /* True while between a tag name and the '>' (or "/>") that ends the tag. */
  bool in_tag = false;

  tag.length(0);
  attribute.length(0);
  value.length(0);

  for (chr = my_tospace(GET); chr != my_b_EOF;) {
    switch (chr) {
      case '<': /* read tag */
        /* A tag starting with "!--" is a comment <!-- comment --> */
        chr = my_tospace(GET);
        if (chr == '!') {
          /*
            NOTE(review): when "<!" is not followed by "--", the two
            characters read into chr2/chr3 are not pushed back and are
            therefore missing from the tag name read below - confirm that
            this is intended.
          */
          chr2 = GET;
          chr3 = GET;

          if (chr2 == '-' && chr3 == '-') {
            /*
              Skip the comment body. From here on chr2/chr3 are both '-'
              only when the two characters just before chr were dashes,
              so the loop ends on the '>' of "-->".
            */
            chr2 = 0;
            chr3 = 0;
            chr = my_tospace(GET);

            while (chr != '>' || chr2 != '-' || chr3 != '-') {
              if (chr == '-') {
                chr3 = chr2;
                chr2 = chr;
              } else if (chr2 == '-') {
                chr2 = 0;
                chr3 = 0;
              }
              chr = my_tospace(GET);
              if (chr == my_b_EOF) goto found_eof;
            }
            /* chr is the closing '>', handled by case '>' next. */
            break;
          }
        }

        /* Collect the tag name, up to a blank, '/', '>' or end of input. */
        tag.length(0);
        while (chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF) {
          if (chr != delim) /* fix for the '<field name =' format */
            tag.append(chr);
          chr = my_tospace(GET);
        }

        // row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term
        // (the comparison skips the surrounding '<' and '>'; debug trace only)
        if ((tag.length() == line_term_length - 2) &&
            (memcmp(tag.ptr(), line_term_ptr + 1, tag.length()) == 0)) {
          DBUG_PRINT("read_xml", ("start-of-row: %i %s %s", level,
                                  tag.c_ptr_safe(), line_term_ptr));
        }

        /*
          An opening tag: go one level deeper and forget the tags that were
          recorded below the new level.
        */
        if (chr == ' ' || chr == '>') {
          level++;
          clear_level(level + 1);
        }

        /* A blank after the tag name means that attributes may follow. */
        if (chr == ' ')
          in_tag = true;
        else
          in_tag = false;
        break;

      case ' ':            /* read attribute */
        while (chr == ' ') /* skip blanks */
          chr = my_tospace(GET);

        /* Blanks outside of a tag carry no attribute. */
        if (!in_tag) break;

        /* Collect the attribute name. */
        while (chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF) {
          attribute.append(chr);
          chr = my_tospace(GET);
        }
        break;

      case '>': /* end tag - read tag value */
        in_tag = false;
        /* Skip all whitespaces */
        while (' ' == (chr = my_tospace(GET))) {
        }
        /*
          Push the first non-whitespace char back to Stack. This char would be
          read in the upcoming call to read_value()
         */
        PUSH(chr);

        /*
          Read <![CDATA[ ... ]]> and tag's value.
          Plain text and CDATA sections may alternate; both are appended
          to 'value' until a '<' that does not start a CDATA section is met.
        */
        bool have_cdata;
        do {
          chr = read_value('<', &value);
          if (chr == my_b_EOF) goto found_eof;

          chr = read_cdata(&value, &have_cdata);
          if (chr == my_b_EOF) goto found_eof;
        } while (have_cdata);

        /* save value to list */
        if (tag.length() > 0 && value.length() > 0) {
          DBUG_PRINT("read_xml", ("lev:%i tag:%s val:%s", level,
                                  tag.c_ptr_safe(), value.c_ptr_safe()));
          taglist.push_front(new XML_TAG(level, tag, value));
        }
        tag.length(0);
        value.length(0);
        attribute.length(0);
        break;

      case '/': /* close tag */
        chr = my_tospace(GET);
        /* Decrease the 'level' only when (i) It's not an */
        /* (without space) empty tag i.e. <tag/> or, (ii) */
        /* It is of format <row col="val" .../>           */
        if (chr != '>' || in_tag) {
          level--;
          in_tag = false;
        }
        if (chr != '>')  /* if this is an empty tag <tag   /> */
          tag.length(0); /* we should keep tag value          */
        /* Collect the name of the tag being closed. */
        while (chr != '>' && chr != my_b_EOF) {
          tag.append(chr);
          chr = my_tospace(GET);
        }

        /* Closing the ROWS IDENTIFIED BY tag completes the record. */
        if ((tag.length() == line_term_length - 2) &&
            (memcmp(tag.ptr(), line_term_ptr + 1, tag.length()) == 0)) {
          DBUG_PRINT("read_xml",
                     ("found end-of-row %i %s", level, tag.c_ptr_safe()));
          return false;  // normal return
        }
        chr = my_tospace(GET);
        break;

      case '=': /* attribute name end - read the value */
        // check for tag field and attribute name
        // NOTE(review): both comparisons are prefix matches of fixed length
        // (5 and 4 bytes), applied even when the string is shorter - confirm.
        if (!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) &&
            !memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name"))) {
          /*
            this is format <field name="xx">xx</field>
            where actual fieldname is in attribute

            The character after '=' (the opening quote) becomes 'delim';
            case '<' then reads the field name as a tag name, dropping
            every character equal to 'delim'. 'level' is decreased here
            because case '<' increases it again for the pretended tag.
          */
          delim = my_tospace(GET);
          tag.length(0);
          attribute.length(0);
          chr = '<'; /* we pretend that it is a tag */
          level--;
          break;
        }

        // check for " or '
        chr = GET;
        if (chr == my_b_EOF) goto found_eof;
        if (chr == '"' || chr == '\'') {
          delim = chr;
        } else {
          delim = ' '; /* no delimiter, use space */
          PUSH(chr);
        }

        /* Attributes are recorded one level below the tag they belong to. */
        chr = read_value(delim, &value);
        if (attribute.length() > 0 && value.length() > 0) {
          DBUG_PRINT("read_xml", ("lev:%i att:%s val:%s\n", level + 1,
                                  attribute.c_ptr_safe(), value.c_ptr_safe()));
          taglist.push_front(new XML_TAG(level + 1, attribute, value));
        }
        attribute.length(0);
        value.length(0);
        /*
          A blank that ended the value is kept in 'chr' for case ' ';
          any other delimiter is stepped over.
        */
        if (chr != ' ') chr = my_tospace(GET);
        break;

      default:
        /* Nothing to do for this character, move on to the next one. */
        chr = my_tospace(GET);
    } /* end switch */
  }   /* end while */

found_eof:
  DBUG_PRINT("read_xml", ("Found eof"));
  eof = true;
  return true;
}
2098 
execute(THD * thd)2099 bool Sql_cmd_load_table::execute(THD *thd) {
2100   LEX *const lex = thd->lex;
2101 
2102   uint privilege =
2103       (lex->duplicates == DUP_REPLACE ? INSERT_ACL | DELETE_ACL : INSERT_ACL) |
2104       (m_is_local_file ? 0 : FILE_ACL);
2105 
2106   if (m_is_local_file) {
2107     if (!thd->get_protocol()->has_client_capability(CLIENT_LOCAL_FILES) ||
2108         !opt_local_infile) {
2109       my_error(ER_CLIENT_LOCAL_FILES_DISABLED, MYF(0));
2110       return true;
2111     }
2112   }
2113 
2114   if (check_one_table_access(thd, privilege, lex->query_tables)) return true;
2115 
2116   /* Push strict / ignore error handler */
2117   Ignore_error_handler ignore_handler;
2118   Strict_error_handler strict_handler;
2119   if (thd->lex->is_ignore())
2120     thd->push_internal_handler(&ignore_handler);
2121   else if (thd->is_strict_mode())
2122     thd->push_internal_handler(&strict_handler);
2123 
2124   bool res = execute_inner(thd, lex->duplicates);
2125 
2126   /* Pop ignore / strict error handler */
2127   if (thd->lex->is_ignore() || thd->is_strict_mode())
2128     thd->pop_internal_handler();
2129 
2130   return res;
2131 }
2132