1 /*
2    Copyright (c) 2000, 2021, Oracle and/or its affiliates.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
23 
24 
25 /* Copy data from a textfile to table */
26 /* 2006-12 Erik Wetterberg : LOAD XML added */
27 
28 #include "sql_load.h"
29 #include "sql_cache.h"                          // query_cache_*
30 #include "sql_base.h"          // fill_record_n_invoke_before_triggers
31 #include <my_dir.h>
32 #include "sql_view.h"                           // check_key_in_view
33 #include "sql_insert.h" // check_that_all_fields_are_given_values,
34                         // prepare_triggers_for_insert_stmt,
35                         // write_record
36 #include "auth_common.h"// INSERT_ACL, UPDATE_ACL
37 #include "log_event.h"  // Delete_file_log_event,
38                         // Execute_load_query_log_event,
39                         // LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F
40 #include <m_ctype.h>
41 #include "rpl_mi.h"
42 #include "rpl_slave.h"
43 #include "table_trigger_dispatcher.h"  // Table_trigger_dispatcher
44 #include "sql_show.h"
45 #include "item_timefunc.h"  // Item_func_now_local
46 #include "rpl_rli.h"     // Relay_log_info
47 #include "log.h"
48 
49 #include "pfs_file_provider.h"
50 #include "mysql/psi/mysql_file.h"
51 
52 #include <algorithm>
53 
54 using std::min;
55 using std::max;
56 
57 class XML_TAG {
58 public:
59   int level;
60   String field;
61   String value;
62   XML_TAG(int l, String f, String v);
63 };
64 
65 
XML_TAG(int l,String f,String v)66 XML_TAG::XML_TAG(int l, String f, String v)
67 {
68   level= l;
69   field.append(f);
70   value.append(v);
71 }
72 
73 
/*
  Character push-back helpers. Both macros expand to code that refers to
  the names 'stack', 'stack_pos' and 'cache', so they can only be used
  where those names are in scope (READ_INFO declares members with these
  names and uses GET in skip_data_till_eof()).

  GET      - if the push-back stack is not empty, pop and yield its top
             element; otherwise read the next value with my_b_get(&cache).
  PUSH(A)  - store A at the top of the push-back stack and advance
             stack_pos. No bounds check is done by the macro itself.
*/
#define GET (stack_pos != stack ? *--stack_pos : my_b_get(&cache))
#define PUSH(A) *(stack_pos++)=(A)
76 
77 class READ_INFO {
78   File	file;
79   uchar	*buffer,			/* Buffer for read text */
80 	*end_of_buff;			/* Data in bufferts ends here */
81   uint	buff_length;			/* Length of buffer */
82   const uchar *field_term_ptr, *line_term_ptr;
83   const char *line_start_ptr, *line_start_end;
84   size_t	field_term_length,line_term_length,enclosed_length;
85   int	field_term_char,line_term_char,enclosed_char,escape_char;
86   int	*stack,*stack_pos;
87   bool	found_end_of_line,start_of_line,eof;
88   bool  need_end_io_cache;
89   IO_CACHE cache;
90   int level; /* for load xml */
91 
92 public:
93   bool error,line_cuted,found_null,enclosed;
94   uchar	*row_start,			/* Found row starts here */
95 	*row_end;			/* Found row ends here */
96   const CHARSET_INFO *read_charset;
97 
98   READ_INFO(File file,uint tot_length,const CHARSET_INFO *cs,
99 	    const String &field_term,
100             const String &line_start,
101             const String &line_term,
102 	    const String &enclosed,
103             int escape,bool get_it_from_net, bool is_fifo);
104   ~READ_INFO();
105   int read_field();
106   int read_fixed_length(void);
107   int next_line(void);
108   char unescape(char chr);
109   int terminator(const uchar *ptr, size_t length);
110   bool find_start_of_fields();
111   /* load xml */
112   List<XML_TAG> taglist;
113   int read_value(int delim, String *val);
114   int read_xml();
115   int clear_level(int level);
116 
117   /*
118     We need to force cache close before destructor is invoked to log
119     the last read block
120   */
end_io_cache()121   void end_io_cache()
122   {
123     ::end_io_cache(&cache);
124     need_end_io_cache = 0;
125   }
126 
127   /*
128     Either this method, or we need to make cache public
129     Arg must be set from mysql_load() since constructor does not see
130     either the table or THD value
131   */
set_io_cache_arg(void * arg)132   void set_io_cache_arg(void* arg) { cache.arg = arg; }
133 
134   /**
135     skip all data till the eof.
136   */
skip_data_till_eof()137   void skip_data_till_eof()
138   {
139     while (GET != my_b_EOF)
140       ;
141   }
142 };
143 
144 static int read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
145                              List<Item> &fields_vars, List<Item> &set_fields,
146                              List<Item> &set_values, READ_INFO &read_info,
147 			     ulong skip_lines);
148 static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
149                           List<Item> &fields_vars, List<Item> &set_fields,
150                           List<Item> &set_values, READ_INFO &read_info,
151 			  const String &enclosed, ulong skip_lines);
152 
153 static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
154                           List<Item> &fields_vars, List<Item> &set_fields,
155                           List<Item> &set_values, READ_INFO &read_info,
156                           ulong skip_lines);
157 
158 #ifndef EMBEDDED_LIBRARY
159 static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex,
160                                                const char* db_arg, /* table's database */
161                                                const char* table_name_arg,
162                                                bool is_concurrent,
163                                                enum enum_duplicates duplicates,
164                                                bool transactional_table,
165                                                int errocode);
166 #endif /* EMBEDDED_LIBRARY */
167 
/*
  Execute LOAD DATA query

  SYNOPSIS
    mysql_load()
      thd - current thread
      ex  - sql_exchange object representing source file and its parsing rules
      table_list  - list of tables to which we are loading data
      fields_vars - list of fields and variables to which we read
                    data from file
      set_fields  - list of fields mentioned in set clause
      set_values  - expressions to assign to fields in previous list
      handle_duplicates - indicates whether we should emit an error or
                          replace the row when we meet duplicates.
      read_file_from_client - is this LOAD DATA LOCAL ?

  RETURN VALUES
    TRUE - error / FALSE - success
*/
187 
int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
	        List<Item> &fields_vars, List<Item> &set_fields,
                List<Item> &set_values,
                enum enum_duplicates handle_duplicates,
                bool read_file_from_client)
{
  /*
    Outline of this function:
      1. validate the separators and open/lock/resolve the target table;
      2. resolve the field list and the SET clause;
      3. size the read buffer, then open the input (server-side file or
         data requested from the client);
      4. read all rows through one of the read_* helpers;
      5. clean up, write to the binary log if it is open, and report
         the result to the client.
  */
  char name[FN_REFLEN];                 // File path; reused for the info message
  File file;
  int error= 0;
  const String *field_term= ex->field.field_term;
  const String *escaped=    ex->field.escaped;
  const String *enclosed=   ex->field.enclosed;
  bool is_fifo=0;
  SELECT_LEX *select= thd->lex->select_lex;
#ifndef EMBEDDED_LIBRARY
  LOAD_FILE_INFO lf_info;
  THD::killed_state killed_status= THD::NOT_KILLED;
  bool is_concurrent;
  bool transactional_table;
#endif
  const char *db = table_list->db;			// This is never null
  /*
    If path for file is not defined, we will use the current database.
    If this is not set, we will use the directory where the table to be
    loaded is located
  */
  const char *tdb= thd->db().str ? thd->db().str : db; //Result is never null
  ulong skip_lines= ex->skip_lines;
  DBUG_ENTER("mysql_load");

  /*
    Bug #34283
    mysqlbinlog leaves tmpfile after termination if binlog contains
    load data infile, so in mixed mode we go to row-based for
    avoiding the problem.
  */
  thd->set_current_stmt_binlog_format_row_if_mixed();

#ifdef EMBEDDED_LIBRARY
  read_file_from_client  = 0; //server is always in the same process
#endif

  /* The escape and the enclosing string may be at most one character long */
  if (escaped->length() > 1 || enclosed->length() > 1)
  {
    my_message(ER_WRONG_FIELD_TERMINATORS,ER(ER_WRONG_FIELD_TERMINATORS),
	       MYF(0));
    DBUG_RETURN(TRUE);
  }

  /* Report problems with non-ascii separators */
  if (!escaped->is_ascii() || !enclosed->is_ascii() ||
      !field_term->is_ascii() ||
      !ex->line.line_term->is_ascii() || !ex->line.line_start->is_ascii())
  {
    push_warning(thd, Sql_condition::SL_WARNING,
                 WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED,
                 ER(WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED));
  }

  /* Open and lock the target, then resolve it in the current SELECT_LEX */
  if (open_and_lock_tables(thd, table_list, 0))
    DBUG_RETURN(true);

  THD_STAGE_INFO(thd, stage_executing);
  if (select->setup_tables(thd, table_list, false))
    DBUG_RETURN(true);

  if (run_before_dml_hook(thd))
    DBUG_RETURN(true);

  if (table_list->is_view() && select->resolve_derived(thd, false))
    DBUG_RETURN(true);                   /* purecov: inspected */

  /*
    The base table that rows are actually written to; NULL when the
    target cannot be loaded into (see the three conditions below).
  */
  TABLE_LIST *const insert_table_ref=
    table_list->is_updatable() &&        // View must be updatable
    !table_list->is_multiple_tables() && // Multi-table view not allowed
    !table_list->is_derived() ?          // derived tables not allowed
    table_list->updatable_base_table() : NULL;

  if (insert_table_ref == NULL ||
      check_key_in_view(thd, table_list, insert_table_ref))
  {
    my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "LOAD");
    DBUG_RETURN(TRUE);
  }
  if (select->derived_table_count &&
      select->check_view_privileges(thd, INSERT_ACL, SELECT_ACL))
    DBUG_RETURN(true);                   /* purecov: inspected */

  if (table_list->is_merged())
  {
    if (table_list->prepare_check_option(thd))
      DBUG_RETURN(TRUE);

    if (handle_duplicates == DUP_REPLACE &&
        table_list->prepare_replace_filter(thd))
      DBUG_RETURN(true);
  }

  // Pass the check option down to the underlying table:
  insert_table_ref->check_option= table_list->check_option;
  /*
    Let us emit an error if we are loading data to table which is used
    in subselect in SET clause like we do it for INSERT.

    The main thing to fix to remove this restriction is to ensure that the
    table is marked to be 'used for insert' in which case we should never
    mark this table as 'const table' (ie, one that has only one row).
  */
  if (unique_table(thd, insert_table_ref, table_list->next_global, 0))
  {
    my_error(ER_UPDATE_TABLE_USED, MYF(0), table_list->table_name);
    DBUG_RETURN(TRUE);
  }

  TABLE *const table= insert_table_ref->table;

  for (Field **cur_field= table->field; *cur_field; ++cur_field)
    (*cur_field)->reset_warnings();

#ifndef EMBEDDED_LIBRARY
  transactional_table= table->file->has_transactions();
  is_concurrent= (table_list->lock_type == TL_WRITE_CONCURRENT_INSERT);
#endif

  if (!fields_vars.elements)
  {
    /*
      No explicit field list was given: fill fields_vars with one item
      per column of table_list and mark every column as written.
    */
    Field_iterator_table_ref field_iterator;
    field_iterator.set(table_list);
    for (; !field_iterator.end_of_fields(); field_iterator.next())
    {
      Item *item;
      if (!(item= field_iterator.create_item(thd)))
        DBUG_RETURN(TRUE);

      if (item->field_for_view_update() == NULL)
      {
        my_error(ER_NONUPDATEABLE_COLUMN, MYF(0), item->item_name.ptr());
        DBUG_RETURN(true);
      }
      fields_vars.push_back(item->real_item());
    }
    bitmap_set_all(table->write_set);
    /*
      Let us also prepare SET clause, although it is probably empty
      in this case.
    */
    if (setup_fields(thd, Ref_ptr_array(), set_fields, INSERT_ACL, NULL,
                     false, true) ||
        setup_fields(thd, Ref_ptr_array(), set_values, SELECT_ACL, NULL,
                     false, false))
      DBUG_RETURN(TRUE);
  }
  else
  {						// Part field list
    /*
      Because fields_vars may contain user variables,
      pass false for column_update in first call below.
    */
    if (setup_fields(thd, Ref_ptr_array(), fields_vars, INSERT_ACL, NULL,
                     false, false) ||
        setup_fields(thd, Ref_ptr_array(), set_fields, INSERT_ACL, NULL,
                     false, true))
      DBUG_RETURN(TRUE);

    /*
      Special updatability test is needed because fields_vars may contain
      a mix of column references and user variables.
    */
    Item *item;
    List_iterator<Item> it(fields_vars);
    while ((item= it++))
    {
      if ((item->type() == Item::FIELD_ITEM ||
           item->type() == Item::REF_ITEM) &&
          item->field_for_view_update() == NULL)
      {
        my_error(ER_NONUPDATEABLE_COLUMN, MYF(0), item->item_name.ptr());
        DBUG_RETURN(true);
      }
    }
    /* We explicitly ignore the return value */
    (void)check_that_all_fields_are_given_values(thd, table, table_list);
    /* Fix the expressions in SET clause */
    if (setup_fields(thd, Ref_ptr_array(), set_values, SELECT_ACL, NULL,
                     false, false))
      DBUG_RETURN(TRUE);
  }

  /*
    Escape character to use. INT_MAX is used as the "no escape character"
    value when the escape string is empty, or when sql_mode has
    MODE_NO_BACKSLASH_ESCAPES set and ex->escaped_given() is false.
  */
  const int escape_char= (escaped->length() && (ex->escaped_given() ||
                          !(thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES)))
                          ? (*escaped)[0] : INT_MAX;

  /*
    * LOAD DATA INFILE fff INTO TABLE xxx SET columns2
    sets all columns, except if file's row lacks some: in that case,
    defaults are set by read_fixed_length() and read_sep_field(),
    not by COPY_INFO.
    * LOAD DATA INFILE fff INTO TABLE xxx (columns1) SET columns2=
    may need a default for columns other than columns1 and columns2.
  */
  const bool manage_defaults= fields_vars.elements != 0;
  COPY_INFO info(COPY_INFO::INSERT_OPERATION,
                 &fields_vars, &set_fields,
                 manage_defaults,
                 handle_duplicates, escape_char);

  if (info.add_function_default_columns(table, table->write_set))
    DBUG_RETURN(TRUE);

  prepare_triggers_for_insert_stmt(table);

  /*
    Add up the lengths of the target columns (passed to READ_INFO as
    tot_length below) and note whether BLOB columns or non-column targets
    of type STRING_ITEM (counted as variables, see use_vars) are present.
  */
  uint tot_length=0;
  bool use_blobs= 0, use_vars= 0;
  List_iterator_fast<Item> it(fields_vars);
  Item *item;

  while ((item= it++))
  {
    Item *real_item= item->real_item();

    if (real_item->type() == Item::FIELD_ITEM)
    {
      Field *field= ((Item_field*)real_item)->field;
      if (field->flags & BLOB_FLAG)
      {
        use_blobs= 1;
        tot_length+= 256;			// Will be extended if needed
      }
      else
        tot_length+= field->field_length;
    }
    else if (item->type() == Item::STRING_ITEM)
      use_vars= 1;
  }
  /* BLOB columns need a line terminator or a field terminator */
  if (use_blobs && !ex->line.line_term->length() && !field_term->length())
  {
    my_message(ER_BLOBS_AND_NO_TERMINATED,ER(ER_BLOBS_AND_NO_TERMINATED),
	       MYF(0));
    DBUG_RETURN(TRUE);
  }
  /* Variables cannot be loaded from fixed-size rows */
  if (use_vars && !field_term->length() && !enclosed->length())
  {
    my_error(ER_LOAD_FROM_FIXED_SIZE_ROWS_TO_VAR, MYF(0));
    DBUG_RETURN(TRUE);
  }

#ifndef EMBEDDED_LIBRARY
  if (read_file_from_client)
  {
    /* Data comes over the connection, so there is no file descriptor */
    (void)net_request_file(thd->get_protocol_classic()->get_net(),
                           ex->file_name);
    file = -1;
  }
  else
#endif
  {
    /*
      Build the server-side path in 'name': a file name without a
      directory part is taken relative to <mysql_real_data_home><tdb>,
      anything else relative to mysql_real_data_home.
    */
    if (!dirname_length(ex->file_name))
    {
      strxnmov(name, FN_REFLEN-1, mysql_real_data_home, tdb, NullS);
      (void) fn_format(name, ex->file_name, name, "",
		       MY_RELATIVE_PATH | MY_UNPACK_FILENAME);
    }
    else
    {
      (void) fn_format(name, ex->file_name, mysql_real_data_home, "",
                       MY_RELATIVE_PATH | MY_UNPACK_FILENAME |
                       MY_RETURN_REAL_PATH);
    }

    if ((thd->system_thread &
         (SYSTEM_THREAD_SLAVE_SQL | SYSTEM_THREAD_SLAVE_WORKER)) != 0)
    {
#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
      Relay_log_info* rli= thd->rli_slave->get_c_rli();

      /* The path must start with the slave's load-file pattern */
      if (strncmp(rli->slave_patternload_file, name,
                  rli->slave_patternload_file_size))
      {
        /*
          LOAD DATA INFILE in the slave SQL Thread can only read from
          --slave-load-tmpdir. This should never happen. Please, report a bug.
        */

        sql_print_error("LOAD DATA INFILE in the slave SQL Thread can only read from --slave-load-tmpdir. " \
                        "Please, report a bug.");
        my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--slave-load-tmpdir");
        DBUG_RETURN(TRUE);
      }
#else
      /*
        This is impossible and should never happen.
      */
      assert(FALSE);
#endif
    }
    else if (!is_secure_file_path(name))
    {
      /* Read only allowed from within dir specified by secure_file_priv */
      my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--secure-file-priv");
      DBUG_RETURN(TRUE);
    }

#if !defined(_WIN32)
    MY_STAT stat_info;
    if (!my_stat(name, &stat_info, MYF(MY_WME)))
      DBUG_RETURN(TRUE);

    // if we are not in slave thread, the file must be:
    if (!thd->slave_thread &&
        !((stat_info.st_mode & S_IFLNK) != S_IFLNK &&   // symlink
          ((stat_info.st_mode & S_IFREG) == S_IFREG ||  // regular file
           (stat_info.st_mode & S_IFIFO) == S_IFIFO)))  // named pipe
    {
      my_error(ER_TEXTFILE_NOT_READABLE, MYF(0), name);
      DBUG_RETURN(TRUE);
    }
    if ((stat_info.st_mode & S_IFIFO) == S_IFIFO)
      is_fifo= 1;
#endif
    if ((file= mysql_file_open(key_file_load,
                               name, O_RDONLY, MYF(MY_WME))) < 0)

      DBUG_RETURN(TRUE);
  }

  /*
    Set up the reader. The character set is the one given in the
    statement (ex->cs) or, if none, thd->variables.collation_database.
  */
  READ_INFO read_info(file,tot_length,
                      ex->cs ? ex->cs : thd->variables.collation_database,
		      *field_term,*ex->line.line_start, *ex->line.line_term,
                      *enclosed,
		      info.escape_char, read_file_from_client, is_fifo);
  if (read_info.error)
  {
    if (file >= 0)
      mysql_file_close(file, MYF(0));           // no files in net reading
    DBUG_RETURN(TRUE);				// Can't allocate buffers
  }

#ifndef EMBEDDED_LIBRARY
  if (mysql_bin_log.is_open())
  {
    /* Hand the logging state to the reader's IO_CACHE (cache.arg) */
    lf_info.thd = thd;
    lf_info.wrote_create_file = 0;
    lf_info.last_pos_in_file = HA_POS_ERROR;
    lf_info.log_delayed= transactional_table;
    read_info.set_io_cache_arg((void*) &lf_info);
  }
#endif /*!EMBEDDED_LIBRARY*/

  thd->count_cuted_fields= CHECK_FIELD_WARN;		/* calc cuted fields */
  thd->cuted_fields=0L;
  /* Skip lines if there is a line terminator */
  if (ex->line.line_term->length() && ex->filetype != FILETYPE_XML)
  {
    /* ex->skip_lines needs to be preserved for logging */
    while (skip_lines > 0)
    {
      skip_lines--;
      if (read_info.next_line())
	break;
    }
  }

  if (!(error=MY_TEST(read_info.error)))
  {

    table->next_number_field=table->found_next_number_field;
    if (thd->lex->is_ignore() ||
	handle_duplicates == DUP_REPLACE)
      table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
    if (handle_duplicates == DUP_REPLACE &&
        (!table->triggers ||
         !table->triggers->has_delete_triggers()))
        table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE);
    if (thd->locked_tables_mode <= LTM_LOCK_TABLES)
      table->file->ha_start_bulk_insert((ha_rows) 0);
    table->copy_blobs=1;

    /*
      Pick the row reader: XML input, fixed-length rows (neither a field
      terminator nor an enclosing character given), or separated fields.
    */
    if (ex->filetype == FILETYPE_XML) /* load xml */
      error= read_xml_field(thd, info, insert_table_ref, fields_vars,
                            set_fields, set_values, read_info,
                            skip_lines);
    else if (!field_term->length() && !enclosed->length())
      error= read_fixed_length(thd, info, insert_table_ref, fields_vars,
                               set_fields, set_values, read_info,
			       skip_lines);
    else
      error= read_sep_field(thd, info, insert_table_ref, fields_vars,
                            set_fields, set_values, read_info,
			    *enclosed, skip_lines);
    /*
      ha_end_bulk_insert() is always called here (when bulk insert was
      started above); its failure is only reported if no earlier error
      occurred.
    */
    if (thd->locked_tables_mode <= LTM_LOCK_TABLES &&
        table->file->ha_end_bulk_insert() && !error)
    {
      table->file->print_error(my_errno(), MYF(0));
      error= 1;
    }
    table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
    table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
    table->next_number_field=0;
  }
  if (file >= 0)
    mysql_file_close(file, MYF(0));
  free_blobs(table);				/* if pack_blob was used */
  table->copy_blobs=0;
  thd->count_cuted_fields= CHECK_FIELD_IGNORE;
  /*
     simulated killing in the middle of per-row loop
     must be effective for binlogging
  */
  DBUG_EXECUTE_IF("simulate_kill_bug27571",
                  {
                    error=1;
                    thd->killed= THD::KILL_QUERY;
                  };);

#ifndef EMBEDDED_LIBRARY
  killed_status= (error == 0) ? THD::NOT_KILLED : thd->killed;
#endif

  /*
    We must invalidate the table in query cache before binlog writing and
    ha_autocommit_...
  */
  query_cache.invalidate_single(thd, insert_table_ref, false);
  if (error)
  {
    /* Drain the rest of the data the client is still sending */
    if (read_file_from_client)
      read_info.skip_data_till_eof();

#ifndef EMBEDDED_LIBRARY
    if (mysql_bin_log.is_open())
    {
      {
	/*
	  Make sure last block (the one which caused the error) gets
	  logged.  This is needed because otherwise after write of (to
	  the binlog, not to read_info (which is a cache))
	  Delete_file_log_event the bad block will remain in read_info
	  (because pre_read is not called at the end of the last
	  block; remember pre_read is called whenever a new block is
	  read from disk).  At the end of mysql_load(), the destructor
	  of read_info will call end_io_cache() which will flush
	  read_info, so we will finally have this in the binlog:

	  Append_block # The last successful block
	  Delete_file
	  Append_block # The failing block
	  which is nonsense.
	  Or could also be (for a small file)
	  Create_file  # The failing block
	  which is nonsense (Delete_file is not written in this case, because:
	  Create_file has not been written, so Delete_file is not written, then
	  when read_info is destroyed end_io_cache() is called which writes
	  Create_file.
	*/
	read_info.end_io_cache();
	/* If the file was not empty, wrote_create_file is true */
	if (lf_info.wrote_create_file)
	{
          int errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);

          /* since there is already an error, the possible error of
             writing binary log will be ignored */
	  if (thd->get_transaction()->cannot_safely_rollback(
	      Transaction_ctx::STMT))
            (void) write_execute_load_query_log_event(thd, ex,
                                                      table_list->db,
                                                      table_list->table_name,
                                                      is_concurrent,
                                                      handle_duplicates,
                                                      transactional_table,
                                                      errcode);
	  else
	  {
	    Delete_file_log_event d(thd, db, transactional_table);
	    (void) mysql_bin_log.write_event(&d);
	  }
	}
      }
    }
#endif /*!EMBEDDED_LIBRARY*/
    error= -1;				// Error on read
    goto err;
  }

  /*
    Success: format the info message for the client. 'name' is no longer
    needed as a file path and is reused as the message buffer.
  */
  my_snprintf(name, sizeof(name),
              ER(ER_LOAD_INFO),
              (long) info.stats.records, (long) info.stats.deleted,
              (long) (info.stats.records - info.stats.copied),
              (long) thd->get_stmt_da()->current_statement_cond_count());

#ifndef EMBEDDED_LIBRARY
  if (mysql_bin_log.is_open())
  {
    /*
      We need to do the job that is normally done inside
      binlog_query() here, which is to ensure that the pending event
      is written before tables are unlocked and before any other
      events are written.  We also need to update the table map
      version for the binary log to mark that table maps are invalid
      after this point.
     */
    if (thd->is_current_stmt_binlog_format_row())
      error= thd->binlog_flush_pending_rows_event(TRUE, transactional_table);
    else
    {
      /*
        As already explained above, we need to call end_io_cache() or the last
        block will be logged only after Execute_load_query_log_event (which is
        wrong), when read_info is destroyed.
      */
      read_info.end_io_cache();
      if (lf_info.wrote_create_file)
      {
        int errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
        error= write_execute_load_query_log_event(thd, ex,
                                                  table_list->db, table_list->table_name,
                                                  is_concurrent,
                                                  handle_duplicates,
                                                  transactional_table,
                                                  errcode);
      }

      /*
        Flushing the IO CACHE while writing the execute load query log event
        may result in error (for instance, because the max_binlog_size has been
        reached, and rotation of the binary log failed).
      */
      error= error || mysql_bin_log.get_log_file()->error;
    }
    if (error)
      goto err;
  }
#endif /*!EMBEDDED_LIBRARY*/

  /* ok to client sent only after binlog write and engine commit */
  my_ok(thd, info.stats.copied + info.stats.deleted, 0L, name);
err:
  /* Common exit for both the success path and the error paths above */
  assert(table->file->has_transactions() ||
         !(info.stats.copied || info.stats.deleted) ||
         thd->get_transaction()->cannot_safely_rollback(Transaction_ctx::STMT));
  table->file->ha_release_auto_increment();
  table->auto_increment_field_not_null= FALSE;
  DBUG_RETURN(error);
}
732 
733 
734 #ifndef EMBEDDED_LIBRARY
735 
736 /* Not a very useful function; just to avoid duplication of code */
write_execute_load_query_log_event(THD * thd,sql_exchange * ex,const char * db_arg,const char * table_name_arg,bool is_concurrent,enum enum_duplicates duplicates,bool transactional_table,int errcode)737 static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex,
738                                                const char* db_arg,  /* table's database */
739                                                const char* table_name_arg,
740                                                bool is_concurrent,
741                                                enum enum_duplicates duplicates,
742                                                bool transactional_table,
743                                                int errcode)
744 {
745   char                *load_data_query,
746                       *end,
747                       *fname_start,
748                       *fname_end,
749                       *p= NULL;
750   size_t               pl= 0;
751   List<Item>           fv;
752   Item                *item;
753   String              *str;
754   String               pfield, pfields;
755   int                  n;
756   const char          *tbl= table_name_arg;
757   const char          *tdb= (thd->db().str != NULL ? thd->db().str : db_arg);
758   String              string_buf;
759   if (thd->db().str == NULL || strcmp(db_arg, thd->db().str))
760   {
761     /*
762       If used database differs from table's database,
763       prefix table name with database name so that it
764       becomes a FQ name.
765      */
766     string_buf.set_charset(system_charset_info);
767     append_identifier(thd, &string_buf, db_arg, strlen(db_arg));
768     string_buf.append(".");
769   }
770   append_identifier(thd, &string_buf, table_name_arg,
771                     strlen(table_name_arg));
772   tbl= string_buf.c_ptr_safe();
773   Load_log_event       lle(thd, ex, tdb, tbl, fv, is_concurrent,
774                            duplicates, thd->lex->is_ignore(),
775                            transactional_table);
776 
777   /*
778     force in a LOCAL if there was one in the original.
779   */
780   if (thd->lex->local_file)
781     lle.set_fname_outside_temp_buf(ex->file_name, strlen(ex->file_name));
782 
783   /*
784     prepare fields-list and SET if needed; print_query won't do that for us.
785   */
786   if (!thd->lex->load_field_list.is_empty())
787   {
788     List_iterator<Item> li(thd->lex->load_field_list);
789 
790     pfields.append(" (");
791     n= 0;
792 
793     while ((item= li++))
794     {
795       if (n++)
796         pfields.append(", ");
797       if (item->type() == Item::FIELD_ITEM ||
798                  item->type() == Item::REF_ITEM)
799         append_identifier(thd, &pfields, item->item_name.ptr(),
800                           strlen(item->item_name.ptr()));
801       else
802         item->print(&pfields, QT_ORDINARY);
803     }
804     pfields.append(")");
805   }
806 
807   if (!thd->lex->load_update_list.is_empty())
808   {
809     List_iterator<Item> lu(thd->lex->load_update_list);
810     List_iterator<String> ls(thd->lex->load_set_str_list);
811 
812     pfields.append(" SET ");
813     n= 0;
814 
815     while ((item= lu++))
816     {
817       str= ls++;
818       if (n++)
819         pfields.append(", ");
820       append_identifier(thd, &pfields, item->item_name.ptr(),
821                         strlen(item->item_name.ptr()));
822       // Extract exact Item value
823       str->copy();
824       pfields.append(str->ptr());
825       str->mem_free();
826     }
827     /*
828       Clear the SET string list once the SET command is reconstructed
829       as we donot require the list anymore.
830     */
831     thd->lex->load_set_str_list.empty();
832   }
833 
834   p= pfields.c_ptr_safe();
835   pl= strlen(p);
836 
837   if (!(load_data_query= (char *)thd->alloc(lle.get_query_buffer_length() + 1 + pl)))
838     return TRUE;
839 
840   lle.print_query(FALSE, ex->cs ? ex->cs->csname : NULL,
841                   load_data_query, &end,
842                   &fname_start, &fname_end);
843 
844   strcpy(end, p);
845   end += pl;
846 
847   Execute_load_query_log_event
848     e(thd, load_data_query, end-load_data_query,
849       static_cast<uint>(fname_start - load_data_query - 1),
850       static_cast<uint>(fname_end - load_data_query),
851       (duplicates == DUP_REPLACE) ? binary_log::LOAD_DUP_REPLACE :
852       (thd->lex->is_ignore() ? binary_log::LOAD_DUP_IGNORE :
853                                binary_log::LOAD_DUP_ERROR),
854       transactional_table, FALSE, FALSE, errcode);
855   return mysql_bin_log.write_event(&e);
856 }
857 
858 #endif
859 
/****************************************************************************
** Read rows of fixed size + optional garbage + optional newline
****************************************************************************/
863 
/**
  Load rows in fixed-row-size format into the target table.

  Every row delivered by read_info.read_fixed_length() is cut into
  consecutive pieces of at most field->field_length bytes, one piece per
  item in fields_vars, and each piece is stored into the matching table
  field. The record is then completed by
  fill_record_n_invoke_before_triggers(), checked with view_check_option()
  and written with write_record().

  @param thd          Thread handle; polled for kill, receives warnings.
  @param info         Passed through to trigger invocation and write_record().
  @param table_list   Target table; rows go into table_list->table.
  @param fields_vars  Items to fill from the file. All of them are asserted
                      to resolve to table fields.
  @param set_fields   Passed to fill_record_n_invoke_before_triggers()
                      together with set_values.
  @param set_values   See set_fields.
  @param read_info    Reader state; read_info.error is set here when the
                      validation of default values fails.
  @param skip_lines   Number of leading rows that are read but not loaded.

  @return 0 on success, 1 if the statement was killed, a trigger/write
          failed or read_info.error is set, -1 if the view check returned
          VIEW_CHECK_ERROR.
*/
static int
read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
                  List<Item> &fields_vars, List<Item> &set_fields,
                  List<Item> &set_values, READ_INFO &read_info,
                  ulong skip_lines)
{
  List_iterator_fast<Item> it(fields_vars);
  TABLE *table= table_list->table;
  bool err;
  DBUG_ENTER("read_fixed_length");

  while (!read_info.read_fixed_length())
  {
    if (thd->killed)
    {
      thd->send_kill_message();
      DBUG_RETURN(1);
    }
    if (skip_lines)
    {
      /*
        We could implement this with a simple seek if:
        - We are not using DATA INFILE LOCAL
        - escape character is  ""
        - line starting prefix is ""
      */
      skip_lines--;
      continue;
    }
    it.rewind();
    /* 'pos' walks through the current row as fields consume it. */
    uchar *pos=read_info.row_start;

    restore_record(table, s->default_values);
    /*
      Check whether default values of the fields not specified in column list
      are correct or not.
    */
    if (validate_default_values_of_unset_fields(thd, table))
    {
      read_info.error= true;
      break;
    }

    Item *item;
    while ((item= it++))
    {
      /*
        There are no variables in the fields_vars list in this format, so
        this conversion is safe (no need to check for STRING_ITEM).
      */
      assert(item->real_item()->type() == Item::FIELD_ITEM);
      Item_field *sql_field= static_cast<Item_field*>(item->real_item());
      Field *field= sql_field->field;
      if (field == table->next_number_field)
        table->auto_increment_field_not_null= TRUE;
      /*
        No fields specified in fields_vars list can be null in this format.
        Mark field as not null, we should do this for each row because of
        restore_record...
      */
      field->set_notnull();

      if (pos == read_info.row_end)
      {
        /* The row is exhausted: warn once for every field left without data. */
        thd->cuted_fields++;			/* Not enough fields */
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_WARN_TOO_FEW_RECORDS,
                            ER(ER_WARN_TOO_FEW_RECORDS),
                            thd->get_stmt_da()->current_row_for_condition());
        if (field->type() == FIELD_TYPE_TIMESTAMP && !field->maybe_null())
        {
          // Specific of TIMESTAMP NOT NULL: set to CURRENT_TIMESTAMP.
          Item_func_now_local::store_in(field);
        }
      }
      else
      {
        uint length;
        uchar save_chr;
        /* Take what is left of the row, capped at the field's length. */
        if ((length=(uint) (read_info.row_end-pos)) >
            field->field_length)
          length=field->field_length;
        /*
          Temporarily NUL-terminate the piece while it is stored, then put
          the overwritten byte back (original note: safeguard against malloc).
        */
        save_chr=pos[length]; pos[length]='\0';
        field->store((char*) pos,length,read_info.read_charset);
        pos[length]=save_chr;
        if ((pos+=length) > read_info.row_end)
          pos= read_info.row_end;	/* Fills rest with space */
      }
    }
    if (pos != read_info.row_end)
    {
      /* Data remained in the row after the last field was filled. */
      thd->cuted_fields++;			/* Too long row */
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_WARN_TOO_MANY_RECORDS,
                          ER(ER_WARN_TOO_MANY_RECORDS),
                          thd->get_stmt_da()->current_row_for_condition());
    }

    if (thd->killed ||
        fill_record_n_invoke_before_triggers(thd, &info, set_fields,
                                             set_values, table,
                                             TRG_EVENT_INSERT,
                                             table->s->fields))
      DBUG_RETURN(1);

    switch (table_list->view_check_option(thd)) {
    case VIEW_CHECK_SKIP:
      /* Row is skipped: move on without writing or counting it. */
      read_info.next_line();
      goto continue_loop;
    case VIEW_CHECK_ERROR:
      DBUG_RETURN(-1);
    }

    err= write_record(thd, table, &info, NULL);
    table->auto_increment_field_not_null= FALSE;
    if (err)
      DBUG_RETURN(1);

    /*
      We don't need to reset auto-increment field since we are restoring
      its default value at the beginning of each loop iteration.
    */
    if (read_info.next_line())			// Skip to next line
      break;
    if (read_info.line_cuted)
    {
      thd->cuted_fields++;			/* Too long row */
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_WARN_TOO_MANY_RECORDS,
                          ER(ER_WARN_TOO_MANY_RECORDS),
                          thd->get_stmt_da()->current_row_for_condition());
    }
    thd->get_stmt_da()->inc_current_row_for_condition();
continue_loop:;
  }
  DBUG_RETURN(MY_TEST(read_info.error));
}
1001 
1002 
1003 class Field_tmp_nullability_guard
1004 {
1005 public:
Field_tmp_nullability_guard(Item * item)1006   explicit Field_tmp_nullability_guard(Item *item)
1007    :m_field(NULL)
1008   {
1009     if (item->type() == Item::FIELD_ITEM)
1010     {
1011       m_field= ((Item_field *) item)->field;
1012       /*
1013         Enable temporary nullability for items that corresponds
1014         to table fields.
1015       */
1016       m_field->set_tmp_nullable();
1017     }
1018   }
1019 
~Field_tmp_nullability_guard()1020   ~Field_tmp_nullability_guard()
1021   {
1022     if (m_field)
1023       m_field->reset_tmp_nullable();
1024   }
1025 
1026 private:
1027   Field *m_field;
1028 };
1029 
1030 
/**
  Load rows in delimiter-separated format into the target table.

  For every row, one value is fetched with read_info.read_field() for each
  item in fields_vars and stored either into the table field the item
  resolves to or into the item itself when it is a STRING_ITEM (asserted to
  be an Item_user_var_as_out_param). If the row ends before all items got a
  value, the remaining fields are reset and the remaining variables are set
  to NULL. The record is then passed through
  fill_record_n_invoke_before_triggers(), the NOT NULL check (only when the
  table has no triggers), view_check_option() and write_record().

  @param thd          Thread handle; polled for kill, receives warnings
                      and errors.
  @param info         Passed through to trigger invocation and write_record().
  @param table_list   Target table; rows go into table_list->table.
  @param fields_vars  Items (fields or user variables) to fill per row.
  @param set_fields   Passed to fill_record_n_invoke_before_triggers()
                      together with set_values.
  @param set_values   See set_fields.
  @param read_info    Reader state; read_info.error is set here on failed
                      default validation or when thd reports an error.
  @param enclosed     Enclosing string; only its length is used, to decide
                      whether an unenclosed literal "NULL" means SQL NULL.
  @param skip_lines   Number of leading rows that are read but not loaded.

  @return 0 on success, 1 on kill, on any error raised while filling or
          writing the row or when read_info.error is set, -1 if the view
          check returned VIEW_CHECK_ERROR.
*/
static int
read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
               List<Item> &fields_vars, List<Item> &set_fields,
               List<Item> &set_values, READ_INFO &read_info,
               const String &enclosed, ulong skip_lines)
{
  List_iterator_fast<Item> it(fields_vars);
  Item *item;
  TABLE *table= table_list->table;
  size_t enclosed_length;
  bool err;
  DBUG_ENTER("read_sep_field");

  enclosed_length=enclosed.length();

  /* One iteration per input row; the item iterator restarts for each row. */
  for (;;it.rewind())
  {
    if (thd->killed)
    {
      thd->send_kill_message();
      DBUG_RETURN(1);
    }

    restore_record(table, s->default_values);
    /*
      Check whether default values of the fields not specified in column list
      are correct or not.
    */
    if (validate_default_values_of_unset_fields(thd, table))
    {
      read_info.error= true;
      break;
    }

    while ((item= it++))
    {
      uint length;
      uchar *pos;
      Item *real_item;

      /*
        A non-zero result means no further field could be read; 'item' then
        stays on the first item that did not receive a value.
      */
      if (read_info.read_field())
        break;

      /* If this line is to be skipped we don't want to fill field or var */
      if (skip_lines)
        continue;

      pos=read_info.row_start;
      length=(uint) (read_info.row_end-pos);

      real_item= item->real_item();

      /* Active until the end of this loop iteration (including 'continue'). */
      Field_tmp_nullability_guard fld_tmp_nullability_guard(real_item);

      /*
        The value is NULL if it is an unenclosed literal "NULL" (only
        honoured when an enclosing string is in use) or a one-byte value for
        which the reader flagged found_null.
      */
      if ((!read_info.enclosed &&
          (enclosed_length && length == 4 &&
           !memcmp(pos, STRING_WITH_LEN("NULL")))) ||
          (length == 1 && read_info.found_null))
      {

        if (real_item->type() == Item::FIELD_ITEM)
        {
          Field *field= ((Item_field *)real_item)->field;
          if (field->reset())                   // Set to 0
          {
            my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0), field->field_name,
                     thd->get_stmt_da()->current_row_for_condition());
            DBUG_RETURN(1);
          }
          if (!field->real_maybe_null() &&
              field->type() == FIELD_TYPE_TIMESTAMP)
          {
            // Specific of TIMESTAMP NOT NULL: set to CURRENT_TIMESTAMP.
            Item_func_now_local::store_in(field);
          }
          else
          {
            /*
              Set field to NULL. Later we will clear temporary nullability flag
              and check NOT NULL constraint.
            */
            field->set_null();
          }
        }
        else if (item->type() == Item::STRING_ITEM)
        {
          assert(NULL != dynamic_cast<Item_user_var_as_out_param*>(item));
          ((Item_user_var_as_out_param *)item)->set_null_value(
                                                  read_info.read_charset);
        }

        continue;
      }

      if (real_item->type() == Item::FIELD_ITEM)
      {
        Field *field= ((Item_field *)real_item)->field;
        field->set_notnull();
        read_info.row_end[0]=0;			// Safe to change end marker
        if (field == table->next_number_field)
          table->auto_increment_field_not_null= TRUE;
        field->store((char*) pos, length, read_info.read_charset);
      }
      else if (item->type() == Item::STRING_ITEM)
      {
        assert(NULL != dynamic_cast<Item_user_var_as_out_param*>(item));
        ((Item_user_var_as_out_param *)item)->set_value((char*) pos, length,
                                                        read_info.read_charset);
      }
    }

    /* An error raised while storing values is treated as a reader error. */
    if (thd->is_error())
      read_info.error= true;

    if (read_info.error)
      break;
    if (skip_lines)
    {
      skip_lines--;
      continue;
    }
    /* Non-NULL 'item' here means the field loop above stopped early. */
    if (item)
    {
      /* Have not read any field, thus input file is simply ended */
      if (item == fields_vars.head())
        break;
      /* Give every item that got no value a defined content. */
      for (; item ; item= it++)
      {
        Item *real_item= item->real_item();
        if (real_item->type() == Item::FIELD_ITEM)
        {
          Field *field= ((Item_field *)real_item)->field;
          /*
            We set to 0. But if the field is DEFAULT NULL, the "null bit"
            turned on by restore_record() above remains so field will be NULL.
          */
          if (field->reset())
          {
            my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0),field->field_name,
                     thd->get_stmt_da()->current_row_for_condition());
            DBUG_RETURN(1);
          }
          if (field->type() == FIELD_TYPE_TIMESTAMP && !field->maybe_null())
            // Specific of TIMESTAMP NOT NULL: set to CURRENT_TIMESTAMP.
            Item_func_now_local::store_in(field);
          /*
            QQ: We probably should not throw warning for each field.
            But how about intention to always have the same number
            of warnings in THD::cuted_fields (and get rid of cuted_fields
            in the end ?)
          */
          thd->cuted_fields++;
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_WARN_TOO_FEW_RECORDS,
                              ER(ER_WARN_TOO_FEW_RECORDS),
                              thd->get_stmt_da()->current_row_for_condition());
        }
        else if (item->type() == Item::STRING_ITEM)
        {
          assert(NULL != dynamic_cast<Item_user_var_as_out_param*>(item));
          ((Item_user_var_as_out_param *)item)->set_null_value(
                                                  read_info.read_charset);
        }
      }
    }

    if (thd->killed ||
        fill_record_n_invoke_before_triggers(thd, &info, set_fields,
                                             set_values, table,
                                             TRG_EVENT_INSERT,
                                             table->s->fields))
      DBUG_RETURN(1);

    if (!table->triggers)
    {
      /*
        If there is no trigger for the table then check the NOT NULL constraint
        for every table field.

        For the table that has BEFORE-INSERT trigger installed checking for
        NOT NULL constraint is done inside function
        fill_record_n_invoke_before_triggers() after all trigger instructions
        has been executed.
      */
      it.rewind();

      while ((item= it++))
      {
        Item *real_item= item->real_item();
        if (real_item->type() == Item::FIELD_ITEM)
          ((Item_field *) real_item)->field->check_constraints(ER_WARN_NULL_TO_NOTNULL);
      }
    }

    if (thd->is_error())
      DBUG_RETURN(1);

    switch (table_list->view_check_option(thd)) {
    case VIEW_CHECK_SKIP:
      /* Row is skipped: move on without writing or counting it. */
      read_info.next_line();
      goto continue_loop;
    case VIEW_CHECK_ERROR:
      DBUG_RETURN(-1);
    }

    err= write_record(thd, table, &info, NULL);
    table->auto_increment_field_not_null= FALSE;
    if (err)
      DBUG_RETURN(1);
    /*
      We don't need to reset auto-increment field since we are restoring
      its default value at the beginning of each loop iteration.
    */
    if (read_info.next_line())			// Skip to next line
      break;
    if (read_info.line_cuted)
    {
      thd->cuted_fields++;			/* Too long row */
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_WARN_TOO_MANY_RECORDS, ER(ER_WARN_TOO_MANY_RECORDS),
                          thd->get_stmt_da()->current_row_for_condition());
      if (thd->killed)
        DBUG_RETURN(1);
    }
    thd->get_stmt_da()->inc_current_row_for_condition();
continue_loop:;
  }
  DBUG_RETURN(MY_TEST(read_info.error));
}
1260 
1261 
1262 /****************************************************************************
1263 ** Read rows in xml format
1264 ****************************************************************************/
1265 static int
read_xml_field(THD * thd,COPY_INFO & info,TABLE_LIST * table_list,List<Item> & fields_vars,List<Item> & set_fields,List<Item> & set_values,READ_INFO & read_info,ulong skip_lines)1266 read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
1267                List<Item> &fields_vars, List<Item> &set_fields,
1268                List<Item> &set_values, READ_INFO &read_info,
1269                ulong skip_lines)
1270 {
1271   List_iterator_fast<Item> it(fields_vars);
1272   Item *item;
1273   TABLE *table= table_list->table;
1274   const CHARSET_INFO *cs= read_info.read_charset;
1275   DBUG_ENTER("read_xml_field");
1276 
1277   for ( ; ; it.rewind())
1278   {
1279     if (thd->killed)
1280     {
1281       thd->send_kill_message();
1282       DBUG_RETURN(1);
1283     }
1284 
1285     // read row tag and save values into tag list
1286     if (read_info.read_xml())
1287       break;
1288 
1289     List_iterator_fast<XML_TAG> xmlit(read_info.taglist);
1290     xmlit.rewind();
1291     XML_TAG *tag= NULL;
1292 
1293 #ifndef NDEBUG
1294     DBUG_PRINT("read_xml_field", ("skip_lines=%d", (int) skip_lines));
1295     while ((tag= xmlit++))
1296     {
1297       DBUG_PRINT("read_xml_field", ("got tag:%i '%s' '%s'",
1298                                     tag->level, tag->field.c_ptr(),
1299                                     tag->value.c_ptr()));
1300     }
1301 #endif
1302 
1303     restore_record(table, s->default_values);
1304     /*
1305       Check whether default values of the fields not specified in column list
1306       are correct or not.
1307     */
1308     if (validate_default_values_of_unset_fields(thd, table))
1309     {
1310       read_info.error= true;
1311       break;
1312     }
1313 
1314     while ((item= it++))
1315     {
1316       /* If this line is to be skipped we don't want to fill field or var */
1317       if (skip_lines)
1318         continue;
1319 
1320       /* find field in tag list */
1321       xmlit.rewind();
1322       tag= xmlit++;
1323 
1324       while(tag && strcmp(tag->field.c_ptr(), item->item_name.ptr()) != 0)
1325         tag= xmlit++;
1326 
1327       item= item->real_item();
1328 
1329       if (!tag) // found null
1330       {
1331         if (item->type() == Item::FIELD_ITEM)
1332         {
1333           Field *field= (static_cast<Item_field*>(item))->field;
1334           field->reset();
1335           field->set_null();
1336           if (field == table->next_number_field)
1337             table->auto_increment_field_not_null= TRUE;
1338           if (!field->maybe_null())
1339           {
1340             if (field->type() == FIELD_TYPE_TIMESTAMP)
1341               // Specific of TIMESTAMP NOT NULL: set to CURRENT_TIMESTAMP.
1342               Item_func_now_local::store_in(field);
1343             else if (field != table->next_number_field)
1344               field->set_warning(Sql_condition::SL_WARNING,
1345                                  ER_WARN_NULL_TO_NOTNULL, 1);
1346           }
1347         }
1348         else
1349         {
1350           assert(NULL != dynamic_cast<Item_user_var_as_out_param*>(item));
1351           ((Item_user_var_as_out_param *) item)->set_null_value(cs);
1352         }
1353         continue;
1354       }
1355 
1356       if (item->type() == Item::FIELD_ITEM)
1357       {
1358         Field *field= ((Item_field *)item)->field;
1359         field->set_notnull();
1360         if (field == table->next_number_field)
1361           table->auto_increment_field_not_null= TRUE;
1362         field->store((char *) tag->value.ptr(), tag->value.length(), cs);
1363       }
1364       else
1365       {
1366         assert(NULL != dynamic_cast<Item_user_var_as_out_param*>(item));
1367         ((Item_user_var_as_out_param *) item)->set_value(
1368                                                  (char *) tag->value.ptr(),
1369                                                  tag->value.length(), cs);
1370       }
1371     }
1372 
1373     if (read_info.error)
1374       break;
1375 
1376     if (skip_lines)
1377     {
1378       skip_lines--;
1379       continue;
1380     }
1381 
1382     if (item)
1383     {
1384       /* Have not read any field, thus input file is simply ended */
1385       if (item == fields_vars.head())
1386         break;
1387 
1388       for ( ; item; item= it++)
1389       {
1390         if (item->type() == Item::FIELD_ITEM)
1391         {
1392           /*
1393             QQ: We probably should not throw warning for each field.
1394             But how about intention to always have the same number
1395             of warnings in THD::cuted_fields (and get rid of cuted_fields
1396             in the end ?)
1397           */
1398           thd->cuted_fields++;
1399           push_warning_printf(thd, Sql_condition::SL_WARNING,
1400                               ER_WARN_TOO_FEW_RECORDS,
1401                               ER(ER_WARN_TOO_FEW_RECORDS),
1402                               thd->get_stmt_da()->current_row_for_condition());
1403         }
1404         else
1405         {
1406           assert(NULL != dynamic_cast<Item_user_var_as_out_param*>(item));
1407           ((Item_user_var_as_out_param *)item)->set_null_value(cs);
1408         }
1409       }
1410     }
1411 
1412     if (thd->killed ||
1413         fill_record_n_invoke_before_triggers(thd, &info, set_fields,
1414                                              set_values, table,
1415                                              TRG_EVENT_INSERT,
1416                                              table->s->fields))
1417       DBUG_RETURN(1);
1418 
1419     switch (table_list->view_check_option(thd)) {
1420     case VIEW_CHECK_SKIP:
1421       read_info.next_line();
1422       goto continue_loop;
1423     case VIEW_CHECK_ERROR:
1424       DBUG_RETURN(-1);
1425     }
1426 
1427     if (write_record(thd, table, &info, NULL))
1428       DBUG_RETURN(1);
1429 
1430     /*
1431       We don't need to reset auto-increment field since we are restoring
1432       its default value at the beginning of each loop iteration.
1433     */
1434     thd->get_stmt_da()->inc_current_row_for_condition();
1435     continue_loop:;
1436   }
1437   DBUG_RETURN(MY_TEST(read_info.error) || thd->is_error());
1438 } /* load xml end */
1439 
1440 
1441 /* Unescape all escape characters, mark \N as null */
1442 
1443 char
unescape(char chr)1444 READ_INFO::unescape(char chr)
1445 {
1446   /* keep this switch synchornous with the ESCAPE_CHARS macro */
1447   switch(chr) {
1448   case 'n': return '\n';
1449   case 't': return '\t';
1450   case 'r': return '\r';
1451   case 'b': return '\b';
1452   case '0': return 0;				// Ascii null
1453   case 'Z': return '\032';			// Win32 end of file
1454   case 'N': found_null=1;
1455 
1456     /* fall through */
1457   default:  return chr;
1458   }
1459 }
1460 
1461 
/*
  Read a line using buffering.
  If the last line is empty (in line mode), it is not output.
*/
1466 
1467 
READ_INFO(File file_par,uint tot_length,const CHARSET_INFO * cs,const String & field_term,const String & line_start,const String & line_term,const String & enclosed_par,int escape,bool get_it_from_net,bool is_fifo)1468 READ_INFO::READ_INFO(File file_par, uint tot_length, const CHARSET_INFO *cs,
1469                      const String &field_term,
1470                      const String &line_start,
1471                      const String &line_term,
1472                      const String &enclosed_par,
1473                      int escape, bool get_it_from_net, bool is_fifo)
1474   :file(file_par), buff_length(tot_length), escape_char(escape),
1475    found_end_of_line(false), eof(false), need_end_io_cache(false),
1476    error(false), line_cuted(false), found_null(false), read_charset(cs)
1477 {
1478   /*
1479     Field and line terminators must be interpreted as sequence of unsigned char.
1480     Otherwise, non-ascii terminators will be negative on some platforms,
1481     and positive on others (depending on the implementation of char).
1482   */
1483   field_term_ptr=
1484     static_cast<const uchar*>(static_cast<const void*>(field_term.ptr()));
1485   field_term_length= field_term.length();
1486   line_term_ptr=
1487     static_cast<const uchar*>(static_cast<const void*>(line_term.ptr()));
1488   line_term_length= line_term.length();
1489 
1490   level= 0; /* for load xml */
1491   if (line_start.length() == 0)
1492   {
1493     line_start_ptr=0;
1494     start_of_line= 0;
1495   }
1496   else
1497   {
1498     line_start_ptr= line_start.ptr();
1499     line_start_end=line_start_ptr+line_start.length();
1500     start_of_line= 1;
1501   }
1502   /* If field_terminator == line_terminator, don't use line_terminator */
1503   if (field_term_length == line_term_length &&
1504       !memcmp(field_term_ptr,line_term_ptr,field_term_length))
1505   {
1506     line_term_length=0;
1507     line_term_ptr= NULL;
1508   }
1509   enclosed_char= (enclosed_length=enclosed_par.length()) ?
1510     (uchar) enclosed_par[0] : INT_MAX;
1511   field_term_char= field_term_length ? field_term_ptr[0] : INT_MAX;
1512   line_term_char= line_term_length ? line_term_ptr[0] : INT_MAX;
1513 
1514 
1515   /* Set of a stack for unget if long terminators */
1516   size_t length= max<size_t>(cs->mbmaxlen, max(field_term_length, line_term_length)) + 1;
1517   set_if_bigger(length,line_start.length());
1518   stack=stack_pos=(int*) sql_alloc(sizeof(int)*length);
1519 
1520   if (!(buffer=(uchar*) my_malloc(key_memory_READ_INFO,
1521                                   buff_length+1, MYF(MY_WME))))
1522     error= true; /* purecov: inspected */
1523   else
1524   {
1525     end_of_buff=buffer+buff_length;
1526     if (init_io_cache(&cache,(get_it_from_net) ? -1 : file, 0,
1527 		      (get_it_from_net) ? READ_NET :
1528 		      (is_fifo ? READ_FIFO : READ_CACHE),0L,1,
1529 		      MYF(MY_WME)))
1530     {
1531       my_free(buffer); /* purecov: inspected */
1532       buffer= NULL;
1533       error= true;
1534     }
1535     else
1536     {
1537       /*
1538 	init_io_cache() will not initialize read_function member
1539 	if the cache is READ_NET. So we work around the problem with a
1540 	manual assignment
1541       */
1542       need_end_io_cache = 1;
1543 
1544 #ifndef EMBEDDED_LIBRARY
1545       if (get_it_from_net)
1546 	cache.read_function = _my_b_net_read;
1547 
1548       if (mysql_bin_log.is_open())
1549 	cache.pre_read = cache.pre_close =
1550 	  (IO_CACHE_CALLBACK) log_loaded_block;
1551 #endif
1552     }
1553   }
1554 }
1555 
1556 
~READ_INFO()1557 READ_INFO::~READ_INFO()
1558 {
1559   if (need_end_io_cache)
1560     ::end_io_cache(&cache);
1561 
1562   if (buffer != NULL)
1563     my_free(buffer);
1564   List_iterator<XML_TAG> xmlit(taglist);
1565   XML_TAG *t;
1566   while ((t= xmlit++))
1567     delete(t);
1568 }
1569 
1570 
/**
  Determine the byte length of the multi-byte character starting with chr.

  The logic here is similar to my_mbcharlen(), except that it may consume
  one more input byte: when my_mbcharlen() returns 0 and
  my_mbmaxlenlen(cs) is 2, the next byte is fetched with GET and
  my_mbcharlen_2() is asked for the length based on both bytes. The extra
  byte is given back with PUSH only when the resulting length is non-zero;
  if the length stays 0 the byte is not pushed back.

  The macro expands to uses of GET and PUSH, so it can only be used where
  those are available.

  @param[in]  cs  charset info
  @param[in]  chr the first char of sequence
  @param[out] len the length of multi-byte char; 0 when the sequence is
                  invalid or the input ended before the length was known
*/
#define GET_MBCHARLEN(cs, chr, len)                                           \
  do {                                                                        \
    len= my_mbcharlen((cs), (chr));                                           \
    if (len == 0 && my_mbmaxlenlen((cs)) == 2)                                \
    {                                                                         \
      int chr1= GET;                                                          \
      if (chr1 != my_b_EOF)                                                   \
      {                                                                       \
        len= my_mbcharlen_2((cs), (chr), chr1);                               \
        /* Character is gb18030 or invalid (len = 0) */                       \
        assert(len == 0 || len == 2 || len == 4);                       \
      }                                                                       \
      if (len != 0)                                                           \
        PUSH(chr1);                                                           \
    }                                                                         \
  } while (0)
1594 
1595 
terminator(const uchar * ptr,size_t length)1596 inline int READ_INFO::terminator(const uchar *ptr, size_t length)
1597 {
1598   int chr=0;					// Keep gcc happy
1599   size_t i;
1600   for (i=1 ; i < length ; i++)
1601   {
1602     chr= GET;
1603     if (chr != *++ptr)
1604     {
1605       break;
1606     }
1607   }
1608   if (i == length)
1609     return 1;
1610   PUSH(chr);
1611   while (i-- > 1)
1612     PUSH(*--ptr);
1613   return 0;
1614 }
1615 
1616 
read_field()1617 int READ_INFO::read_field()
1618 {
1619   int chr,found_enclosed_char;
1620   uchar *to,*new_buffer;
1621 
1622   found_null=0;
1623   if (found_end_of_line)
1624     return 1;					// One have to call next_line
1625 
1626   /* Skip until we find 'line_start' */
1627 
1628   if (start_of_line)
1629   {						// Skip until line_start
1630     start_of_line=0;
1631     if (find_start_of_fields())
1632       return 1;
1633   }
1634   if ((chr=GET) == my_b_EOF)
1635   {
1636     found_end_of_line=eof=1;
1637     return 1;
1638   }
1639   to=buffer;
1640   if (chr == enclosed_char)
1641   {
1642     found_enclosed_char=enclosed_char;
1643     *to++=(uchar) chr;				// If error
1644   }
1645   else
1646   {
1647     found_enclosed_char= INT_MAX;
1648     PUSH(chr);
1649   }
1650 
1651   for (;;)
1652   {
1653     bool escaped_mb= false;
1654     while ( to < end_of_buff)
1655     {
1656       chr = GET;
1657       if (chr == my_b_EOF)
1658 	goto found_eof;
1659       if (chr == escape_char)
1660       {
1661 	if ((chr=GET) == my_b_EOF)
1662 	{
1663 	  *to++= (uchar) escape_char;
1664 	  goto found_eof;
1665 	}
1666         /*
1667           When escape_char == enclosed_char, we treat it like we do for
1668           handling quotes in SQL parsing -- you can double-up the
1669           escape_char to include it literally, but it doesn't do escapes
1670           like \n. This allows: LOAD DATA ... ENCLOSED BY '"' ESCAPED BY '"'
1671           with data like: "fie""ld1", "field2"
1672          */
1673         if (escape_char != enclosed_char || chr == escape_char)
1674         {
1675           uint ml;
1676           GET_MBCHARLEN(read_charset, chr, ml);
1677           /*
1678             For escaped multibyte character, push back the first byte,
1679             and will handle it below.
1680             Because multibyte character's second byte is possible to be
1681             0x5C, per Query_result_export::send_data, both head byte and
1682             tail byte are escaped for such characters. So mark it if the
1683             head byte is escaped and will handle it below.
1684           */
1685           if (ml == 1)
1686             *to++= (uchar) unescape((char) chr);
1687           else
1688           {
1689             escaped_mb= true;
1690             PUSH(chr);
1691           }
1692           continue;
1693         }
1694         PUSH(chr);
1695         chr= escape_char;
1696       }
1697       if (chr == line_term_char && found_enclosed_char == INT_MAX)
1698       {
1699 	if (terminator(line_term_ptr,line_term_length))
1700 	{					// Maybe unexpected linefeed
1701 	  enclosed=0;
1702 	  found_end_of_line=1;
1703 	  row_start=buffer;
1704 	  row_end=  to;
1705 	  return 0;
1706 	}
1707       }
1708       if (chr == found_enclosed_char)
1709       {
1710 	if ((chr=GET) == found_enclosed_char)
1711 	{					// Remove dupplicated
1712 	  *to++ = (uchar) chr;
1713 	  continue;
1714 	}
1715 	// End of enclosed field if followed by field_term or line_term
1716 	if (chr == my_b_EOF ||
1717 	    (chr == line_term_char && terminator(line_term_ptr,
1718 						line_term_length)))
1719 	{					// Maybe unexpected linefeed
1720 	  enclosed=1;
1721 	  found_end_of_line=1;
1722 	  row_start=buffer+1;
1723 	  row_end=  to;
1724 	  return 0;
1725 	}
1726 	if (chr == field_term_char &&
1727 	    terminator(field_term_ptr,field_term_length))
1728 	{
1729 	  enclosed=1;
1730 	  row_start=buffer+1;
1731 	  row_end=  to;
1732 	  return 0;
1733 	}
1734 	/*
1735 	  The string didn't terminate yet.
1736 	  Store back next character for the loop
1737 	*/
1738 	PUSH(chr);
1739 	/* copy the found term character to 'to' */
1740 	chr= found_enclosed_char;
1741       }
1742       else if (chr == field_term_char && found_enclosed_char == INT_MAX)
1743       {
1744 	if (terminator(field_term_ptr,field_term_length))
1745 	{
1746 	  enclosed=0;
1747 	  row_start=buffer;
1748 	  row_end=  to;
1749 	  return 0;
1750 	}
1751       }
1752 
1753       uint ml;
1754       GET_MBCHARLEN(read_charset, chr, ml);
1755       if (ml == 0)
1756       {
1757         *to= '\0';
1758         my_error(ER_INVALID_CHARACTER_STRING, MYF(0),
1759                  read_charset->csname, buffer);
1760         error= true;
1761         return 1;
1762       }
1763 
1764 
1765       if (ml > 1 &&
1766           to + ml <= end_of_buff)
1767       {
1768         uchar* p= to;
1769         *to++ = chr;
1770 
1771         for (uint i= 1; i < ml; i++)
1772         {
1773           chr= GET;
1774           if (chr == my_b_EOF)
1775           {
1776             /*
1777              Need to back up the bytes already ready from illformed
1778              multi-byte char
1779             */
1780             to-= i;
1781             goto found_eof;
1782           }
1783           else if (chr == escape_char && escaped_mb)
1784           {
1785             // Unescape the second byte if it is escaped.
1786             chr= GET;
1787             chr= (uchar) unescape((char) chr);
1788           }
1789           *to++ = chr;
1790         }
1791         if (escaped_mb)
1792           escaped_mb= false;
1793         if (my_ismbchar(read_charset,
1794                         (const char *)p,
1795                         (const char *)to))
1796           continue;
1797         for (uint i= 0; i < ml; i++)
1798           PUSH(*--to);
1799         chr= GET;
1800       }
1801       else if (ml > 1)
1802       {
1803         // Buffer is too small, exit while loop, and reallocate.
1804         PUSH(chr);
1805         break;
1806       }
1807       *to++ = (uchar) chr;
1808     }
1809     /*
1810     ** We come here if buffer is too small. Enlarge it and continue
1811     */
1812     if (!(new_buffer=(uchar*) my_realloc(key_memory_READ_INFO,
1813                                          (char*) buffer,buff_length+1+IO_SIZE,
1814 					MYF(MY_WME))))
1815       return (error= true);
1816     to=new_buffer + (to-buffer);
1817     buffer=new_buffer;
1818     buff_length+=IO_SIZE;
1819     end_of_buff=buffer+buff_length;
1820   }
1821 
1822 found_eof:
1823   enclosed=0;
1824   found_end_of_line=eof=1;
1825   row_start=buffer;
1826   row_end=to;
1827   return 0;
1828 }
1829 
1830 /*
1831   Read a row with fixed length.
1832 
1833   NOTES
1834     The row may not be fixed size on disk if there are escape
1835     characters in the file.
1836 
1837   IMPLEMENTATION NOTE
1838     One can't use fixed length with multi-byte charset **
1839 
1840   RETURN
1841     0  ok
1842     1  error
1843 */
1844 
read_fixed_length()1845 int READ_INFO::read_fixed_length()
1846 {
1847   int chr;
1848   uchar *to;
1849   if (found_end_of_line)
1850     return 1;					// One have to call next_line
1851 
1852   if (start_of_line)
1853   {						// Skip until line_start
1854     start_of_line=0;
1855     if (find_start_of_fields())
1856       return 1;
1857   }
1858 
1859   to=row_start=buffer;
1860   while (to < end_of_buff)
1861   {
1862     if ((chr=GET) == my_b_EOF)
1863       goto found_eof;
1864     if (chr == escape_char)
1865     {
1866       if ((chr=GET) == my_b_EOF)
1867       {
1868 	*to++= (uchar) escape_char;
1869 	goto found_eof;
1870       }
1871       *to++ =(uchar) unescape((char) chr);
1872       continue;
1873     }
1874     if (chr == line_term_char)
1875     {
1876       if (terminator(line_term_ptr,line_term_length))
1877       {						// Maybe unexpected linefeed
1878 	found_end_of_line=1;
1879 	row_end=  to;
1880 	return 0;
1881       }
1882     }
1883     *to++ = (uchar) chr;
1884   }
1885   row_end=to;					// Found full line
1886   return 0;
1887 
1888 found_eof:
1889   found_end_of_line=eof=1;
1890   row_start=buffer;
1891   row_end=to;
1892   return to == buffer ? 1 : 0;
1893 }
1894 
1895 
next_line()1896 int READ_INFO::next_line()
1897 {
1898   line_cuted=0;
1899   start_of_line= line_start_ptr != 0;
1900   if (found_end_of_line || eof)
1901   {
1902     found_end_of_line=0;
1903     return eof;
1904   }
1905   found_end_of_line=0;
1906   if (!line_term_length)
1907     return 0;					// No lines
1908   for (;;)
1909   {
1910     int chr = GET;
1911     uint ml;
1912     if (chr == my_b_EOF)
1913     {
1914       eof= 1;
1915       return 1;
1916     }
1917    GET_MBCHARLEN(read_charset, chr, ml);
1918    if (ml > 1)
1919    {
1920        for (uint i=1;
1921             chr != my_b_EOF && i < ml;
1922             i++)
1923 	   chr = GET;
1924        if (chr == escape_char)
1925 	   continue;
1926    }
1927    if (chr == my_b_EOF)
1928    {
1929       eof=1;
1930       return 1;
1931     }
1932     if (chr == escape_char)
1933     {
1934       line_cuted=1;
1935       if (GET == my_b_EOF)
1936 	return 1;
1937       continue;
1938     }
1939     if (chr == line_term_char && terminator(line_term_ptr,line_term_length))
1940       return 0;
1941     line_cuted=1;
1942   }
1943 }
1944 
1945 
find_start_of_fields()1946 bool READ_INFO::find_start_of_fields()
1947 {
1948   int chr;
1949  try_again:
1950   do
1951   {
1952     if ((chr=GET) == my_b_EOF)
1953     {
1954       found_end_of_line=eof=1;
1955       return 1;
1956     }
1957   } while ((char) chr != line_start_ptr[0]);
1958   for (const char *ptr=line_start_ptr+1 ; ptr != line_start_end ; ptr++)
1959   {
1960     chr=GET;					// Eof will be checked later
1961     if ((char) chr != *ptr)
1962     {						// Can't be line_start
1963       PUSH(chr);
1964       while (--ptr != line_start_ptr)
1965       {						// Restart with next char
1966 	PUSH( *ptr);
1967       }
1968       goto try_again;
1969     }
1970   }
1971   return 0;
1972 }
1973 
1974 
1975 /*
1976   Clear taglist from tags with a specified level
1977 */
clear_level(int level_arg)1978 int READ_INFO::clear_level(int level_arg)
1979 {
1980   DBUG_ENTER("READ_INFO::read_xml clear_level");
1981   List_iterator<XML_TAG> xmlit(taglist);
1982   xmlit.rewind();
1983   XML_TAG *tag;
1984 
1985   while ((tag= xmlit++))
1986   {
1987      if(tag->level >= level_arg)
1988      {
1989        xmlit.remove();
1990        delete tag;
1991      }
1992   }
1993   DBUG_RETURN(0);
1994 }
1995 
1996 
/*
  Convert an XML entity name (without the surrounding '&' and ';')
  to the character it stands for.

  'name' does not have to be NUL-terminated; exactly 'length' bytes are
  compared, case-sensitively, against gt, lt, amp, quot and apos.

  Return the character, or -1 if the name is not one of the above.
*/
static int
my_xml_entity_to_char(const char *name, size_t length)
{
  static const struct
  {
    const char *text;
    size_t text_length;
    int code;
  } known_entities[]=
  {
    { "gt",   2, '>'  },
    { "lt",   2, '<'  },
    { "amp",  3, '&'  },
    { "quot", 4, '"'  },
    { "apos", 4, '\'' }
  };
  const size_t entity_count= sizeof(known_entities) / sizeof(known_entities[0]);

  for (size_t i= 0; i < entity_count; i++)
  {
    if (length == known_entities[i].text_length &&
        memcmp(name, known_entities[i].text, length) == 0)
      return known_entities[i].code;
  }
  return -1;
}
2025 
2026 
/**
  @brief Map tab, carriage return and line feed to a plain space

  @param chr    character

  @return ' ' if chr is '\t', '\r' or '\n'; otherwise chr unchanged

  @details According to the "XML 1.0" standard,
           only space (#x20) characters, carriage returns,
           line feeds or tabs are considered as spaces.
           All of them are folded to space (#x20) so that the parser
           has a single whitespace character to test for.
*/
static int
my_tospace(int chr)
{
  switch (chr)
  {
  case '\t':
  case '\r':
  case '\n':
    return ' ';
  default:
    return chr;
  }
}
2042 
2043 
2044 /*
2045   Read an xml value: handle multibyte and xml escape
2046 */
read_value(int delim,String * val)2047 int READ_INFO::read_value(int delim, String *val)
2048 {
2049   int chr;
2050   String tmp;
2051 
2052   for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF;)
2053   {
2054     uint ml;
2055     GET_MBCHARLEN(read_charset, chr, ml);
2056     if (ml == 0)
2057     {
2058       chr= my_b_EOF;
2059       val->length(0);
2060       return chr;
2061     }
2062 
2063     if (ml > 1)
2064     {
2065       DBUG_PRINT("read_xml",("multi byte"));
2066 
2067       for (uint i= 1; i < ml; i++)
2068       {
2069         val->append(chr);
2070         /*
2071           Don't use my_tospace() in the middle of a multi-byte character
2072           TODO: check that the multi-byte sequence is valid.
2073         */
2074         chr= GET;
2075         if (chr == my_b_EOF)
2076           return chr;
2077       }
2078     }
2079     if(chr == '&')
2080     {
2081       tmp.length(0);
2082       for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET))
2083       {
2084         if (chr == my_b_EOF)
2085           return chr;
2086         tmp.append(chr);
2087       }
2088       if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0)
2089         val->append(chr);
2090       else
2091       {
2092         val->append('&');
2093         val->append(tmp);
2094         val->append(';');
2095       }
2096     }
2097     else
2098       val->append(chr);
2099     chr= GET;
2100   }
2101   return my_tospace(chr);
2102 }
2103 
2104 
/*
  Read a record in xml format
  tags and attributes are stored in taglist
  when tag set in ROWS IDENTIFIED BY is closed, we are ready and return

  The input is consumed one character at a time (tab, CR and LF are
  folded to a space by my_tospace()) and dispatched on that character.
  Each case leaves the next character to dispatch on in 'chr'.

  RETURN
    0  the closing tag of a row was read
    1  end of input was reached (eof is set)
*/
int READ_INFO::read_xml()
{
  DBUG_ENTER("READ_INFO::read_xml");
  int chr, chr2, chr3;
  int delim= 0;                       /* quote char around attribute values */
  String tag, attribute, value;
  bool in_tag= false;                 /* true while inside '<tag ... ' */

  tag.length(0);
  attribute.length(0);
  value.length(0);

  for (chr= my_tospace(GET); chr != my_b_EOF ; )
  {
    switch(chr){
    case '<':  /* read tag */
        /* TODO: check if this is a comment <!-- comment -->  */
      chr= my_tospace(GET);
      if(chr == '!')
      {
        /*
          Look at the next two characters. Note that they are not pushed
          back when they turn out not to be "--".
        */
        chr2= GET;
        chr3= GET;

        if(chr2 == '-' && chr3 == '-')
        {
          /*
            Inside a comment: skip until "-->". chr2 and chr3 track the
            dashes seen immediately before the current character.
          */
          chr2= 0;
          chr3= 0;
          chr= my_tospace(GET);

          while(chr != '>' || chr2 != '-' || chr3 != '-')
          {
            if(chr == '-')
            {
              chr3= chr2;
              chr2= chr;
            }
            else if (chr2 == '-')
            {
              /* a non-dash character breaks the run of dashes */
              chr2= 0;
              chr3= 0;
            }
            chr= my_tospace(GET);
            if (chr == my_b_EOF)
              goto found_eof;
          }
          break;
        }
      }

      /* Collect the tag name up to a blank, '>', '/' or end of input */
      tag.length(0);
      while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF)
      {
        if(chr != delim) /* fix for the '<field name =' format */
          tag.append(chr);
        chr= my_tospace(GET);
      }

      // row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term
      // (this check only produces debug output)
      if((tag.length() == line_term_length -2) &&
         (memcmp(tag.ptr(), line_term_ptr + 1, tag.length()) == 0))
      {
        DBUG_PRINT("read_xml", ("start-of-row: %i %s %s",
                                level,tag.c_ptr_safe(), line_term_ptr));
      }

      /*
        An opening tag goes one level deeper; entries left in taglist
        from levels below the new one are dropped.
      */
      if(chr == ' ' || chr == '>')
      {
        level++;
        clear_level(level + 1);
      }

      /* A blank after the tag name means attributes may follow */
      if (chr == ' ')
        in_tag= true;
      else
        in_tag= false;
      break;

    case ' ': /* read attribute */
      while(chr == ' ')  /* skip blanks */
        chr= my_tospace(GET);

      /* Blanks outside of a tag carry no information */
      if(!in_tag)
        break;

      /* Append to the attribute name up to '=', '/', '>' or end of input */
      while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF)
      {
        attribute.append(chr);
        chr= my_tospace(GET);
      }
      break;

    case '>': /* end tag - read tag value */
      in_tag= false;
      /* Skip all whitespaces */
      while (' ' == (chr= my_tospace(GET)))
      {
      }
      /*
        Push the first non-whitespace char back to Stack. This char would be
        read in the upcoming call to read_value()
       */
      PUSH(chr);
      /* The value runs up to the '<' of the next tag */
      chr= read_value('<', &value);
      if(chr == my_b_EOF)
        goto found_eof;

      /* save value to list */
      if(tag.length() > 0 && value.length() > 0)
      {
        DBUG_PRINT("read_xml", ("lev:%i tag:%s val:%s",
                                level,tag.c_ptr_safe(), value.c_ptr_safe()));
        taglist.push_front( new XML_TAG(level, tag, value));
      }
      tag.length(0);
      value.length(0);
      attribute.length(0);
      break;

    case '/': /* close tag */
      chr= my_tospace(GET);
      /* Decrease the 'level' only when (i) It's not an */
      /* (without space) empty tag i.e. <tag/> or, (ii) */
      /* It is of format <row col="val" .../>           */
      if(chr != '>' || in_tag)
      {
        level--;
        in_tag= false;
      }
      if(chr != '>')   /* if this is an empty tag <tag   /> */
        tag.length(0); /* we should keep tag value          */
      /* Collect the name of the tag being closed */
      while(chr != '>' && chr != my_b_EOF)
      {
        tag.append(chr);
        chr= my_tospace(GET);
      }

      /*
        The row is complete when the closed tag equals the row tag, i.e.
        line_term without its first and last character.
      */
      if((tag.length() == line_term_length -2) &&
         (memcmp(tag.ptr(), line_term_ptr + 1, tag.length()) == 0))
      {
         DBUG_PRINT("read_xml", ("found end-of-row %i %s",
                                 level, tag.c_ptr_safe()));
         DBUG_RETURN(0); //normal return
      }
      chr= my_tospace(GET);
      break;

    case '=': /* attribute name end - read the value */
      //check for tag field and attribute name
      /*
        NOTE(review): assuming STRING_WITH_LEN supplies the literal and its
        length, memcmp only compares the leading bytes, so this looks like
        a prefix match on "field" and "name" rather than an exact match
        (an attribute collected as "name " would also match) - confirm
        whether that is intended before tightening it.
      */
      if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) &&
         !memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name")))
      {
        /*
          this is format <field name="xx">xx</field>
          where actual fieldname is in attribute
        */
        /*
          Remember the character after '=' as delimiter and re-enter the
          '<' case so that what follows is read as the tag name; the
          level++ done there is compensated here.
        */
        delim= my_tospace(GET);
        tag.length(0);
        attribute.length(0);
        chr= '<'; /* we pretend that it is a tag */
        level--;
        break;
      }

      //check for " or '
      chr= GET;
      if (chr == my_b_EOF)
        goto found_eof;
      if(chr == '"' || chr == '\'')
      {
        delim= chr;
      }
      else
      {
        delim= ' '; /* no delimiter, use space */
        PUSH(chr);
      }

      /* Attributes are stored one level below the tag they belong to */
      chr= read_value(delim, &value);
      if(attribute.length() > 0 && value.length() > 0)
      {
        DBUG_PRINT("read_xml", ("lev:%i att:%s val:%s\n",
                                level + 1,
                                attribute.c_ptr_safe(),
                                value.c_ptr_safe()));
        taglist.push_front(new XML_TAG(level + 1, attribute, value));
      }
      attribute.length(0);
      value.length(0);
      /* Keep a terminating blank so that the ' ' case sees it */
      if (chr != ' ')
        chr= my_tospace(GET);
      break;

    default:
      /* Any other character is skipped */
      chr= my_tospace(GET);
    } /* end switch */
  } /* end while */

found_eof:
  DBUG_PRINT("read_xml",("Found eof"));
  eof= 1;
  DBUG_RETURN(1);
}
2312