1 /*
2    Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
23 
24 
25 /* Copy data from a textfile to table */
26 /* 2006-12 Erik Wetterberg : LOAD XML added */
27 
28 #include "sql_load.h"
29 #include "sql_cache.h"                          // query_cache_*
30 #include "sql_base.h"          // fill_record_n_invoke_before_triggers
31 #include <my_dir.h>
32 #include "sql_view.h"                           // check_key_in_view
33 #include "sql_insert.h" // check_that_all_fields_are_given_values,
34                         // prepare_triggers_for_insert_stmt,
35                         // write_record
36 #include "auth_common.h"// INSERT_ACL, UPDATE_ACL
37 #include "log_event.h"  // Delete_file_log_event,
38                         // Execute_load_query_log_event,
39                         // LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F
40 #include <m_ctype.h>
41 #include "rpl_mi.h"
42 #include "rpl_slave.h"
43 #include "table_trigger_dispatcher.h"  // Table_trigger_dispatcher
44 #include "sql_show.h"
45 #include "item_timefunc.h"  // Item_func_now_local
46 #include "rpl_rli.h"     // Relay_log_info
47 #include "log.h"
48 
49 #include "pfs_file_provider.h"
50 #include "mysql/psi/mysql_file.h"
51 
52 #include <algorithm>
53 
54 using std::min;
55 using std::max;
56 
57 class XML_TAG {
58 public:
59   int level;
60   String field;
61   String value;
62   XML_TAG(int l, String f, String v);
63 };
64 
65 
XML_TAG(int l,String f,String v)66 XML_TAG::XML_TAG(int l, String f, String v)
67 {
68   level= l;
69   field.append(f);
70   value.append(v);
71 }
72 
73 
/*
  Character-level helpers for the READ_INFO push-back stack.  Both macros
  expand in a scope where 'stack', 'stack_pos' and 'cache' are visible.

  GET      yields the most recently pushed value when the stack is not empty
           (popping it); otherwise it yields my_b_get(&cache).
  PUSH(A)  stores A at *stack_pos and advances stack_pos.
*/
#define GET ((stack_pos == stack) ? my_b_get(&cache) : *--stack_pos)
#define PUSH(A) *stack_pos++ = (A)
76 
77 class READ_INFO {
78   File	file;
79   uchar	*buffer,			/* Buffer for read text */
80 	*end_of_buff;			/* Data in bufferts ends here */
81   uint	buff_length;			/* Length of buffer */
82   const uchar *field_term_ptr, *line_term_ptr;
83   const char *line_start_ptr, *line_start_end;
84   size_t	field_term_length,line_term_length,enclosed_length;
85   int	field_term_char,line_term_char,enclosed_char,escape_char;
86   int	*stack,*stack_pos;
87   bool	found_end_of_line,start_of_line,eof;
88   bool  need_end_io_cache;
89   IO_CACHE cache;
90   int level; /* for load xml */
91 
92 public:
93   bool error,line_cuted,found_null,enclosed;
94   uchar	*row_start,			/* Found row starts here */
95 	*row_end;			/* Found row ends here */
96   const CHARSET_INFO *read_charset;
97 
98   READ_INFO(File file,uint tot_length,const CHARSET_INFO *cs,
99 	    const String &field_term,
100             const String &line_start,
101             const String &line_term,
102 	    const String &enclosed,
103             int escape,bool get_it_from_net, bool is_fifo);
104   ~READ_INFO();
105   int read_field();
106   int read_fixed_length(void);
107   int next_line(void);
108   char unescape(char chr);
109   int terminator(const uchar *ptr, size_t length);
110   bool find_start_of_fields();
111   /* load xml */
112   List<XML_TAG> taglist;
113   int read_value(int delim, String *val);
114   int read_xml();
115   int clear_level(int level);
116 
117   /*
118     We need to force cache close before destructor is invoked to log
119     the last read block
120   */
end_io_cache()121   void end_io_cache()
122   {
123     ::end_io_cache(&cache);
124     need_end_io_cache = 0;
125   }
126 
127   /*
128     Either this method, or we need to make cache public
129     Arg must be set from mysql_load() since constructor does not see
130     either the table or THD value
131   */
set_io_cache_arg(void * arg)132   void set_io_cache_arg(void* arg) { cache.arg = arg; }
133 
134   /**
135     skip all data till the eof.
136   */
skip_data_till_eof()137   void skip_data_till_eof()
138   {
139     while (GET != my_b_EOF)
140       ;
141   }
142 };
143 
144 static int read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
145                              List<Item> &fields_vars, List<Item> &set_fields,
146                              List<Item> &set_values, READ_INFO &read_info,
147 			     ulong skip_lines);
148 static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
149                           List<Item> &fields_vars, List<Item> &set_fields,
150                           List<Item> &set_values, READ_INFO &read_info,
151 			  const String &enclosed, ulong skip_lines);
152 
153 static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
154                           List<Item> &fields_vars, List<Item> &set_fields,
155                           List<Item> &set_values, READ_INFO &read_info,
156                           ulong skip_lines);
157 
158 #ifndef EMBEDDED_LIBRARY
159 static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex,
160                                                const char* db_arg, /* table's database */
161                                                const char* table_name_arg,
162                                                bool is_concurrent,
163                                                enum enum_duplicates duplicates,
164                                                bool transactional_table,
165                                                int errocode);
166 #endif /* EMBEDDED_LIBRARY */
167 
168 /*
169   Execute LOAD DATA query
170 
171   SYNOPSYS
172     mysql_load()
173       thd - current thread
174       ex  - sql_exchange object representing source file and its parsing rules
175       table_list  - list of tables to which we are loading data
176       fields_vars - list of fields and variables to which we read
177                     data from file
178       set_fields  - list of fields mentioned in set clause
179       set_values  - expressions to assign to fields in previous list
180       handle_duplicates - indicates whenever we should emit error or
181                           replace row if we will meet duplicates.
182       read_file_from_client - is this LOAD DATA LOCAL ?
183 
184   RETURN VALUES
185     TRUE - error / FALSE - success
186 */
187 
mysql_load(THD * thd,sql_exchange * ex,TABLE_LIST * table_list,List<Item> & fields_vars,List<Item> & set_fields,List<Item> & set_values,enum enum_duplicates handle_duplicates,bool read_file_from_client)188 int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
189 	        List<Item> &fields_vars, List<Item> &set_fields,
190                 List<Item> &set_values,
191                 enum enum_duplicates handle_duplicates,
192                 bool read_file_from_client)
193 {
194   char name[FN_REFLEN];
195   File file;
196   int error= 0;
197   const String *field_term= ex->field.field_term;
198   const String *escaped=    ex->field.escaped;
199   const String *enclosed=   ex->field.enclosed;
200   bool is_fifo=0;
201   SELECT_LEX *select= thd->lex->select_lex;
202 #ifndef EMBEDDED_LIBRARY
203   LOAD_FILE_INFO lf_info;
204   THD::killed_state killed_status= THD::NOT_KILLED;
205   bool is_concurrent;
206   bool transactional_table;
207 #endif
208   const char *db = table_list->db;			// This is never null
209   /*
210     If path for file is not defined, we will use the current database.
211     If this is not set, we will use the directory where the table to be
212     loaded is located
213   */
214   const char *tdb= thd->db().str ? thd->db().str : db; //Result is never null
215   ulong skip_lines= ex->skip_lines;
216   DBUG_ENTER("mysql_load");
217 
218   /*
219     Bug #34283
220     mysqlbinlog leaves tmpfile after termination if binlog contains
221     load data infile, so in mixed mode we go to row-based for
222     avoiding the problem.
223   */
224   thd->set_current_stmt_binlog_format_row_if_mixed();
225 
226 #ifdef EMBEDDED_LIBRARY
227   read_file_from_client  = 0; //server is always in the same process
228 #endif
229 
230   if (escaped->length() > 1 || enclosed->length() > 1)
231   {
232     my_message(ER_WRONG_FIELD_TERMINATORS,ER(ER_WRONG_FIELD_TERMINATORS),
233 	       MYF(0));
234     DBUG_RETURN(TRUE);
235   }
236 
237   /* Report problems with non-ascii separators */
238   if (!escaped->is_ascii() || !enclosed->is_ascii() ||
239       !field_term->is_ascii() ||
240       !ex->line.line_term->is_ascii() || !ex->line.line_start->is_ascii())
241   {
242     push_warning(thd, Sql_condition::SL_WARNING,
243                  WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED,
244                  ER(WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED));
245   }
246 
247   if (open_and_lock_tables(thd, table_list, 0))
248     DBUG_RETURN(true);
249 
250   THD_STAGE_INFO(thd, stage_executing);
251   if (select->setup_tables(thd, table_list, false))
252     DBUG_RETURN(true);
253 
254   if (run_before_dml_hook(thd))
255     DBUG_RETURN(true);
256 
257   if (table_list->is_view() && select->resolve_derived(thd, false))
258     DBUG_RETURN(true);                   /* purecov: inspected */
259 
260   TABLE_LIST *const insert_table_ref=
261     table_list->is_updatable() &&        // View must be updatable
262     !table_list->is_multiple_tables() && // Multi-table view not allowed
263     !table_list->is_derived() ?          // derived tables not allowed
264     table_list->updatable_base_table() : NULL;
265 
266   if (insert_table_ref == NULL ||
267       check_key_in_view(thd, table_list, insert_table_ref))
268   {
269     my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "LOAD");
270     DBUG_RETURN(TRUE);
271   }
272   if (select->derived_table_count &&
273       select->check_view_privileges(thd, INSERT_ACL, SELECT_ACL))
274     DBUG_RETURN(true);                   /* purecov: inspected */
275 
276   if (table_list->is_merged())
277   {
278     if (table_list->prepare_check_option(thd))
279       DBUG_RETURN(TRUE);
280 
281     if (handle_duplicates == DUP_REPLACE &&
282         table_list->prepare_replace_filter(thd))
283       DBUG_RETURN(true);
284   }
285 
286   // Pass the check option down to the underlying table:
287   insert_table_ref->check_option= table_list->check_option;
288   /*
289     Let us emit an error if we are loading data to table which is used
290     in subselect in SET clause like we do it for INSERT.
291 
292     The main thing to fix to remove this restriction is to ensure that the
293     table is marked to be 'used for insert' in which case we should never
294     mark this table as 'const table' (ie, one that has only one row).
295   */
296   if (unique_table(thd, insert_table_ref, table_list->next_global, 0))
297   {
298     my_error(ER_UPDATE_TABLE_USED, MYF(0), table_list->table_name);
299     DBUG_RETURN(TRUE);
300   }
301 
302   TABLE *const table= insert_table_ref->table;
303 
304   for (Field **cur_field= table->field; *cur_field; ++cur_field)
305     (*cur_field)->reset_warnings();
306 
307 #ifndef EMBEDDED_LIBRARY
308   transactional_table= table->file->has_transactions();
309   is_concurrent= (table_list->lock_type == TL_WRITE_CONCURRENT_INSERT);
310 #endif
311 
312   if (!fields_vars.elements)
313   {
314     Field_iterator_table_ref field_iterator;
315     field_iterator.set(table_list);
316     for (; !field_iterator.end_of_fields(); field_iterator.next())
317     {
318       Item *item;
319       if (!(item= field_iterator.create_item(thd)))
320         DBUG_RETURN(TRUE);
321 
322       if (item->field_for_view_update() == NULL)
323       {
324         my_error(ER_NONUPDATEABLE_COLUMN, MYF(0), item->item_name.ptr());
325         DBUG_RETURN(true);
326       }
327       fields_vars.push_back(item->real_item());
328     }
329     bitmap_set_all(table->write_set);
330     /*
331       Let us also prepare SET clause, altough it is probably empty
332       in this case.
333     */
334     if (setup_fields(thd, Ref_ptr_array(), set_fields, INSERT_ACL, NULL,
335                      false, true) ||
336         setup_fields(thd, Ref_ptr_array(), set_values, SELECT_ACL, NULL,
337                      false, false))
338       DBUG_RETURN(TRUE);
339   }
340   else
341   {						// Part field list
342     /*
343       Because fields_vars may contain user variables,
344       pass false for column_update in first call below.
345     */
346     if (setup_fields(thd, Ref_ptr_array(), fields_vars, INSERT_ACL, NULL,
347                      false, false) ||
348         setup_fields(thd, Ref_ptr_array(), set_fields, INSERT_ACL, NULL,
349                      false, true))
350       DBUG_RETURN(TRUE);
351 
352     /*
353       Special updatability test is needed because fields_vars may contain
354       a mix of column references and user variables.
355     */
356     Item *item;
357     List_iterator<Item> it(fields_vars);
358     while ((item= it++))
359     {
360       if ((item->type() == Item::FIELD_ITEM ||
361            item->type() == Item::REF_ITEM) &&
362           item->field_for_view_update() == NULL)
363       {
364         my_error(ER_NONUPDATEABLE_COLUMN, MYF(0), item->item_name.ptr());
365         DBUG_RETURN(true);
366       }
367     }
368     /* We explicitly ignore the return value */
369     (void)check_that_all_fields_are_given_values(thd, table, table_list);
370     /* Fix the expressions in SET clause */
371     if (setup_fields(thd, Ref_ptr_array(), set_values, SELECT_ACL, NULL,
372                      false, false))
373       DBUG_RETURN(TRUE);
374   }
375 
376   const int escape_char= (escaped->length() && (ex->escaped_given() ||
377                           !(thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES)))
378                           ? (*escaped)[0] : INT_MAX;
379 
380   /*
381     * LOAD DATA INFILE fff INTO TABLE xxx SET columns2
382     sets all columns, except if file's row lacks some: in that case,
383     defaults are set by read_fixed_length() and read_sep_field(),
384     not by COPY_INFO.
385     * LOAD DATA INFILE fff INTO TABLE xxx (columns1) SET columns2=
386     may need a default for columns other than columns1 and columns2.
387   */
388   const bool manage_defaults= fields_vars.elements != 0;
389   COPY_INFO info(COPY_INFO::INSERT_OPERATION,
390                  &fields_vars, &set_fields,
391                  manage_defaults,
392                  handle_duplicates, escape_char);
393 
394   if (info.add_function_default_columns(table, table->write_set))
395     DBUG_RETURN(TRUE);
396 
397   prepare_triggers_for_insert_stmt(table);
398 
399   uint tot_length=0;
400   bool use_blobs= 0, use_vars= 0;
401   List_iterator_fast<Item> it(fields_vars);
402   Item *item;
403 
404   while ((item= it++))
405   {
406     Item *real_item= item->real_item();
407 
408     if (real_item->type() == Item::FIELD_ITEM)
409     {
410       Field *field= ((Item_field*)real_item)->field;
411       if (field->flags & BLOB_FLAG)
412       {
413         use_blobs= 1;
414         tot_length+= 256;			// Will be extended if needed
415       }
416       else
417         tot_length+= field->field_length;
418     }
419     else if (item->type() == Item::STRING_ITEM)
420       use_vars= 1;
421   }
422   if (use_blobs && !ex->line.line_term->length() && !field_term->length())
423   {
424     my_message(ER_BLOBS_AND_NO_TERMINATED,ER(ER_BLOBS_AND_NO_TERMINATED),
425 	       MYF(0));
426     DBUG_RETURN(TRUE);
427   }
428   if (use_vars && !field_term->length() && !enclosed->length())
429   {
430     my_error(ER_LOAD_FROM_FIXED_SIZE_ROWS_TO_VAR, MYF(0));
431     DBUG_RETURN(TRUE);
432   }
433 
434 #ifndef EMBEDDED_LIBRARY
435   if (read_file_from_client)
436   {
437     (void)net_request_file(thd->get_protocol_classic()->get_net(),
438                            ex->file_name);
439     file = -1;
440   }
441   else
442 #endif
443   {
444     if (!dirname_length(ex->file_name))
445     {
446       strxnmov(name, FN_REFLEN-1, mysql_real_data_home, tdb, NullS);
447       (void) fn_format(name, ex->file_name, name, "",
448 		       MY_RELATIVE_PATH | MY_UNPACK_FILENAME);
449     }
450     else
451     {
452       (void) fn_format(name, ex->file_name, mysql_real_data_home, "",
453                        MY_RELATIVE_PATH | MY_UNPACK_FILENAME |
454                        MY_RETURN_REAL_PATH);
455     }
456 
457     if ((thd->system_thread &
458          (SYSTEM_THREAD_SLAVE_SQL | SYSTEM_THREAD_SLAVE_WORKER)) != 0)
459     {
460 #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
461       Relay_log_info* rli= thd->rli_slave->get_c_rli();
462 
463       if (strncmp(rli->slave_patternload_file, name,
464                   rli->slave_patternload_file_size))
465       {
466         /*
467           LOAD DATA INFILE in the slave SQL Thread can only read from
468           --slave-load-tmpdir". This should never happen. Please, report a bug.
469         */
470 
471         sql_print_error("LOAD DATA INFILE in the slave SQL Thread can only read from --slave-load-tmpdir. " \
472                         "Please, report a bug.");
473         my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--slave-load-tmpdir");
474         DBUG_RETURN(TRUE);
475       }
476 #else
477       /*
478         This is impossible and should never happen.
479       */
480       DBUG_ASSERT(FALSE);
481 #endif
482     }
483     else if (!is_secure_file_path(name))
484     {
485       /* Read only allowed from within dir specified by secure_file_priv */
486       my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--secure-file-priv");
487       DBUG_RETURN(TRUE);
488     }
489 
490 #if !defined(_WIN32)
491     MY_STAT stat_info;
492     if (!my_stat(name, &stat_info, MYF(MY_WME)))
493       DBUG_RETURN(TRUE);
494 
495     // if we are not in slave thread, the file must be:
496     if (!thd->slave_thread &&
497         !((stat_info.st_mode & S_IFLNK) != S_IFLNK &&   // symlink
498           ((stat_info.st_mode & S_IFREG) == S_IFREG ||  // regular file
499            (stat_info.st_mode & S_IFIFO) == S_IFIFO)))  // named pipe
500     {
501       my_error(ER_TEXTFILE_NOT_READABLE, MYF(0), name);
502       DBUG_RETURN(TRUE);
503     }
504     if ((stat_info.st_mode & S_IFIFO) == S_IFIFO)
505       is_fifo= 1;
506 #endif
507     if ((file= mysql_file_open(key_file_load,
508                                name, O_RDONLY, MYF(MY_WME))) < 0)
509 
510       DBUG_RETURN(TRUE);
511   }
512 
513   READ_INFO read_info(file,tot_length,
514                       ex->cs ? ex->cs : thd->variables.collation_database,
515 		      *field_term,*ex->line.line_start, *ex->line.line_term,
516                       *enclosed,
517 		      info.escape_char, read_file_from_client, is_fifo);
518   if (read_info.error)
519   {
520     if (file >= 0)
521       mysql_file_close(file, MYF(0));           // no files in net reading
522     DBUG_RETURN(TRUE);				// Can't allocate buffers
523   }
524 
525 #ifndef EMBEDDED_LIBRARY
526   if (mysql_bin_log.is_open())
527   {
528     lf_info.thd = thd;
529     lf_info.wrote_create_file = 0;
530     lf_info.last_pos_in_file = HA_POS_ERROR;
531     lf_info.log_delayed= transactional_table;
532     read_info.set_io_cache_arg((void*) &lf_info);
533   }
534 #endif /*!EMBEDDED_LIBRARY*/
535 
536   thd->count_cuted_fields= CHECK_FIELD_WARN;		/* calc cuted fields */
537   thd->cuted_fields=0L;
538   /* Skip lines if there is a line terminator */
539   if (ex->line.line_term->length() && ex->filetype != FILETYPE_XML)
540   {
541     /* ex->skip_lines needs to be preserved for logging */
542     while (skip_lines > 0)
543     {
544       skip_lines--;
545       if (read_info.next_line())
546 	break;
547     }
548   }
549 
550   if (!(error=MY_TEST(read_info.error)))
551   {
552 
553     table->next_number_field=table->found_next_number_field;
554     if (thd->lex->is_ignore() ||
555 	handle_duplicates == DUP_REPLACE)
556       table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
557     if (handle_duplicates == DUP_REPLACE &&
558         (!table->triggers ||
559          !table->triggers->has_delete_triggers()))
560         table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE);
561     if (thd->locked_tables_mode <= LTM_LOCK_TABLES)
562       table->file->ha_start_bulk_insert((ha_rows) 0);
563     table->copy_blobs=1;
564 
565     if (ex->filetype == FILETYPE_XML) /* load xml */
566       error= read_xml_field(thd, info, insert_table_ref, fields_vars,
567                             set_fields, set_values, read_info,
568                             skip_lines);
569     else if (!field_term->length() && !enclosed->length())
570       error= read_fixed_length(thd, info, insert_table_ref, fields_vars,
571                                set_fields, set_values, read_info,
572 			       skip_lines);
573     else
574       error= read_sep_field(thd, info, insert_table_ref, fields_vars,
575                             set_fields, set_values, read_info,
576 			    *enclosed, skip_lines);
577     if (thd->locked_tables_mode <= LTM_LOCK_TABLES &&
578         table->file->ha_end_bulk_insert() && !error)
579     {
580       table->file->print_error(my_errno(), MYF(0));
581       error= 1;
582     }
583     table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
584     table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
585     table->next_number_field=0;
586   }
587   if (file >= 0)
588     mysql_file_close(file, MYF(0));
589   free_blobs(table);				/* if pack_blob was used */
590   table->copy_blobs=0;
591   thd->count_cuted_fields= CHECK_FIELD_IGNORE;
592   /*
593      simulated killing in the middle of per-row loop
594      must be effective for binlogging
595   */
596   DBUG_EXECUTE_IF("simulate_kill_bug27571",
597                   {
598                     error=1;
599                     thd->killed= THD::KILL_QUERY;
600                   };);
601 
602 #ifndef EMBEDDED_LIBRARY
603   killed_status= (error == 0) ? THD::NOT_KILLED : thd->killed;
604 #endif
605 
606   /*
607     We must invalidate the table in query cache before binlog writing and
608     ha_autocommit_...
609   */
610   query_cache.invalidate_single(thd, insert_table_ref, false);
611   if (error)
612   {
613     if (read_file_from_client)
614       read_info.skip_data_till_eof();
615 
616 #ifndef EMBEDDED_LIBRARY
617     if (mysql_bin_log.is_open())
618     {
619       {
620 	/*
621 	  Make sure last block (the one which caused the error) gets
622 	  logged.  This is needed because otherwise after write of (to
623 	  the binlog, not to read_info (which is a cache))
624 	  Delete_file_log_event the bad block will remain in read_info
625 	  (because pre_read is not called at the end of the last
626 	  block; remember pre_read is called whenever a new block is
627 	  read from disk).  At the end of mysql_load(), the destructor
628 	  of read_info will call end_io_cache() which will flush
629 	  read_info, so we will finally have this in the binlog:
630 
631 	  Append_block # The last successfull block
632 	  Delete_file
633 	  Append_block # The failing block
634 	  which is nonsense.
635 	  Or could also be (for a small file)
636 	  Create_file  # The failing block
637 	  which is nonsense (Delete_file is not written in this case, because:
638 	  Create_file has not been written, so Delete_file is not written, then
639 	  when read_info is destroyed end_io_cache() is called which writes
640 	  Create_file.
641 	*/
642 	read_info.end_io_cache();
643 	/* If the file was not empty, wrote_create_file is true */
644 	if (lf_info.wrote_create_file)
645 	{
646           int errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
647 
648           /* since there is already an error, the possible error of
649              writing binary log will be ignored */
650 	  if (thd->get_transaction()->cannot_safely_rollback(
651 	      Transaction_ctx::STMT))
652             (void) write_execute_load_query_log_event(thd, ex,
653                                                       table_list->db,
654                                                       table_list->table_name,
655                                                       is_concurrent,
656                                                       handle_duplicates,
657                                                       transactional_table,
658                                                       errcode);
659 	  else
660 	  {
661 	    Delete_file_log_event d(thd, db, transactional_table);
662 	    (void) mysql_bin_log.write_event(&d);
663 	  }
664 	}
665       }
666     }
667 #endif /*!EMBEDDED_LIBRARY*/
668     error= -1;				// Error on read
669     goto err;
670   }
671 
672   my_snprintf(name, sizeof(name),
673               ER(ER_LOAD_INFO),
674               (long) info.stats.records, (long) info.stats.deleted,
675               (long) (info.stats.records - info.stats.copied),
676               (long) thd->get_stmt_da()->current_statement_cond_count());
677 
678 #ifndef EMBEDDED_LIBRARY
679   if (mysql_bin_log.is_open())
680   {
681     /*
682       We need to do the job that is normally done inside
683       binlog_query() here, which is to ensure that the pending event
684       is written before tables are unlocked and before any other
685       events are written.  We also need to update the table map
686       version for the binary log to mark that table maps are invalid
687       after this point.
688      */
689     if (thd->is_current_stmt_binlog_format_row())
690       error= thd->binlog_flush_pending_rows_event(TRUE, transactional_table);
691     else
692     {
693       /*
694         As already explained above, we need to call end_io_cache() or the last
695         block will be logged only after Execute_load_query_log_event (which is
696         wrong), when read_info is destroyed.
697       */
698       read_info.end_io_cache();
699       if (lf_info.wrote_create_file)
700       {
701         int errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
702         error= write_execute_load_query_log_event(thd, ex,
703                                                   table_list->db, table_list->table_name,
704                                                   is_concurrent,
705                                                   handle_duplicates,
706                                                   transactional_table,
707                                                   errcode);
708       }
709 
710       /*
711         Flushing the IO CACHE while writing the execute load query log event
712         may result in error (for instance, because the max_binlog_size has been
713         reached, and rotation of the binary log failed).
714       */
715       error= error || mysql_bin_log.get_log_file()->error;
716     }
717     if (error)
718       goto err;
719   }
720 #endif /*!EMBEDDED_LIBRARY*/
721 
722   /* ok to client sent only after binlog write and engine commit */
723   my_ok(thd, info.stats.copied + info.stats.deleted, 0L, name);
724 err:
725   DBUG_ASSERT(table->file->has_transactions() ||
726               !(info.stats.copied || info.stats.deleted) ||
727               thd->get_transaction()->cannot_safely_rollback(
728                 Transaction_ctx::STMT));
729   table->file->ha_release_auto_increment();
730   table->auto_increment_field_not_null= FALSE;
731   DBUG_RETURN(error);
732 }
733 
734 
735 #ifndef EMBEDDED_LIBRARY
736 
737 /* Not a very useful function; just to avoid duplication of code */
write_execute_load_query_log_event(THD * thd,sql_exchange * ex,const char * db_arg,const char * table_name_arg,bool is_concurrent,enum enum_duplicates duplicates,bool transactional_table,int errcode)738 static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex,
739                                                const char* db_arg,  /* table's database */
740                                                const char* table_name_arg,
741                                                bool is_concurrent,
742                                                enum enum_duplicates duplicates,
743                                                bool transactional_table,
744                                                int errcode)
745 {
746   char                *load_data_query,
747                       *end,
748                       *fname_start,
749                       *fname_end,
750                       *p= NULL;
751   size_t               pl= 0;
752   List<Item>           fv;
753   Item                *item;
754   String              *str;
755   String               pfield, pfields;
756   int                  n;
757   const char          *tbl= table_name_arg;
758   const char          *tdb= (thd->db().str != NULL ? thd->db().str : db_arg);
759   String              string_buf;
760   if (thd->db().str == NULL || strcmp(db_arg, thd->db().str))
761   {
762     /*
763       If used database differs from table's database,
764       prefix table name with database name so that it
765       becomes a FQ name.
766      */
767     string_buf.set_charset(system_charset_info);
768     append_identifier(thd, &string_buf, db_arg, strlen(db_arg));
769     string_buf.append(".");
770   }
771   append_identifier(thd, &string_buf, table_name_arg,
772                     strlen(table_name_arg));
773   tbl= string_buf.c_ptr_safe();
774   Load_log_event       lle(thd, ex, tdb, tbl, fv, is_concurrent,
775                            duplicates, thd->lex->is_ignore(),
776                            transactional_table);
777 
778   /*
779     force in a LOCAL if there was one in the original.
780   */
781   if (thd->lex->local_file)
782     lle.set_fname_outside_temp_buf(ex->file_name, strlen(ex->file_name));
783 
784   /*
785     prepare fields-list and SET if needed; print_query won't do that for us.
786   */
787   if (!thd->lex->load_field_list.is_empty())
788   {
789     List_iterator<Item> li(thd->lex->load_field_list);
790 
791     pfields.append(" (");
792     n= 0;
793 
794     while ((item= li++))
795     {
796       if (n++)
797         pfields.append(", ");
798       if (item->type() == Item::FIELD_ITEM ||
799                  item->type() == Item::REF_ITEM)
800         append_identifier(thd, &pfields, item->item_name.ptr(),
801                           strlen(item->item_name.ptr()));
802       else
803         item->print(&pfields, QT_ORDINARY);
804     }
805     pfields.append(")");
806   }
807 
808   if (!thd->lex->load_update_list.is_empty())
809   {
810     List_iterator<Item> lu(thd->lex->load_update_list);
811     List_iterator<String> ls(thd->lex->load_set_str_list);
812 
813     pfields.append(" SET ");
814     n= 0;
815 
816     while ((item= lu++))
817     {
818       str= ls++;
819       if (n++)
820         pfields.append(", ");
821       append_identifier(thd, &pfields, item->item_name.ptr(),
822                         strlen(item->item_name.ptr()));
823       // Extract exact Item value
824       str->copy();
825       pfields.append(str->ptr());
826       str->mem_free();
827     }
828     /*
829       Clear the SET string list once the SET command is reconstructed
830       as we donot require the list anymore.
831     */
832     thd->lex->load_set_str_list.empty();
833   }
834 
835   p= pfields.c_ptr_safe();
836   pl= strlen(p);
837 
838   if (!(load_data_query= (char *)thd->alloc(lle.get_query_buffer_length() + 1 + pl)))
839     return TRUE;
840 
841   lle.print_query(FALSE, ex->cs ? ex->cs->csname : NULL,
842                   load_data_query, &end,
843                   &fname_start, &fname_end);
844 
845   strcpy(end, p);
846   end += pl;
847 
848   Execute_load_query_log_event
849     e(thd, load_data_query, end-load_data_query,
850       static_cast<uint>(fname_start - load_data_query - 1),
851       static_cast<uint>(fname_end - load_data_query),
852       (duplicates == DUP_REPLACE) ? binary_log::LOAD_DUP_REPLACE :
853       (thd->lex->is_ignore() ? binary_log::LOAD_DUP_IGNORE :
854                                binary_log::LOAD_DUP_ERROR),
855       transactional_table, FALSE, FALSE, errcode);
856   return mysql_bin_log.write_event(&e);
857 }
858 
859 #endif
860 
861 /****************************************************************************
862 ** Read of rows of fixed size + optional garbage + optional newline
863 ****************************************************************************/
864 
865 static int
read_fixed_length(THD * thd,COPY_INFO & info,TABLE_LIST * table_list,List<Item> & fields_vars,List<Item> & set_fields,List<Item> & set_values,READ_INFO & read_info,ulong skip_lines)866 read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
867                   List<Item> &fields_vars, List<Item> &set_fields,
868                   List<Item> &set_values, READ_INFO &read_info,
869                   ulong skip_lines)
870 {
871   List_iterator_fast<Item> it(fields_vars);
872   TABLE *table= table_list->table;
873   bool err;
874   DBUG_ENTER("read_fixed_length");
875 
876   while (!read_info.read_fixed_length())
877   {
878     if (thd->killed)
879     {
880       thd->send_kill_message();
881       DBUG_RETURN(1);
882     }
883     if (skip_lines)
884     {
885       /*
886 	We could implement this with a simple seek if:
887 	- We are not using DATA INFILE LOCAL
888 	- escape character is  ""
889 	- line starting prefix is ""
890       */
891       skip_lines--;
892       continue;
893     }
894     it.rewind();
895     uchar *pos=read_info.row_start;
896 
897     restore_record(table, s->default_values);
898     /*
899       Check whether default values of the fields not specified in column list
900       are correct or not.
901     */
902     if (validate_default_values_of_unset_fields(thd, table))
903     {
904       read_info.error= true;
905       break;
906     }
907 
908     Item *item;
909     while ((item= it++))
910     {
911       /*
912         There is no variables in fields_vars list in this format so
913         this conversion is safe (no need to check for STRING_ITEM).
914       */
915       DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM);
916       Item_field *sql_field= static_cast<Item_field*>(item->real_item());
917       Field *field= sql_field->field;
918       if (field == table->next_number_field)
919         table->auto_increment_field_not_null= TRUE;
920       /*
921         No fields specified in fields_vars list can be null in this format.
922         Mark field as not null, we should do this for each row because of
923         restore_record...
924       */
925       field->set_notnull();
926 
927       if (pos == read_info.row_end)
928       {
929         thd->cuted_fields++;			/* Not enough fields */
930         push_warning_printf(thd, Sql_condition::SL_WARNING,
931                             ER_WARN_TOO_FEW_RECORDS,
932                             ER(ER_WARN_TOO_FEW_RECORDS),
933                             thd->get_stmt_da()->current_row_for_condition());
934         if (field->type() == FIELD_TYPE_TIMESTAMP && !field->maybe_null())
935         {
936           // Specific of TIMESTAMP NOT NULL: set to CURRENT_TIMESTAMP.
937           Item_func_now_local::store_in(field);
938         }
939       }
940       else
941       {
942 	uint length;
943 	uchar save_chr;
944 	if ((length=(uint) (read_info.row_end-pos)) >
945 	    field->field_length)
946 	  length=field->field_length;
947 	save_chr=pos[length]; pos[length]='\0'; // Safeguard aganst malloc
948         field->store((char*) pos,length,read_info.read_charset);
949 	pos[length]=save_chr;
950 	if ((pos+=length) > read_info.row_end)
951 	  pos= read_info.row_end;	/* Fills rest with space */
952       }
953     }
954     if (pos != read_info.row_end)
955     {
956       thd->cuted_fields++;			/* To long row */
957       push_warning_printf(thd, Sql_condition::SL_WARNING,
958                           ER_WARN_TOO_MANY_RECORDS,
959                           ER(ER_WARN_TOO_MANY_RECORDS),
960                           thd->get_stmt_da()->current_row_for_condition());
961     }
962 
963     if (thd->killed ||
964         fill_record_n_invoke_before_triggers(thd, &info, set_fields,
965                                              set_values, table,
966                                              TRG_EVENT_INSERT,
967                                              table->s->fields))
968       DBUG_RETURN(1);
969 
970     switch (table_list->view_check_option(thd)) {
971     case VIEW_CHECK_SKIP:
972       read_info.next_line();
973       goto continue_loop;
974     case VIEW_CHECK_ERROR:
975       DBUG_RETURN(-1);
976     }
977 
978     err= write_record(thd, table, &info, NULL);
979     table->auto_increment_field_not_null= FALSE;
980     if (err)
981       DBUG_RETURN(1);
982 
983     /*
984       We don't need to reset auto-increment field since we are restoring
985       its default value at the beginning of each loop iteration.
986     */
987     if (read_info.next_line())			// Skip to next line
988       break;
989     if (read_info.line_cuted)
990     {
991       thd->cuted_fields++;			/* To long row */
992       push_warning_printf(thd, Sql_condition::SL_WARNING,
993                           ER_WARN_TOO_MANY_RECORDS,
994                           ER(ER_WARN_TOO_MANY_RECORDS),
995                           thd->get_stmt_da()->current_row_for_condition());
996     }
997     thd->get_stmt_da()->inc_current_row_for_condition();
998 continue_loop:;
999   }
1000   DBUG_RETURN(MY_TEST(read_info.error));
1001 }
1002 
1003 
1004 class Field_tmp_nullability_guard
1005 {
1006 public:
Field_tmp_nullability_guard(Item * item)1007   explicit Field_tmp_nullability_guard(Item *item)
1008    :m_field(NULL)
1009   {
1010     if (item->type() == Item::FIELD_ITEM)
1011     {
1012       m_field= ((Item_field *) item)->field;
1013       /*
1014         Enable temporary nullability for items that corresponds
1015         to table fields.
1016       */
1017       m_field->set_tmp_nullable();
1018     }
1019   }
1020 
~Field_tmp_nullability_guard()1021   ~Field_tmp_nullability_guard()
1022   {
1023     if (m_field)
1024       m_field->reset_tmp_nullable();
1025   }
1026 
1027 private:
1028   Field *m_field;
1029 };
1030 
1031 
/**
  Read rows made of separator-terminated fields and write them to the table.

  Each pass of the outer loop handles one input line: values are fetched one
  at a time with read_info.read_field() and stored into the matching entry of
  fields_vars (a table field or a user variable). The row is then completed
  by fill_record_n_invoke_before_triggers() and written with write_record().

  @param thd          thread handle
  @param info         forwarded to fill_record_n_invoke_before_triggers() and
                      write_record()
  @param table_list   target table; its view check option is evaluated per row
  @param fields_vars  items that receive the parsed values
  @param set_fields   forwarded to fill_record_n_invoke_before_triggers()
  @param set_values   forwarded to fill_record_n_invoke_before_triggers()
  @param read_info    field reader
  @param enclosed     only its length is used here: a non-zero length enables
                      recognition of the unenclosed word NULL
  @param skip_lines   number of leading lines that are parsed but not stored

  @retval 0   loop left without read_info.error being set
  @retval 1   killed, an error was raised, or read_info.error is set
  @retval -1  the view check option reported VIEW_CHECK_ERROR
*/
static int
read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
               List<Item> &fields_vars, List<Item> &set_fields,
               List<Item> &set_values, READ_INFO &read_info,
	       const String &enclosed, ulong skip_lines)
{
  List_iterator_fast<Item> it(fields_vars);
  Item *item;
  TABLE *table= table_list->table;
  size_t enclosed_length;
  bool err;
  DBUG_ENTER("read_sep_field");

  enclosed_length=enclosed.length();

  /* One iteration per input line; the item iterator restarts for each. */
  for (;;it.rewind())
  {
    if (thd->killed)
    {
      thd->send_kill_message();
      DBUG_RETURN(1);
    }

    restore_record(table, s->default_values);
    /*
      Check whether default values of the fields not specified in column list
      are correct or not.
    */
    if (validate_default_values_of_unset_fields(thd, table))
    {
      read_info.error= true;
      break;
    }

    /*
      Consume one input field per item. Leaving this loop through the break
      below keeps 'item' non-NULL, which the code after the loop relies on.
    */
    while ((item= it++))
    {
      uint length;
      uchar *pos;
      Item *real_item;

      if (read_info.read_field())
	break;

      /* If this line is to be skipped we don't want to fill field or var */
      if (skip_lines)
        continue;

      pos=read_info.row_start;
      length=(uint) (read_info.row_end-pos);

      real_item= item->real_item();

      /*
        For field items, temporary nullability is switched on here and off
        again when the guard goes out of scope at the end of this iteration.
      */
      Field_tmp_nullability_guard fld_tmp_nullability_guard(real_item);

      /*
        The value counts as NULL when it is the unenclosed four-byte word NULL
        (only if an enclosure string is configured), or when it is one byte
        long and the reader flagged found_null.
      */
      if ((!read_info.enclosed &&
	  (enclosed_length && length == 4 &&
           !memcmp(pos, STRING_WITH_LEN("NULL")))) ||
	  (length == 1 && read_info.found_null))
      {

        if (real_item->type() == Item::FIELD_ITEM)
        {
          Field *field= ((Item_field *)real_item)->field;
          if (field->reset())                   // Set to 0
          {
            my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0), field->field_name,
                     thd->get_stmt_da()->current_row_for_condition());
            DBUG_RETURN(1);
          }
          if (!field->real_maybe_null() &&
              field->type() == FIELD_TYPE_TIMESTAMP)
          {
            // Specific of TIMESTAMP NOT NULL: set to CURRENT_TIMESTAMP.
            Item_func_now_local::store_in(field);
          }
          else
          {
            /*
              Set field to NULL. Later we will clear temporary nullability flag
              and check NOT NULL constraint.
            */
            field->set_null();
          }
	}
        else if (item->type() == Item::STRING_ITEM)
        {
          DBUG_ASSERT(NULL != dynamic_cast<Item_user_var_as_out_param*>(item));
          ((Item_user_var_as_out_param *)item)->set_null_value(
                                                  read_info.read_charset);
        }

	continue;
      }

      /* Regular (non-NULL) value: store it into the field or user variable. */
      if (real_item->type() == Item::FIELD_ITEM)
      {
        Field *field= ((Item_field *)real_item)->field;
        field->set_notnull();
        read_info.row_end[0]=0;			// Safe to change end marker
        if (field == table->next_number_field)
          table->auto_increment_field_not_null= TRUE;
        field->store((char*) pos, length, read_info.read_charset);
      }
      else if (item->type() == Item::STRING_ITEM)
      {
        DBUG_ASSERT(NULL != dynamic_cast<Item_user_var_as_out_param*>(item));
        ((Item_user_var_as_out_param *)item)->set_value((char*) pos, length,
                                                        read_info.read_charset);
      }
    }

    /* An error raised while storing a value aborts the load. */
    if (thd->is_error())
      read_info.error= true;

    if (read_info.error)
      break;
    if (skip_lines)
    {
      skip_lines--;
      continue;
    }
    /*
      'item' is non-NULL only if read_field() stopped the loop above before
      every item had been served, i.e. the line had too few fields.
    */
    if (item)
    {
      /* Have not read any field, thus input file is simply ended */
      if (item == fields_vars.head())
	break;
      /* Give every item that received no value a fallback value. */
      for (; item ; item= it++)
      {
        Item *real_item= item->real_item();
        if (real_item->type() == Item::FIELD_ITEM)
        {
          Field *field= ((Item_field *)real_item)->field;
          /*
            We set to 0. But if the field is DEFAULT NULL, the "null bit"
            turned on by restore_record() above remains so field will be NULL.
          */
          if (field->reset())
          {
            my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0),field->field_name,
                     thd->get_stmt_da()->current_row_for_condition());
            DBUG_RETURN(1);
          }
          if (field->type() == FIELD_TYPE_TIMESTAMP && !field->maybe_null())
            // Specific of TIMESTAMP NOT NULL: set to CURRENT_TIMESTAMP.
            Item_func_now_local::store_in(field);
          /*
            QQ: We probably should not throw warning for each field.
            But how about intention to always have the same number
            of warnings in THD::cuted_fields (and get rid of cuted_fields
            in the end ?)
          */
          thd->cuted_fields++;
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_WARN_TOO_FEW_RECORDS,
                              ER(ER_WARN_TOO_FEW_RECORDS),
                              thd->get_stmt_da()->current_row_for_condition());
        }
        else if (item->type() == Item::STRING_ITEM)
        {
          DBUG_ASSERT(NULL != dynamic_cast<Item_user_var_as_out_param*>(item));
          ((Item_user_var_as_out_param *)item)->set_null_value(
                                                  read_info.read_charset);
        }
      }
    }

    if (thd->killed ||
        fill_record_n_invoke_before_triggers(thd, &info, set_fields,
                                             set_values, table,
                                             TRG_EVENT_INSERT,
                                             table->s->fields))
      DBUG_RETURN(1);

    if (!table->triggers)
    {
      /*
        If there is no trigger for the table then check the NOT NULL constraint
        for every table field.

        For the table that has BEFORE-INSERT trigger installed checking for
        NOT NULL constraint is done inside function
        fill_record_n_invoke_before_triggers() after all trigger instructions
        has been executed.
      */
      it.rewind();

      while ((item= it++))
      {
        Item *real_item= item->real_item();
        if (real_item->type() == Item::FIELD_ITEM)
          ((Item_field *) real_item)->field->check_constraints(ER_WARN_NULL_TO_NOTNULL);
      }
    }

    /* The return value of check_constraints() is not inspected above. */
    if (thd->is_error())
      DBUG_RETURN(1);

    switch (table_list->view_check_option(thd)) {
    case VIEW_CHECK_SKIP:
      /* Row rejected by the view: move on without writing or counting it. */
      read_info.next_line();
      goto continue_loop;
    case VIEW_CHECK_ERROR:
      DBUG_RETURN(-1);
    }

    err= write_record(thd, table, &info, NULL);
    /* The flag is cleared whether or not the write succeeded. */
    table->auto_increment_field_not_null= FALSE;
    if (err)
      DBUG_RETURN(1);
    /*
      We don't need to reset auto-increment field since we are restoring
      its default value at the beginning of each loop iteration.
    */
    if (read_info.next_line())			// Skip to next line
      break;
    if (read_info.line_cuted)
    {
      thd->cuted_fields++;			/* Too long row */
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_WARN_TOO_MANY_RECORDS, ER(ER_WARN_TOO_MANY_RECORDS),
                          thd->get_stmt_da()->current_row_for_condition());
      if (thd->killed)
        DBUG_RETURN(1);
    }
    thd->get_stmt_da()->inc_current_row_for_condition();
continue_loop:;
  }
  DBUG_RETURN(MY_TEST(read_info.error));
}
1261 
1262 
1263 /****************************************************************************
1264 ** Read rows in xml format
1265 ****************************************************************************/
1266 static int
read_xml_field(THD * thd,COPY_INFO & info,TABLE_LIST * table_list,List<Item> & fields_vars,List<Item> & set_fields,List<Item> & set_values,READ_INFO & read_info,ulong skip_lines)1267 read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
1268                List<Item> &fields_vars, List<Item> &set_fields,
1269                List<Item> &set_values, READ_INFO &read_info,
1270                ulong skip_lines)
1271 {
1272   List_iterator_fast<Item> it(fields_vars);
1273   Item *item;
1274   TABLE *table= table_list->table;
1275   const CHARSET_INFO *cs= read_info.read_charset;
1276   DBUG_ENTER("read_xml_field");
1277 
1278   for ( ; ; it.rewind())
1279   {
1280     if (thd->killed)
1281     {
1282       thd->send_kill_message();
1283       DBUG_RETURN(1);
1284     }
1285 
1286     // read row tag and save values into tag list
1287     if (read_info.read_xml())
1288       break;
1289 
1290     List_iterator_fast<XML_TAG> xmlit(read_info.taglist);
1291     xmlit.rewind();
1292     XML_TAG *tag= NULL;
1293 
1294 #ifndef DBUG_OFF
1295     DBUG_PRINT("read_xml_field", ("skip_lines=%d", (int) skip_lines));
1296     while ((tag= xmlit++))
1297     {
1298       DBUG_PRINT("read_xml_field", ("got tag:%i '%s' '%s'",
1299                                     tag->level, tag->field.c_ptr(),
1300                                     tag->value.c_ptr()));
1301     }
1302 #endif
1303 
1304     restore_record(table, s->default_values);
1305     /*
1306       Check whether default values of the fields not specified in column list
1307       are correct or not.
1308     */
1309     if (validate_default_values_of_unset_fields(thd, table))
1310     {
1311       read_info.error= true;
1312       break;
1313     }
1314 
1315     while ((item= it++))
1316     {
1317       /* If this line is to be skipped we don't want to fill field or var */
1318       if (skip_lines)
1319         continue;
1320 
1321       /* find field in tag list */
1322       xmlit.rewind();
1323       tag= xmlit++;
1324 
1325       while(tag && strcmp(tag->field.c_ptr(), item->item_name.ptr()) != 0)
1326         tag= xmlit++;
1327 
1328       item= item->real_item();
1329 
1330       if (!tag) // found null
1331       {
1332         if (item->type() == Item::FIELD_ITEM)
1333         {
1334           Field *field= (static_cast<Item_field*>(item))->field;
1335           field->reset();
1336           field->set_null();
1337           if (field == table->next_number_field)
1338             table->auto_increment_field_not_null= TRUE;
1339           if (!field->maybe_null())
1340           {
1341             if (field->type() == FIELD_TYPE_TIMESTAMP)
1342               // Specific of TIMESTAMP NOT NULL: set to CURRENT_TIMESTAMP.
1343               Item_func_now_local::store_in(field);
1344             else if (field != table->next_number_field)
1345               field->set_warning(Sql_condition::SL_WARNING,
1346                                  ER_WARN_NULL_TO_NOTNULL, 1);
1347           }
1348         }
1349         else
1350         {
1351           DBUG_ASSERT(NULL != dynamic_cast<Item_user_var_as_out_param*>(item));
1352           ((Item_user_var_as_out_param *) item)->set_null_value(cs);
1353         }
1354         continue;
1355       }
1356 
1357       if (item->type() == Item::FIELD_ITEM)
1358       {
1359         Field *field= ((Item_field *)item)->field;
1360         field->set_notnull();
1361         if (field == table->next_number_field)
1362           table->auto_increment_field_not_null= TRUE;
1363         field->store((char *) tag->value.ptr(), tag->value.length(), cs);
1364       }
1365       else
1366       {
1367         DBUG_ASSERT(NULL != dynamic_cast<Item_user_var_as_out_param*>(item));
1368         ((Item_user_var_as_out_param *) item)->set_value(
1369                                                  (char *) tag->value.ptr(),
1370                                                  tag->value.length(), cs);
1371       }
1372     }
1373 
1374     if (read_info.error)
1375       break;
1376 
1377     if (skip_lines)
1378     {
1379       skip_lines--;
1380       continue;
1381     }
1382 
1383     if (item)
1384     {
1385       /* Have not read any field, thus input file is simply ended */
1386       if (item == fields_vars.head())
1387         break;
1388 
1389       for ( ; item; item= it++)
1390       {
1391         if (item->type() == Item::FIELD_ITEM)
1392         {
1393           /*
1394             QQ: We probably should not throw warning for each field.
1395             But how about intention to always have the same number
1396             of warnings in THD::cuted_fields (and get rid of cuted_fields
1397             in the end ?)
1398           */
1399           thd->cuted_fields++;
1400           push_warning_printf(thd, Sql_condition::SL_WARNING,
1401                               ER_WARN_TOO_FEW_RECORDS,
1402                               ER(ER_WARN_TOO_FEW_RECORDS),
1403                               thd->get_stmt_da()->current_row_for_condition());
1404         }
1405         else
1406         {
1407           DBUG_ASSERT(NULL != dynamic_cast<Item_user_var_as_out_param*>(item));
1408           ((Item_user_var_as_out_param *)item)->set_null_value(cs);
1409         }
1410       }
1411     }
1412 
1413     if (thd->killed ||
1414         fill_record_n_invoke_before_triggers(thd, &info, set_fields,
1415                                              set_values, table,
1416                                              TRG_EVENT_INSERT,
1417                                              table->s->fields))
1418       DBUG_RETURN(1);
1419 
1420     switch (table_list->view_check_option(thd)) {
1421     case VIEW_CHECK_SKIP:
1422       read_info.next_line();
1423       goto continue_loop;
1424     case VIEW_CHECK_ERROR:
1425       DBUG_RETURN(-1);
1426     }
1427 
1428     if (write_record(thd, table, &info, NULL))
1429       DBUG_RETURN(1);
1430 
1431     /*
1432       We don't need to reset auto-increment field since we are restoring
1433       its default value at the beginning of each loop iteration.
1434     */
1435     thd->get_stmt_da()->inc_current_row_for_condition();
1436     continue_loop:;
1437   }
1438   DBUG_RETURN(MY_TEST(read_info.error) || thd->is_error());
1439 } /* load xml end */
1440 
1441 
1442 /* Unescape all escape characters, mark \N as null */
1443 
1444 char
unescape(char chr)1445 READ_INFO::unescape(char chr)
1446 {
1447   /* keep this switch synchornous with the ESCAPE_CHARS macro */
1448   switch(chr) {
1449   case 'n': return '\n';
1450   case 't': return '\t';
1451   case 'r': return '\r';
1452   case 'b': return '\b';
1453   case '0': return 0;				// Ascii null
1454   case 'Z': return '\032';			// Win32 end of file
1455   case 'N': found_null=1;
1456 
1457     /* fall through */
1458   default:  return chr;
1459   }
1460 }
1461 
1462 
/*
  Read a line using buffering.
  If the last line is empty (in line mode) then it is not output.
*/
1467 
1468 
READ_INFO(File file_par,uint tot_length,const CHARSET_INFO * cs,const String & field_term,const String & line_start,const String & line_term,const String & enclosed_par,int escape,bool get_it_from_net,bool is_fifo)1469 READ_INFO::READ_INFO(File file_par, uint tot_length, const CHARSET_INFO *cs,
1470                      const String &field_term,
1471                      const String &line_start,
1472                      const String &line_term,
1473                      const String &enclosed_par,
1474                      int escape, bool get_it_from_net, bool is_fifo)
1475   :file(file_par), buff_length(tot_length), escape_char(escape),
1476    found_end_of_line(false), eof(false), need_end_io_cache(false),
1477    error(false), line_cuted(false), found_null(false), read_charset(cs)
1478 {
1479   /*
1480     Field and line terminators must be interpreted as sequence of unsigned char.
1481     Otherwise, non-ascii terminators will be negative on some platforms,
1482     and positive on others (depending on the implementation of char).
1483   */
1484   field_term_ptr=
1485     static_cast<const uchar*>(static_cast<const void*>(field_term.ptr()));
1486   field_term_length= field_term.length();
1487   line_term_ptr=
1488     static_cast<const uchar*>(static_cast<const void*>(line_term.ptr()));
1489   line_term_length= line_term.length();
1490 
1491   level= 0; /* for load xml */
1492   if (line_start.length() == 0)
1493   {
1494     line_start_ptr=0;
1495     start_of_line= 0;
1496   }
1497   else
1498   {
1499     line_start_ptr= line_start.ptr();
1500     line_start_end=line_start_ptr+line_start.length();
1501     start_of_line= 1;
1502   }
1503   /* If field_terminator == line_terminator, don't use line_terminator */
1504   if (field_term_length == line_term_length &&
1505       !memcmp(field_term_ptr,line_term_ptr,field_term_length))
1506   {
1507     line_term_length=0;
1508     line_term_ptr= NULL;
1509   }
1510   enclosed_char= (enclosed_length=enclosed_par.length()) ?
1511     (uchar) enclosed_par[0] : INT_MAX;
1512   field_term_char= field_term_length ? field_term_ptr[0] : INT_MAX;
1513   line_term_char= line_term_length ? line_term_ptr[0] : INT_MAX;
1514 
1515 
1516   /* Set of a stack for unget if long terminators */
1517   size_t length= max<size_t>(cs->mbmaxlen, max(field_term_length, line_term_length)) + 1;
1518   set_if_bigger(length,line_start.length());
1519   stack=stack_pos=(int*) sql_alloc(sizeof(int)*length);
1520 
1521   if (!(buffer=(uchar*) my_malloc(key_memory_READ_INFO,
1522                                   buff_length+1, MYF(MY_WME))))
1523     error= true; /* purecov: inspected */
1524   else
1525   {
1526     end_of_buff=buffer+buff_length;
1527     if (init_io_cache(&cache,(get_it_from_net) ? -1 : file, 0,
1528 		      (get_it_from_net) ? READ_NET :
1529 		      (is_fifo ? READ_FIFO : READ_CACHE),0L,1,
1530 		      MYF(MY_WME)))
1531     {
1532       my_free(buffer); /* purecov: inspected */
1533       buffer= NULL;
1534       error= true;
1535     }
1536     else
1537     {
1538       /*
1539 	init_io_cache() will not initialize read_function member
1540 	if the cache is READ_NET. So we work around the problem with a
1541 	manual assignment
1542       */
1543       need_end_io_cache = 1;
1544 
1545 #ifndef EMBEDDED_LIBRARY
1546       if (get_it_from_net)
1547 	cache.read_function = _my_b_net_read;
1548 
1549       if (mysql_bin_log.is_open())
1550 	cache.pre_read = cache.pre_close =
1551 	  (IO_CACHE_CALLBACK) log_loaded_block;
1552 #endif
1553     }
1554   }
1555 }
1556 
1557 
~READ_INFO()1558 READ_INFO::~READ_INFO()
1559 {
1560   if (need_end_io_cache)
1561     ::end_io_cache(&cache);
1562 
1563   if (buffer != NULL)
1564     my_free(buffer);
1565   List_iterator<XML_TAG> xmlit(taglist);
1566   XML_TAG *t;
1567   while ((t= xmlit++))
1568     delete(t);
1569 }
1570 
1571 
/**
  Determine the byte length of the multi-byte character starting with @c chr.

  The logic here is similar to my_mbcharlen(), except for GET and PUSH:
  when my_mbcharlen() yields 0 and my_mbmaxlenlen(cs) is 2, one more byte is
  fetched with GET and the length is taken from my_mbcharlen_2(). That extra
  byte is put back with PUSH only when the resulting length is non-zero.
  If GET returns my_b_EOF, @c len stays 0 and nothing is pushed back.

  The macro uses GET and PUSH, so it can only be expanded where those are
  usable.

  @param[in]  cs  charset info
  @param[in]  chr the first char of sequence
  @param[out] len the length of multi-byte char, 0 if it could not be
                  determined
*/
#define GET_MBCHARLEN(cs, chr, len)                                           \
  do {                                                                        \
    len= my_mbcharlen((cs), (chr));                                           \
    if (len == 0 && my_mbmaxlenlen((cs)) == 2)                                \
    {                                                                         \
      int chr1= GET;                                                          \
      if (chr1 != my_b_EOF)                                                   \
      {                                                                       \
        len= my_mbcharlen_2((cs), (chr), chr1);                               \
        /* Character is gb18030 or invalid (len = 0) */                       \
        DBUG_ASSERT(len == 0 || len == 2 || len == 4);                        \
      }                                                                       \
      if (len != 0)                                                           \
        PUSH(chr1);                                                           \
    }                                                                         \
  } while (0)
1595 
1596 
terminator(const uchar * ptr,size_t length)1597 inline int READ_INFO::terminator(const uchar *ptr, size_t length)
1598 {
1599   int chr=0;					// Keep gcc happy
1600   size_t i;
1601   for (i=1 ; i < length ; i++)
1602   {
1603     chr= GET;
1604     if (chr != *++ptr)
1605     {
1606       break;
1607     }
1608   }
1609   if (i == length)
1610     return 1;
1611   PUSH(chr);
1612   while (i-- > 1)
1613     PUSH(*--ptr);
1614   return 0;
1615 }
1616 
1617 
read_field()1618 int READ_INFO::read_field()
1619 {
1620   int chr,found_enclosed_char;
1621   uchar *to,*new_buffer;
1622 
1623   found_null=0;
1624   if (found_end_of_line)
1625     return 1;					// One have to call next_line
1626 
1627   /* Skip until we find 'line_start' */
1628 
1629   if (start_of_line)
1630   {						// Skip until line_start
1631     start_of_line=0;
1632     if (find_start_of_fields())
1633       return 1;
1634   }
1635   if ((chr=GET) == my_b_EOF)
1636   {
1637     found_end_of_line=eof=1;
1638     return 1;
1639   }
1640   to=buffer;
1641   if (chr == enclosed_char)
1642   {
1643     found_enclosed_char=enclosed_char;
1644     *to++=(uchar) chr;				// If error
1645   }
1646   else
1647   {
1648     found_enclosed_char= INT_MAX;
1649     PUSH(chr);
1650   }
1651 
1652   for (;;)
1653   {
1654     bool escaped_mb= false;
1655     while ( to < end_of_buff)
1656     {
1657       chr = GET;
1658       if (chr == my_b_EOF)
1659 	goto found_eof;
1660       if (chr == escape_char)
1661       {
1662 	if ((chr=GET) == my_b_EOF)
1663 	{
1664 	  *to++= (uchar) escape_char;
1665 	  goto found_eof;
1666 	}
1667         /*
1668           When escape_char == enclosed_char, we treat it like we do for
1669           handling quotes in SQL parsing -- you can double-up the
1670           escape_char to include it literally, but it doesn't do escapes
1671           like \n. This allows: LOAD DATA ... ENCLOSED BY '"' ESCAPED BY '"'
1672           with data like: "fie""ld1", "field2"
1673          */
1674         if (escape_char != enclosed_char || chr == escape_char)
1675         {
1676           uint ml;
1677           GET_MBCHARLEN(read_charset, chr, ml);
1678           /*
1679             For escaped multibyte character, push back the first byte,
1680             and will handle it below.
1681             Because multibyte character's second byte is possible to be
1682             0x5C, per Query_result_export::send_data, both head byte and
1683             tail byte are escaped for such characters. So mark it if the
1684             head byte is escaped and will handle it below.
1685           */
1686           if (ml == 1)
1687             *to++= (uchar) unescape((char) chr);
1688           else
1689           {
1690             escaped_mb= true;
1691             PUSH(chr);
1692           }
1693           continue;
1694         }
1695         PUSH(chr);
1696         chr= escape_char;
1697       }
1698       if (chr == line_term_char && found_enclosed_char == INT_MAX)
1699       {
1700 	if (terminator(line_term_ptr,line_term_length))
1701 	{					// Maybe unexpected linefeed
1702 	  enclosed=0;
1703 	  found_end_of_line=1;
1704 	  row_start=buffer;
1705 	  row_end=  to;
1706 	  return 0;
1707 	}
1708       }
1709       if (chr == found_enclosed_char)
1710       {
1711 	if ((chr=GET) == found_enclosed_char)
1712 	{					// Remove dupplicated
1713 	  *to++ = (uchar) chr;
1714 	  continue;
1715 	}
1716 	// End of enclosed field if followed by field_term or line_term
1717 	if (chr == my_b_EOF ||
1718 	    (chr == line_term_char && terminator(line_term_ptr,
1719 						line_term_length)))
1720 	{					// Maybe unexpected linefeed
1721 	  enclosed=1;
1722 	  found_end_of_line=1;
1723 	  row_start=buffer+1;
1724 	  row_end=  to;
1725 	  return 0;
1726 	}
1727 	if (chr == field_term_char &&
1728 	    terminator(field_term_ptr,field_term_length))
1729 	{
1730 	  enclosed=1;
1731 	  row_start=buffer+1;
1732 	  row_end=  to;
1733 	  return 0;
1734 	}
1735 	/*
1736 	  The string didn't terminate yet.
1737 	  Store back next character for the loop
1738 	*/
1739 	PUSH(chr);
1740 	/* copy the found term character to 'to' */
1741 	chr= found_enclosed_char;
1742       }
1743       else if (chr == field_term_char && found_enclosed_char == INT_MAX)
1744       {
1745 	if (terminator(field_term_ptr,field_term_length))
1746 	{
1747 	  enclosed=0;
1748 	  row_start=buffer;
1749 	  row_end=  to;
1750 	  return 0;
1751 	}
1752       }
1753 
1754       uint ml;
1755       GET_MBCHARLEN(read_charset, chr, ml);
1756       if (ml == 0)
1757       {
1758         *to= '\0';
1759         my_error(ER_INVALID_CHARACTER_STRING, MYF(0),
1760                  read_charset->csname, buffer);
1761         error= true;
1762         return 1;
1763       }
1764 
1765 
1766       if (ml > 1 &&
1767           to + ml <= end_of_buff)
1768       {
1769         uchar* p= to;
1770         *to++ = chr;
1771 
1772         for (uint i= 1; i < ml; i++)
1773         {
1774           chr= GET;
1775           if (chr == my_b_EOF)
1776           {
1777             /*
1778              Need to back up the bytes already ready from illformed
1779              multi-byte char
1780             */
1781             to-= i;
1782             goto found_eof;
1783           }
1784           else if (chr == escape_char && escaped_mb)
1785           {
1786             // Unescape the second byte if it is escaped.
1787             chr= GET;
1788             chr= (uchar) unescape((char) chr);
1789           }
1790           *to++ = chr;
1791         }
1792         if (escaped_mb)
1793           escaped_mb= false;
1794         if (my_ismbchar(read_charset,
1795                         (const char *)p,
1796                         (const char *)to))
1797           continue;
1798         for (uint i= 0; i < ml; i++)
1799           PUSH(*--to);
1800         chr= GET;
1801       }
1802       else if (ml > 1)
1803       {
1804         // Buffer is too small, exit while loop, and reallocate.
1805         PUSH(chr);
1806         break;
1807       }
1808       *to++ = (uchar) chr;
1809     }
1810     /*
1811     ** We come here if buffer is too small. Enlarge it and continue
1812     */
1813     if (!(new_buffer=(uchar*) my_realloc(key_memory_READ_INFO,
1814                                          (char*) buffer,buff_length+1+IO_SIZE,
1815 					MYF(MY_WME))))
1816       return (error= true);
1817     to=new_buffer + (to-buffer);
1818     buffer=new_buffer;
1819     buff_length+=IO_SIZE;
1820     end_of_buff=buffer+buff_length;
1821   }
1822 
1823 found_eof:
1824   enclosed=0;
1825   found_end_of_line=eof=1;
1826   row_start=buffer;
1827   row_end=to;
1828   return 0;
1829 }
1830 
1831 /*
1832   Read a row with fixed length.
1833 
1834   NOTES
1835     The row may not be fixed size on disk if there are escape
1836     characters in the file.
1837 
1838   IMPLEMENTATION NOTE
1839     One can't use fixed length with multi-byte charset **
1840 
1841   RETURN
1842     0  ok
1843     1  error
1844 */
1845 
read_fixed_length()1846 int READ_INFO::read_fixed_length()
1847 {
1848   int chr;
1849   uchar *to;
1850   if (found_end_of_line)
1851     return 1;					// One have to call next_line
1852 
1853   if (start_of_line)
1854   {						// Skip until line_start
1855     start_of_line=0;
1856     if (find_start_of_fields())
1857       return 1;
1858   }
1859 
1860   to=row_start=buffer;
1861   while (to < end_of_buff)
1862   {
1863     if ((chr=GET) == my_b_EOF)
1864       goto found_eof;
1865     if (chr == escape_char)
1866     {
1867       if ((chr=GET) == my_b_EOF)
1868       {
1869 	*to++= (uchar) escape_char;
1870 	goto found_eof;
1871       }
1872       *to++ =(uchar) unescape((char) chr);
1873       continue;
1874     }
1875     if (chr == line_term_char)
1876     {
1877       if (terminator(line_term_ptr,line_term_length))
1878       {						// Maybe unexpected linefeed
1879 	found_end_of_line=1;
1880 	row_end=  to;
1881 	return 0;
1882       }
1883     }
1884     *to++ = (uchar) chr;
1885   }
1886   row_end=to;					// Found full line
1887   return 0;
1888 
1889 found_eof:
1890   found_end_of_line=eof=1;
1891   row_start=buffer;
1892   row_end=to;
1893   return to == buffer ? 1 : 0;
1894 }
1895 
1896 
next_line()1897 int READ_INFO::next_line()
1898 {
1899   line_cuted=0;
1900   start_of_line= line_start_ptr != 0;
1901   if (found_end_of_line || eof)
1902   {
1903     found_end_of_line=0;
1904     return eof;
1905   }
1906   found_end_of_line=0;
1907   if (!line_term_length)
1908     return 0;					// No lines
1909   for (;;)
1910   {
1911     int chr = GET;
1912     uint ml;
1913     if (chr == my_b_EOF)
1914     {
1915       eof= 1;
1916       return 1;
1917     }
1918    GET_MBCHARLEN(read_charset, chr, ml);
1919    if (ml > 1)
1920    {
1921        for (uint i=1;
1922             chr != my_b_EOF && i < ml;
1923             i++)
1924 	   chr = GET;
1925        if (chr == escape_char)
1926 	   continue;
1927    }
1928    if (chr == my_b_EOF)
1929    {
1930       eof=1;
1931       return 1;
1932     }
1933     if (chr == escape_char)
1934     {
1935       line_cuted=1;
1936       if (GET == my_b_EOF)
1937 	return 1;
1938       continue;
1939     }
1940     if (chr == line_term_char && terminator(line_term_ptr,line_term_length))
1941       return 0;
1942     line_cuted=1;
1943   }
1944 }
1945 
1946 
find_start_of_fields()1947 bool READ_INFO::find_start_of_fields()
1948 {
1949   int chr;
1950  try_again:
1951   do
1952   {
1953     if ((chr=GET) == my_b_EOF)
1954     {
1955       found_end_of_line=eof=1;
1956       return 1;
1957     }
1958   } while ((char) chr != line_start_ptr[0]);
1959   for (const char *ptr=line_start_ptr+1 ; ptr != line_start_end ; ptr++)
1960   {
1961     chr=GET;					// Eof will be checked later
1962     if ((char) chr != *ptr)
1963     {						// Can't be line_start
1964       PUSH(chr);
1965       while (--ptr != line_start_ptr)
1966       {						// Restart with next char
1967 	PUSH( *ptr);
1968       }
1969       goto try_again;
1970     }
1971   }
1972   return 0;
1973 }
1974 
1975 
1976 /*
1977   Clear taglist from tags with a specified level
1978 */
clear_level(int level_arg)1979 int READ_INFO::clear_level(int level_arg)
1980 {
1981   DBUG_ENTER("READ_INFO::read_xml clear_level");
1982   List_iterator<XML_TAG> xmlit(taglist);
1983   xmlit.rewind();
1984   XML_TAG *tag;
1985 
1986   while ((tag= xmlit++))
1987   {
1988      if(tag->level >= level_arg)
1989      {
1990        xmlit.remove();
1991        delete tag;
1992      }
1993   }
1994   DBUG_RETURN(0);
1995 }
1996 
1997 
/*
  Translate the name of a predefined XML entity into its character.

  name    entity name without the leading '&' and trailing ';'
          (need not be NUL terminated)
  length  number of bytes in name

  Recognised names are gt, lt, amp, quot and apos; matching is case
  sensitive. Returns the character value, or -1 for any other name.
*/
static int
my_xml_entity_to_char(const char *name, size_t length)
{
  static const struct
  {
    const char *text;
    size_t text_length;
    int value;
  } known_entities[]=
  {
    { "gt",   2, '>'  },
    { "lt",   2, '<'  },
    { "amp",  3, '&'  },
    { "quot", 4, '"'  },
    { "apos", 4, '\'' }
  };
  const size_t entity_count= sizeof(known_entities) / sizeof(known_entities[0]);

  for (size_t idx= 0; idx < entity_count; idx++)
  {
    if (known_entities[idx].text_length == length &&
        memcmp(name, known_entities[idx].text, length) == 0)
      return known_entities[idx].value;
  }
  return -1;
}
2026 
2027 
/**
  @brief Map tab, carriage return and line feed to a space

  @param chr    character to normalise

  @return ' ' if chr is '\\t', '\\r' or '\\n'; otherwise chr unchanged

  @details According to the "XML 1.0" standard, only space (#x20),
           carriage return, line feed and tab count as white space.
           Folding all of them into #x20 lets the parser test for a
           single character.
*/
static int
my_tospace(int chr)
{
  switch (chr)
  {
  case '\t':
  case '\r':
  case '\n':
    return ' ';
  default:
    return chr;
  }
}
2043 
2044 
2045 /*
2046   Read an xml value: handle multibyte and xml escape
2047 */
read_value(int delim,String * val)2048 int READ_INFO::read_value(int delim, String *val)
2049 {
2050   int chr;
2051   String tmp;
2052 
2053   for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF;)
2054   {
2055     uint ml;
2056     GET_MBCHARLEN(read_charset, chr, ml);
2057     if (ml == 0)
2058     {
2059       chr= my_b_EOF;
2060       val->length(0);
2061       return chr;
2062     }
2063 
2064     if (ml > 1)
2065     {
2066       DBUG_PRINT("read_xml",("multi byte"));
2067 
2068       for (uint i= 1; i < ml; i++)
2069       {
2070         val->append(chr);
2071         /*
2072           Don't use my_tospace() in the middle of a multi-byte character
2073           TODO: check that the multi-byte sequence is valid.
2074         */
2075         chr= GET;
2076         if (chr == my_b_EOF)
2077           return chr;
2078       }
2079     }
2080     if(chr == '&')
2081     {
2082       tmp.length(0);
2083       for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET))
2084       {
2085         if (chr == my_b_EOF)
2086           return chr;
2087         tmp.append(chr);
2088       }
2089       if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0)
2090         val->append(chr);
2091       else
2092       {
2093         val->append('&');
2094         val->append(tmp);
2095         val->append(';');
2096       }
2097     }
2098     else
2099       val->append(chr);
2100     chr= GET;
2101   }
2102   return my_tospace(chr);
2103 }
2104 
2105 
/*
  Read one record in XML format.

  The input is scanned one character at a time (white space normalised
  by my_tospace()) and dispatched on the current character. Element
  values and attribute values are pushed onto taglist as XML_TAG objects
  together with the nesting level they were found at.

  The row element name is line_term_ptr with its first and last
  character skipped (ROWS IDENTIFIED BY '<row>' is stored in line_term).
  When a closing tag with that name is read, the record is complete.

  RETURN
    0  closing row tag found
    1  end of input reached (eof is set)
*/
int READ_INFO::read_xml()
{
  DBUG_ENTER("READ_INFO::read_xml");
  int chr, chr2, chr3;
  /* quote character seen in the '<field name=' format; 0 until then */
  int delim= 0;
  String tag, attribute, value;
  /* true while positioned inside a start tag, after its name */
  bool in_tag= false;

  tag.length(0);
  attribute.length(0);
  value.length(0);

  /* Each case leaves the next character to dispatch on in chr */
  for (chr= my_tospace(GET); chr != my_b_EOF ; )
  {
    switch(chr){
    case '<':  /* read tag */
        /* A "<!--" sequence starts a comment, which is skipped below */
      chr= my_tospace(GET);
      if(chr == '!')
      {
        /*
          NOTE(review): these two characters are consumed and not pushed
          back when they turn out not to be "--".
        */
        chr2= GET;
        chr3= GET;

        if(chr2 == '-' && chr3 == '-')
        {
          /*
            Skip to the closing "-->". chr2 is '-' when the previous
            character was a dash, chr3 is '-' when the two previous
            characters were both dashes.
          */
          chr2= 0;
          chr3= 0;
          chr= my_tospace(GET);

          while(chr != '>' || chr2 != '-' || chr3 != '-')
          {
            if(chr == '-')
            {
              chr3= chr2;
              chr2= chr;
            }
            else if (chr2 == '-')
            {
              chr2= 0;
              chr3= 0;
            }
            chr= my_tospace(GET);
            if (chr == my_b_EOF)
              goto found_eof;
          }
          /* chr is the '>' that closed the comment */
          break;
        }
      }

      /* Collect the tag name up to '>', white space, '/' or end of input */
      tag.length(0);
      while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF)
      {
        if(chr != delim) /* fix for the '<field name =' format */
          tag.append(chr);
        chr= my_tospace(GET);
      }

      // row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term
      /* Only used for the debug trace; nothing else happens at row start */
      if((tag.length() == line_term_length -2) &&
         (memcmp(tag.ptr(), line_term_ptr + 1, tag.length()) == 0))
      {
        DBUG_PRINT("read_xml", ("start-of-row: %i %s %s",
                                level,tag.c_ptr_safe(), line_term_ptr));
      }

      /* A start tag opens a new level; drop stale tags from deeper levels */
      if(chr == ' ' || chr == '>')
      {
        level++;
        clear_level(level + 1);
      }

      /* White space after the name means attributes may follow */
      if (chr == ' ')
        in_tag= true;
      else
        in_tag= false;
      break;

    case ' ': /* read attribute */
      while(chr == ' ')  /* skip blanks */
        chr= my_tospace(GET);

      /* White space outside a start tag carries no meaning */
      if(!in_tag)
        break;

      /* Collect the attribute name; the '=' case reads its value */
      while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF)
      {
        attribute.append(chr);
        chr= my_tospace(GET);
      }
      break;

    case '>': /* end tag - read tag value */
      in_tag= false;
      /* Skip all whitespaces */
      while (' ' == (chr= my_tospace(GET)))
      {
      }
      /*
        Push the first non-whitespace char back to Stack. This char would be
        read in the upcoming call to read_value()
       */
      PUSH(chr);
      /* The element text runs up to the next '<' */
      chr= read_value('<', &value);
      if(chr == my_b_EOF)
        goto found_eof;

      /* save value to list */
      if(tag.length() > 0 && value.length() > 0)
      {
        DBUG_PRINT("read_xml", ("lev:%i tag:%s val:%s",
                                level,tag.c_ptr_safe(), value.c_ptr_safe()));
        taglist.push_front( new XML_TAG(level, tag, value));
      }
      tag.length(0);
      value.length(0);
      attribute.length(0);
      break;

    case '/': /* close tag */
      chr= my_tospace(GET);
      /* Decrease the 'level' only when (i) It's not an */
      /* (without space) empty tag i.e. <tag/> or, (ii) */
      /* It is of format <row col="val" .../>           */
      if(chr != '>' || in_tag)
      {
        level--;
        in_tag= false;
      }
      if(chr != '>')   /* if this is an empty tag <tag   /> */
        tag.length(0); /* we should keep tag value          */
      /* Collect the name of the closing tag */
      while(chr != '>' && chr != my_b_EOF)
      {
        tag.append(chr);
        chr= my_tospace(GET);
      }

      /* Closing tag equal to the row element name: the record is complete */
      if((tag.length() == line_term_length -2) &&
         (memcmp(tag.ptr(), line_term_ptr + 1, tag.length()) == 0))
      {
         DBUG_PRINT("read_xml", ("found end-of-row %i %s",
                                 level, tag.c_ptr_safe()));
         DBUG_RETURN(0); //normal return
      }
      chr= my_tospace(GET);
      break;

    case '=': /* attribute name end - read the value */
      //check for tag field and attribute name
      /* Both comparisons only look at the leading "field" / "name" bytes */
      if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) &&
         !memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name")))
      {
        /*
          this is format <field name="xx">xx</field>
          where actual fieldname is in attribute
        */
        /* Remember the quote character so the '<' case can drop it */
        delim= my_tospace(GET);
        tag.length(0);
        attribute.length(0);
        chr= '<'; /* we pretend that it is a tag */
        /* The '<' case increments level again for a start tag */
        level--;
        break;
      }

      //check for " or '
      chr= GET;
      if (chr == my_b_EOF)
        goto found_eof;
      if(chr == '"' || chr == '\'')
      {
        delim= chr;
      }
      else
      {
        delim= ' '; /* no delimiter, use space */
        PUSH(chr);
      }

      /* Attributes are stored one level below the element they belong to */
      chr= read_value(delim, &value);
      if(attribute.length() > 0 && value.length() > 0)
      {
        DBUG_PRINT("read_xml", ("lev:%i att:%s val:%s\n",
                                level + 1,
                                attribute.c_ptr_safe(),
                                value.c_ptr_safe()));
        taglist.push_front(new XML_TAG(level + 1, attribute, value));
      }
      attribute.length(0);
      value.length(0);
      /* A space delimiter is left in chr for the ' ' case to handle */
      if (chr != ' ')
        chr= my_tospace(GET);
      break;

    default:
      /* Any other character is skipped */
      chr= my_tospace(GET);
    } /* end switch */
  } /* end while */

found_eof:
  DBUG_PRINT("read_xml",("Found eof"));
  eof= 1;
  DBUG_RETURN(1);
}
2313