1 /*****************************************************************************
2 
3 Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file include/row0ftsort.h
29 Create Full Text Index with (parallel) merge sort
30 
31 Created 10/13/2010 Jimmy Yang
32 *******************************************************/
33 
34 #ifndef row0ftsort_h
35 #define row0ftsort_h
36 
37 #include "univ.i"
38 #include "data0data.h"
39 #include "dict0types.h"
40 #include "row0mysql.h"
41 #include "fts0fts.h"
42 #include "fts0types.h"
43 #include "fts0priv.h"
44 #include "row0merge.h"
45 
46 /** This structure defineds information the scan thread will fetch
47 and put to the linked list for parallel tokenization/sort threads
48 to process */
49 typedef struct fts_doc_item     fts_doc_item_t;
50 
51 /** Information about temporary files used in merge sort */
52 struct fts_doc_item {
53 	dfield_t*	field;		/*!< field contains document string */
54 	doc_id_t	doc_id;		/*!< document ID */
55 	UT_LIST_NODE_T(fts_doc_item_t)	doc_list;
56 					/*!< list of doc items */
57 };
58 
59 /** This defines the list type that scan thread would feed the parallel
60 tokenization threads and sort threads. */
61 typedef UT_LIST_BASE_NODE_T(fts_doc_item_t)     fts_doc_list_t;
62 
63 #define FTS_NUM_AUX_INDEX	6
64 #define FTS_PLL_MERGE		1
65 
66 /** Sort information passed to each individual parallel sort thread */
67 struct fts_psort_t;
68 
69 /** Common info passed to each parallel sort thread */
70 struct fts_psort_common_t {
71 	row_merge_dup_t*	dup;		/*!< descriptor of FTS index */
72 	dict_table_t*		new_table;	/*!< source table */
73 	trx_t*			trx;		/*!< transaction */
74 	fts_psort_t*		all_info;	/*!< all parallel sort info */
75 	os_event_t		sort_event;	/*!< sort event */
76 	os_event_t		merge_event;	/*!< merge event */
77 	ibool			opt_doc_id_size;/*!< whether to use 4 bytes
78 						instead of 8 bytes integer to
79 						store Doc ID during sort, if
80 						Doc ID will not be big enough
81 						to use 8 bytes value */
82 };
83 
84 struct fts_psort_t {
85 	ulint			psort_id;	/*!< Parallel sort ID */
86 	row_merge_buf_t*	merge_buf[FTS_NUM_AUX_INDEX];
87 						/*!< sort buffer */
88 	merge_file_t*		merge_file[FTS_NUM_AUX_INDEX];
89 						/*!< sort file */
90 	row_merge_block_t*	merge_block[FTS_NUM_AUX_INDEX];
91 						/*!< buffer to write to file */
92 	row_merge_block_t*	block_alloc[FTS_NUM_AUX_INDEX];
93 						/*!< buffer to allocated */
94 	ulint			child_status;	/*!< child thread status */
95 	ulint			state;		/*!< parent thread state */
96 	fts_doc_list_t		fts_doc_list;	/*!< doc list to process */
97 	fts_psort_common_t*	psort_common;	/*!< ptr to all psort info */
98 	os_thread_t		thread_hdl;	/*!< thread handler */
99 	dberr_t			error;		/*!< db error during psort */
100 	ulint			memory_used;	/*!< memory used by fts_doc_list */
101 	ib_mutex_t		mutex;		/*!< mutex for fts_doc_list */
102 };
103 
104 /** Structure stores information from string tokenization operation */
105 struct fts_tokenize_ctx {
106 	ulint			processed_len;  /*!< processed string length */
107 	ulint			init_pos;       /*!< doc start position */
108 	ulint			buf_used;       /*!< the sort buffer (ID) when
109 						tokenization stops, which
110 						could due to sort buffer full */
111 	ulint			rows_added[FTS_NUM_AUX_INDEX];
112 						/*!< number of rows added for
113 						each FTS index partition */
114 	ib_rbt_t*		cached_stopword;/*!< in: stopword list */
115 	dfield_t		sort_field[FTS_NUM_FIELDS_SORT];
116 						/*!< in: sort field */
117 };
118 
119 typedef struct fts_tokenize_ctx fts_tokenize_ctx_t;
120 
121 /** Structure stores information needed for the insertion phase of FTS
122 parallel sort. */
123 struct fts_psort_insert {
124 	trx_t*		trx;		/*!< Transaction used for insertion */
125 	que_t**		ins_graph;	/*!< insert graph */
126 	fts_table_t	fts_table;	/*!< auxiliary table */
127 	CHARSET_INFO*	charset;	/*!< charset info */
128 	mem_heap_t*	heap;		/*!< heap */
129 	ibool		opt_doc_id_size;/*!< Whether to use smaller (4 bytes)
130 					integer for Doc ID */
131 };
132 
133 typedef struct fts_psort_insert	fts_psort_insert_t;
134 
135 
136 /** status bit used for communication between parent and child thread */
137 #define FTS_PARENT_COMPLETE	1
138 #define FTS_PARENT_EXITING	2
139 #define FTS_CHILD_COMPLETE	1
140 #define FTS_CHILD_EXITING	2
141 
142 /** Print some debug information */
143 #define	FTSORT_PRINT
144 
145 #ifdef	FTSORT_PRINT
146 #define	DEBUG_FTS_SORT_PRINT(str)		\
147 	do {					\
148 		ut_print_timestamp(stderr);	\
149 		fprintf(stderr, str);		\
150 	} while (0)
151 #else
152 #define DEBUG_FTS_SORT_PRINT(str)
153 #endif	/* FTSORT_PRINT */
154 
155 /*************************************************************//**
156 Create a temporary "fts sort index" used to merge sort the
157 tokenized doc string. The index has three "fields":
158 
159 1) Tokenized word,
160 2) Doc ID
161 3) Word's position in original 'doc'.
162 
163 @return dict_index_t structure for the fts sort index */
164 UNIV_INTERN
165 dict_index_t*
166 row_merge_create_fts_sort_index(
167 /*============================*/
168 	dict_index_t*		index,	/*!< in: Original FTS index
169 					based on which this sort index
170 					is created */
171 	const dict_table_t*	table,	/*!< in: table that FTS index
172 					is being created on */
173 	ibool*			opt_doc_id_size);
174 					/*!< out: whether to use 4 bytes
175 					instead of 8 bytes integer to
176 					store Doc ID during sort */
177 
178 /********************************************************************//**
179 Initialize FTS parallel sort structures.
180 @return TRUE if all successful */
181 UNIV_INTERN
182 ibool
183 row_fts_psort_info_init(
184 /*====================*/
185 	trx_t*			trx,	/*!< in: transaction */
186 	row_merge_dup_t*	dup,	/*!< in,own: descriptor of
187 					FTS index being created */
188 	const dict_table_t*	new_table,/*!< in: table where indexes are
189 					created */
190 	ibool			opt_doc_id_size,
191 					/*!< in: whether to use 4 bytes
192 					instead of 8 bytes integer to
193 					store Doc ID during sort */
194 	fts_psort_t**		psort,	/*!< out: parallel sort info to be
195 					instantiated */
196 	fts_psort_t**		merge)	/*!< out: parallel merge info
197 					to be instantiated */
198 	MY_ATTRIBUTE((nonnull));
199 /********************************************************************//**
200 Clean up and deallocate FTS parallel sort structures, and close
201 temparary merge sort files */
202 UNIV_INTERN
203 void
204 row_fts_psort_info_destroy(
205 /*=======================*/
206 	fts_psort_t*	psort_info,	/*!< parallel sort info */
207 	fts_psort_t*	merge_info);	/*!< parallel merge info */
208 /********************************************************************//**
209 Free up merge buffers when merge sort is done */
210 UNIV_INTERN
211 void
212 row_fts_free_pll_merge_buf(
213 /*=======================*/
214 	fts_psort_t*	psort_info);	/*!< in: parallel sort info */
215 
216 /*********************************************************************//**
217 Function performs parallel tokenization of the incoming doc strings.
218 @return OS_THREAD_DUMMY_RETURN */
219 UNIV_INTERN
220 os_thread_ret_t
221 fts_parallel_tokenization(
222 /*======================*/
223 	void*		arg);		/*!< in: psort_info for the thread */
224 /*********************************************************************//**
225 Start the parallel tokenization and parallel merge sort */
226 UNIV_INTERN
227 void
228 row_fts_start_psort(
229 /*================*/
230 	fts_psort_t*	psort_info);	/*!< in: parallel sort info */
231 /*********************************************************************//**
232 Function performs the merge and insertion of the sorted records.
233 @return OS_THREAD_DUMMY_RETURN */
234 UNIV_INTERN
235 os_thread_ret_t
236 fts_parallel_merge(
237 /*===============*/
238 	void*		arg);		/*!< in: parallel merge info */
239 /*********************************************************************//**
240 Kick off the parallel merge and insert thread */
241 UNIV_INTERN
242 void
243 row_fts_start_parallel_merge(
244 /*=========================*/
245 	fts_psort_t*	merge_info);	/*!< in: parallel sort info */
246 /********************************************************************//**
247 Read sorted FTS data files and insert data tuples to auxillary tables.
248 @return DB_SUCCESS or error number */
249 UNIV_INTERN
250 void
251 row_fts_insert_tuple(
252 /*=================*/
253 	fts_psort_insert_t*
254 			ins_ctx,        /*!< in: insert context */
255 	fts_tokenizer_word_t* word,	/*!< in: last processed
256 					tokenized word */
257 	ib_vector_t*	positions,	/*!< in: word position */
258 	doc_id_t*	in_doc_id,	/*!< in: last item doc id */
259 	dtuple_t*	dtuple);	/*!< in: entry to insert */
260 /********************************************************************//**
261 Propagate a newly added record up one level in the selection tree
262 @return parent where this value propagated to */
263 UNIV_INTERN
264 int
265 row_merge_fts_sel_propagate(
266 /*========================*/
267 	int		propogated,	/*<! in: tree node propagated */
268 	int*		sel_tree,	/*<! in: selection tree */
269 	ulint		level,		/*<! in: selection tree level */
270 	const mrec_t**	 mrec,		/*<! in: sort record */
271 	ulint**		offsets,	/*<! in: record offsets */
272 	dict_index_t*	index);		/*<! in: FTS index */
273 /********************************************************************//**
274 Read sorted file containing index data tuples and insert these data
275 tuples to the index
276 @return DB_SUCCESS or error number */
277 UNIV_INTERN
278 dberr_t
279 row_fts_merge_insert(
280 /*=================*/
281 	dict_index_t*	index,		/*!< in: index */
282 	dict_table_t*	table,		/*!< in: new table */
283 	fts_psort_t*	psort_info,	/*!< parallel sort info */
284 	ulint		id)		/* !< in: which auxiliary table's data
285 					to insert to */
286 	MY_ATTRIBUTE((nonnull));
287 #endif /* row0ftsort_h */
288