1 /* Copyright (c) 2001, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /* Written by Sergei A. Golubchik, who has a shared copyright to this code
24 added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
25
26 #include "ftdefs.h"
27 #include <my_getopt.h>
28
/* Forward declarations of the helpers that are defined after main(). */
static void usage();
static void complain(int val);
static my_bool get_one_option(int, const struct my_option *, char *);

/*
  Mode flags, set to 1 by get_one_option() for -c, -s, -d and -l.
  main() turns stats on when no other mode was requested, and later reuses
  count as a running total while printing the length reports.
*/
static int count=0, stats=0, dump=0, lstats=0;
/* Storage for the --verbose option (referenced from my_long_options). */
static my_bool verbose;
/*
  Only tested, never assigned, in the visible code, so it stays NULL here.
  NOTE(review): looks like a leftover of a removed query mode - confirm.
*/
static char *query=NULL;
/* lengths[n] is incremented by main() for every index entry of length n. */
static uint lengths[256];

/* Size of the word buffers declared in main(). */
#define MAX_LEN (HA_FT_MAXBYTELEN+10)
/* In verbose mode main() prints the entry counter every this many entries. */
#define HOW_OFTEN_TO_WRITE 10000
40
/*
  Command-line options handed to handle_options(), my_print_help() and
  my_print_variables(). Every entry except "verbose" is a plain flag whose
  short-option character is acted upon in get_one_option(); "verbose" is
  stored directly into the verbose variable. "help" is listed twice so that
  both -h and -? are accepted. The all-zero entry terminates the table.
*/
static struct my_option my_long_options[] =
{
  {"help", 'h', "Display help and exit.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"help", '?', "Synonym for -h.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"count", 'c', "Calculate per-word stats (counts and global weights).",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"dump", 'd', "Dump index (incl. data offsets and word weights).",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"length", 'l', "Report length distribution.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"stats", 's', "Report global stats.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"verbose", 'v', "Be verbose.",
   &verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
  { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
59
60
keycache_thread_var()61 extern st_keycache_thread_var *keycache_thread_var()
62 {
63 return &main_thread_keycache_var;
64 }
65
66
main(int argc,char * argv[])67 int main(int argc,char *argv[])
68 {
69 int error=0, subkeys;
70 uint keylen, keylen2=0, inx, doc_cnt=0;
71 float weight= 1.0;
72 double gws, min_gws=0, avg_gws=0;
73 MI_INFO *info;
74 char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN];
75 ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0;
76 struct { MI_INFO *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */
77
78 MY_INIT(argv[0]);
79
80 memset(&main_thread_keycache_var, 0, sizeof(st_keycache_thread_var));
81 mysql_cond_init(PSI_NOT_INSTRUMENTED,
82 &main_thread_keycache_var.suspend);
83
84 if ((error= handle_options(&argc, &argv, my_long_options, get_one_option)))
85 exit(error);
86 if (count || dump)
87 verbose=0;
88 if (!count && !dump && !lstats && !query)
89 stats=1;
90
91 if (verbose)
92 setbuf(stdout,NULL);
93
94 if (argc < 2)
95 usage();
96
97 {
98 char *end;
99 inx= (uint) my_strtoll(argv[1], &end, 10);
100 if (*end)
101 usage();
102 }
103
104 init_key_cache(dflt_key_cache,MI_KEY_BLOCK_LENGTH,USE_BUFFER_INIT, 0, 0);
105
106 if (!(info=mi_open(argv[0], O_RDONLY,
107 HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER)))
108 {
109 error=my_errno();
110 goto err;
111 }
112
113 *buf2=0;
114 aio->info=info;
115
116 if ((inx >= info->s->base.keys) ||
117 !(info->s->keyinfo[inx].flag & HA_FULLTEXT))
118 {
119 printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->filename);
120 goto err;
121 }
122
123 mi_lock_database(info, F_EXTRA_LCK);
124
125 info->lastpos= HA_OFFSET_ERROR;
126 info->update|= HA_STATE_PREV_FOUND;
127
128 while (!(error=mi_rnext(info,NULL,inx)))
129 {
130 keylen=*(info->lastkey);
131
132 subkeys=ft_sintXkorr(info->lastkey+keylen+1);
133 if (subkeys >= 0)
134 ft_floatXget(weight, info->lastkey+keylen+1);
135
136 my_snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1);
137 my_casedn_str(default_charset_info,buf);
138 total++;
139 lengths[keylen]++;
140
141 if (count || stats)
142 {
143 if (strcmp(buf, buf2))
144 {
145 if (*buf2)
146 {
147 uniq++;
148 avg_gws+=gws=GWS_IN_USE;
149 if (count)
150 printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
151 if (maxlen<keylen2)
152 {
153 maxlen=keylen2;
154 my_stpcpy(buf_maxlen, buf2);
155 }
156 if (max_doc_cnt < doc_cnt)
157 {
158 max_doc_cnt=doc_cnt;
159 my_stpcpy(buf_min_gws, buf2);
160 min_gws=gws;
161 }
162 }
163 my_stpcpy(buf2, buf);
164 keylen2=keylen;
165 doc_cnt=0;
166 }
167 doc_cnt+= (subkeys >= 0 ? 1 : -subkeys);
168 }
169 if (dump)
170 {
171 if (subkeys>=0)
172 printf("%9lx %20.7f %s\n", (long) info->lastpos,weight,buf);
173 else
174 printf("%9lx => %17d %s\n",(long) info->lastpos,-subkeys,buf);
175 }
176 if (verbose && (total%HOW_OFTEN_TO_WRITE)==0)
177 printf("%10ld\r",total);
178 }
179 mi_lock_database(info, F_UNLCK);
180
181 if (count || stats)
182 {
183 if (*buf2)
184 {
185 uniq++;
186 avg_gws+=gws=GWS_IN_USE;
187 if (count)
188 printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
189 if (maxlen<keylen2)
190 {
191 maxlen=keylen2;
192 my_stpcpy(buf_maxlen, buf2);
193 }
194 if (max_doc_cnt < doc_cnt)
195 {
196 max_doc_cnt=doc_cnt;
197 my_stpcpy(buf_min_gws, buf2);
198 min_gws=gws;
199 }
200 }
201 }
202
203 if (stats)
204 {
205 count=0;
206 for (inx=0;inx<256;inx++)
207 {
208 count+=lengths[inx];
209 if ((ulong) count >= total/2)
210 break;
211 }
212 printf("Total rows: %lu\nTotal words: %lu\n"
213 "Unique words: %lu\nLongest word: %lu chars (%s)\n"
214 "Median length: %u\n"
215 "Average global weight: %f\n"
216 "Most common word: %lu times, weight: %f (%s)\n",
217 (long) info->state->records, total, uniq, maxlen, buf_maxlen,
218 inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws);
219 }
220 if (lstats)
221 {
222 count=0;
223 for (inx=0; inx<256; inx++)
224 {
225 count+=lengths[inx];
226 if (count && lengths[inx])
227 printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx,
228 (ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count,
229 100.0*count/total);
230 }
231 }
232
233 err:
234 if (error && error != HA_ERR_END_OF_FILE)
235 printf("got error %d\n",my_errno());
236 if (info)
237 mi_close(info);
238 mysql_cond_destroy(&main_thread_keycache_var.suspend);
239 return 0;
240 }
241
242
243 static my_bool
get_one_option(int optid,const struct my_option * opt MY_ATTRIBUTE ((unused)),char * argument MY_ATTRIBUTE ((unused)))244 get_one_option(int optid, const struct my_option *opt MY_ATTRIBUTE((unused)),
245 char *argument MY_ATTRIBUTE((unused)))
246 {
247 switch(optid) {
248 case 'd':
249 dump=1;
250 complain(count || query);
251 break;
252 case 's':
253 stats=1;
254 complain(query!=0);
255 break;
256 case 'c':
257 count= 1;
258 complain(dump || query);
259 break;
260 case 'l':
261 lstats=1;
262 complain(query!=0);
263 break;
264 case '?':
265 case 'h':
266 usage();
267 }
268 return 0;
269 }
270
271
usage()272 static void usage()
273 {
274 printf("Use: myisam_ftdump <table_name> <index_num>\n");
275 my_print_help(my_long_options);
276 my_print_variables(my_long_options);
277 exit(1);
278 }
279
280
/*
  Poor man's assertion for option checking: when val is non-zero, report
  that the given options cannot be combined and exit with status 1.
  Returns normally when val is zero.
*/
static void complain(int val)
{
  if (!val)
    return;

  printf("You cannot use these options together!\n");
  exit(1);
}
289
290 #include "mi_extrafunc.h"
291