1 /* Copyright (c) 2001, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /* Written by Sergei A. Golubchik, who has a shared copyright to this code
24    added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
25 
26 #include "ftdefs.h"
27 #include <my_getopt.h>
28 
29 static void usage();
30 static void complain(int val);
31 static my_bool get_one_option(int, const struct my_option *, char *);
32 
33 static int count=0, stats=0, dump=0, lstats=0;
34 static my_bool verbose;
35 static char *query=NULL;
36 static uint lengths[256];
37 
38 #define MAX_LEN (HA_FT_MAXBYTELEN+10)
39 #define HOW_OFTEN_TO_WRITE 10000
40 
41 static struct my_option my_long_options[] =
42 {
43   {"help", 'h', "Display help and exit.",
44    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
45   {"help", '?', "Synonym for -h.",
46    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
47   {"count", 'c', "Calculate per-word stats (counts and global weights).",
48    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
49   {"dump", 'd', "Dump index (incl. data offsets and word weights).",
50    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
51   {"length", 'l', "Report length distribution.",
52    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
53   {"stats", 's', "Report global stats.",
54    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
55   {"verbose", 'v', "Be verbose.",
56    &verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
57   { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
58 };
59 
60 
keycache_thread_var()61 extern st_keycache_thread_var *keycache_thread_var()
62 {
63   return &main_thread_keycache_var;
64 }
65 
66 
main(int argc,char * argv[])67 int main(int argc,char *argv[])
68 {
69   int error=0, subkeys;
70   uint keylen, keylen2=0, inx, doc_cnt=0;
71   float weight= 1.0;
72   double gws, min_gws=0, avg_gws=0;
73   MI_INFO *info;
74   char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN];
75   ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0;
76   struct { MI_INFO *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */
77 
78   MY_INIT(argv[0]);
79 
80   memset(&main_thread_keycache_var, 0, sizeof(st_keycache_thread_var));
81   mysql_cond_init(PSI_NOT_INSTRUMENTED,
82                   &main_thread_keycache_var.suspend);
83 
84   if ((error= handle_options(&argc, &argv, my_long_options, get_one_option)))
85     exit(error);
86   if (count || dump)
87     verbose=0;
88   if (!count && !dump && !lstats && !query)
89     stats=1;
90 
91   if (verbose)
92     setbuf(stdout,NULL);
93 
94   if (argc < 2)
95     usage();
96 
97   {
98     char *end;
99     inx= (uint) my_strtoll(argv[1], &end, 10);
100     if (*end)
101       usage();
102   }
103 
104   init_key_cache(dflt_key_cache,MI_KEY_BLOCK_LENGTH,USE_BUFFER_INIT, 0, 0);
105 
106   if (!(info=mi_open(argv[0], O_RDONLY,
107                      HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER)))
108   {
109     error=my_errno();
110     goto err;
111   }
112 
113   *buf2=0;
114   aio->info=info;
115 
116   if ((inx >= info->s->base.keys) ||
117       !(info->s->keyinfo[inx].flag & HA_FULLTEXT))
118   {
119     printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->filename);
120     goto err;
121   }
122 
123   mi_lock_database(info, F_EXTRA_LCK);
124 
125   info->lastpos= HA_OFFSET_ERROR;
126   info->update|= HA_STATE_PREV_FOUND;
127 
128   while (!(error=mi_rnext(info,NULL,inx)))
129   {
130     keylen=*(info->lastkey);
131 
132     subkeys=ft_sintXkorr(info->lastkey+keylen+1);
133     if (subkeys >= 0)
134       ft_floatXget(weight, info->lastkey+keylen+1);
135 
136     my_snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1);
137     my_casedn_str(default_charset_info,buf);
138     total++;
139     lengths[keylen]++;
140 
141     if (count || stats)
142     {
143       if (strcmp(buf, buf2))
144       {
145         if (*buf2)
146         {
147           uniq++;
148           avg_gws+=gws=GWS_IN_USE;
149           if (count)
150             printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
151           if (maxlen<keylen2)
152           {
153             maxlen=keylen2;
154             my_stpcpy(buf_maxlen, buf2);
155           }
156           if (max_doc_cnt < doc_cnt)
157           {
158             max_doc_cnt=doc_cnt;
159             my_stpcpy(buf_min_gws, buf2);
160             min_gws=gws;
161           }
162         }
163         my_stpcpy(buf2, buf);
164         keylen2=keylen;
165         doc_cnt=0;
166       }
167       doc_cnt+= (subkeys >= 0 ? 1 : -subkeys);
168     }
169     if (dump)
170     {
171       if (subkeys>=0)
172         printf("%9lx %20.7f %s\n", (long) info->lastpos,weight,buf);
173       else
174         printf("%9lx => %17d %s\n",(long) info->lastpos,-subkeys,buf);
175     }
176     if (verbose && (total%HOW_OFTEN_TO_WRITE)==0)
177       printf("%10ld\r",total);
178   }
179   mi_lock_database(info, F_UNLCK);
180 
181   if (count || stats)
182   {
183     if (*buf2)
184     {
185       uniq++;
186       avg_gws+=gws=GWS_IN_USE;
187       if (count)
188         printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
189       if (maxlen<keylen2)
190       {
191         maxlen=keylen2;
192         my_stpcpy(buf_maxlen, buf2);
193       }
194       if (max_doc_cnt < doc_cnt)
195       {
196         max_doc_cnt=doc_cnt;
197         my_stpcpy(buf_min_gws, buf2);
198         min_gws=gws;
199       }
200     }
201   }
202 
203   if (stats)
204   {
205     count=0;
206     for (inx=0;inx<256;inx++)
207     {
208       count+=lengths[inx];
209       if ((ulong) count >= total/2)
210         break;
211     }
212     printf("Total rows: %lu\nTotal words: %lu\n"
213            "Unique words: %lu\nLongest word: %lu chars (%s)\n"
214            "Median length: %u\n"
215            "Average global weight: %f\n"
216            "Most common word: %lu times, weight: %f (%s)\n",
217            (long) info->state->records, total, uniq, maxlen, buf_maxlen,
218            inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws);
219   }
220   if (lstats)
221   {
222     count=0;
223     for (inx=0; inx<256; inx++)
224     {
225       count+=lengths[inx];
226       if (count && lengths[inx])
227         printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx,
228                (ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count,
229                100.0*count/total);
230     }
231   }
232 
233 err:
234   if (error && error != HA_ERR_END_OF_FILE)
235     printf("got error %d\n",my_errno());
236   if (info)
237     mi_close(info);
238   mysql_cond_destroy(&main_thread_keycache_var.suspend);
239   return 0;
240 }
241 
242 
243 static my_bool
get_one_option(int optid,const struct my_option * opt MY_ATTRIBUTE ((unused)),char * argument MY_ATTRIBUTE ((unused)))244 get_one_option(int optid, const struct my_option *opt MY_ATTRIBUTE((unused)),
245 	       char *argument MY_ATTRIBUTE((unused)))
246 {
247   switch(optid) {
248   case 'd':
249     dump=1;
250     complain(count || query);
251     break;
252   case 's':
253     stats=1;
254     complain(query!=0);
255     break;
256   case 'c':
257     count= 1;
258     complain(dump || query);
259     break;
260   case 'l':
261     lstats=1;
262     complain(query!=0);
263     break;
264   case '?':
265   case 'h':
266     usage();
267   }
268   return 0;
269 }
270 
271 
usage()272 static void usage()
273 {
274   printf("Use: myisam_ftdump <table_name> <index_num>\n");
275   my_print_help(my_long_options);
276   my_print_variables(my_long_options);
277   exit(1);
278 }
279 
280 
/*
  Poor man's assert for option combinations.

  val  non-zero when the options given so far cannot be combined.

  Returns normally when val is 0; otherwise prints a message and
  terminates the process with exit status 1.
*/
static void complain(int val)
{
  if (!val)
    return;

  printf("You cannot use these options together!\n");
  exit(1);
}
289 
290 #include "mi_extrafunc.h"
291