1 /*
2    Copyright (c) 2001, 2010, Oracle and/or its affiliates
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
16 
17 /* Written by Sergei A. Golubchik, who has a shared copyright to this code
18    added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
19 
20 #include "ftdefs.h"
21 #include <my_getopt.h>
22 
23 static void usage();
24 static void complain(int val);
25 static my_bool get_one_option(int, const struct my_option *, char *);
26 
27 static int count=0, stats=0, dump=0, lstats=0;
28 static my_bool verbose;
29 static char *query=NULL;
30 static uint lengths[256];
31 
32 #define MAX_LEN (HA_FT_MAXBYTELEN+10)
33 #define HOW_OFTEN_TO_WRITE 10000
34 
35 static struct my_option my_long_options[] =
36 {
37   {"help", 'h', "Display help and exit.",
38    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
39   {"help", '?', "Synonym for -h.",
40    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
41   {"count", 'c', "Calculate per-word stats (counts and global weights).",
42    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
43   {"dump", 'd', "Dump index (incl. data offsets and word weights).",
44    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
45   {"length", 'l', "Report length distribution.",
46    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
47   {"stats", 's', "Report global stats.",
48    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
49   {"verbose", 'v', "Be verbose.",
50    &verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
51   { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
52 };
53 
54 
main(int argc,char * argv[])55 int main(int argc,char *argv[])
56 {
57   int error=0;
58   uint keylen, keylen2=0, inx, doc_cnt=0;
59   float weight= 1.0;
60   double gws, min_gws=0, avg_gws=0;
61   MI_INFO *info;
62   char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN];
63   ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0;
64   struct { MI_INFO *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */
65 
66   MY_INIT(argv[0]);
67   if ((error= handle_options(&argc, &argv, my_long_options, get_one_option)))
68     exit(error);
69   if (count || dump)
70     verbose=0;
71   if (!count && !dump && !lstats && !query)
72     stats=1;
73 
74   if (verbose)
75     setbuf(stdout,NULL);
76 
77   if (argc < 2)
78     usage();
79 
80   {
81     char *end;
82     inx= (uint) strtoll(argv[1], &end, 10);
83     if (*end)
84       usage();
85   }
86 
87   init_key_cache(dflt_key_cache, MI_KEY_BLOCK_LENGTH, KEY_BUFFER_INIT, 0, 0, 0, 0);
88 
89   if (!(info=mi_open(argv[0], O_RDONLY,
90                      HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER)))
91   {
92     error=my_errno;
93     goto err;
94   }
95 
96   *buf2=0;
97   aio->info=info;
98 
99   if ((inx >= info->s->base.keys) ||
100       !(info->s->keyinfo[inx].flag & HA_FULLTEXT))
101   {
102     printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->filename);
103     goto err;
104   }
105 
106   mi_lock_database(info, F_EXTRA_LCK);
107 
108   info->lastpos= HA_OFFSET_ERROR;
109   info->update|= HA_STATE_PREV_FOUND;
110 
111   while (!(error=mi_rnext(info,NULL,inx)))
112   {
113     FT_WEIGTH subkeys;
114     keylen=*(info->lastkey);
115 
116     subkeys.i =ft_sintXkorr(info->lastkey+keylen+1);
117     if (subkeys.i >= 0)
118       weight= subkeys.f;
119 
120 #ifdef HAVE_SNPRINTF
121     snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1);
122 #else
123     sprintf(buf,"%.*s",(int) keylen,info->lastkey+1);
124 #endif
125     my_casedn_str(default_charset_info,buf);
126     total++;
127     lengths[keylen]++;
128 
129     if (count || stats)
130     {
131       if (strcmp(buf, buf2))
132       {
133         if (*buf2)
134         {
135           uniq++;
136           avg_gws+=gws=GWS_IN_USE;
137           if (count)
138             printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
139           if (maxlen<keylen2)
140           {
141             maxlen=keylen2;
142             strmov(buf_maxlen, buf2);
143           }
144           if (max_doc_cnt < doc_cnt)
145           {
146             max_doc_cnt=doc_cnt;
147             strmov(buf_min_gws, buf2);
148             min_gws=gws;
149           }
150         }
151         strmov(buf2, buf);
152         keylen2=keylen;
153         doc_cnt=0;
154       }
155       doc_cnt+= (subkeys.i >= 0 ? 1 : -subkeys.i);
156     }
157     if (dump)
158     {
159       if (subkeys.i >= 0)
160         printf("%9lx %20.7f %s\n", (long) info->lastpos,weight,buf);
161       else
162         printf("%9lx => %17d %s\n",(long) info->lastpos,-subkeys.i,buf);
163     }
164     if (verbose && (total%HOW_OFTEN_TO_WRITE)==0)
165       printf("%10ld\r",total);
166   }
167   mi_lock_database(info, F_UNLCK);
168 
169   if (count || stats)
170   {
171     if (*buf2)
172     {
173       uniq++;
174       avg_gws+=gws=GWS_IN_USE;
175       if (count)
176         printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
177       if (maxlen<keylen2)
178       {
179         maxlen=keylen2;
180         strmov(buf_maxlen, buf2);
181       }
182       if (max_doc_cnt < doc_cnt)
183       {
184         max_doc_cnt=doc_cnt;
185         strmov(buf_min_gws, buf2);
186         min_gws=gws;
187       }
188     }
189   }
190 
191   if (stats)
192   {
193     count=0;
194     for (inx=0;inx<256;inx++)
195     {
196       count+=lengths[inx];
197       if ((ulong) count >= total/2)
198         break;
199     }
200     printf("Total rows: %lu\nTotal words: %lu\n"
201            "Unique words: %lu\nLongest word: %lu chars (%s)\n"
202            "Median length: %u\n"
203            "Average global weight: %f\n"
204            "Most common word: %lu times, weight: %f (%s)\n",
205            (long) info->state->records, total, uniq, maxlen, buf_maxlen,
206            inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws);
207   }
208   if (lstats)
209   {
210     count=0;
211     for (inx=0; inx<256; inx++)
212     {
213       count+=lengths[inx];
214       if (count && lengths[inx])
215         printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx,
216                (ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count,
217                100.0*count/total);
218     }
219   }
220 
221 err:
222   if (error && error != HA_ERR_END_OF_FILE)
223     printf("got error %d\n",my_errno);
224   if (info)
225     mi_close(info);
226   return 0;
227 }
228 
229 
230 static my_bool
get_one_option(int optid,const struct my_option * opt,char * argument)231 get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
232 	       char *argument __attribute__((unused)))
233 {
234   switch(optid) {
235   case 'd':
236     dump=1;
237     complain(count || query);
238     break;
239   case 's':
240     stats=1;
241     complain(query!=0);
242     break;
243   case 'c':
244     count= 1;
245     complain(dump || query);
246     break;
247   case 'l':
248     lstats=1;
249     complain(query!=0);
250     break;
251   case '?':
252   case 'h':
253     usage();
254   }
255   return 0;
256 }
257 
258 
usage()259 static void usage()
260 {
261   printf("Use: myisam_ftdump <table_name> <index_num>\n");
262   my_print_help(my_long_options);
263   my_print_variables(my_long_options);
264   exit(1);
265 }
266 
267 
/*
  Poor man's assertion for option checking: when val is non-zero, print a
  message about conflicting options and exit with status 1.  Returns
  normally, without output, when val is zero.
*/
static void complain(int val) /* Kinda assert :-)  */
{
  if (!val)
    return;

  printf("You cannot use these options together!\n");
  exit(1);
}
276 
277 #include "mi_extrafunc.h"
278