1 /*
2   YamCha -- Yet Another Multipurpose CHunk Annotator
3 
4   $Id: mkdarts.cpp,v 1.7 2004/03/12 17:12:13 taku-ku Exp $;
5 
6   Copyright (C) 2000-2004 Taku Kudo <taku-ku@is.aist-nara.ac.jp>
7   This is free software with ABSOLUTELY NO WARRANTY.
8 
9   This library is free software; you can redistribute it and/or
10   modify it under the terms of the GNU Lesser General Public
11   License as published by the Free Software Foundation; either
12   version 2.1 of the License, or (at your option) any later version.
13 
14   This library is distributed in the hope that it will be useful,
15   but WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17   Lesser General Public License for more details.
18 
19   You should have received a copy of the GNU Lesser General Public
20   License along with this library; if not, write to the Free Software
21   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22 */
23 
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "darts.h"
32 
33 using namespace std;
34 
/**
 * Draws a one-line textual progress bar on stdout.
 *
 * The line is reprinted only when the integer percentage differs from the
 * percentage seen on the previous call. Intermediate updates end with '\r'
 * so the next update overwrites them; the 100% update ends with '\n'.
 * Because the remembered percentage starts at 0, a 0% line is never printed
 * before progress first advances.
 *
 * @param current  amount of work completed so far (values above total are
 *                 treated as total)
 * @param total    amount of work at completion; when 0 there is no ratio to
 *                 show and nothing is printed
 * @return always 1
 */
int progress_bar (size_t current, size_t total)
{
  static const char bar[] = "*******************************************";
  static const int  scale = static_cast<int>(sizeof(bar)) - 1;
  static int        prev  = 0;

  // A zero total would make the ratio inf/NaN, and converting that to int
  // is undefined behaviour.
  if (total == 0) return 1;

  // Clamp so the bar never exceeds its width; otherwise the padding width
  // passed to "%*s" would go negative.
  if (current > total) current = total;

  const double done           = static_cast<double>(current);
  const int    cur_percentage = static_cast<int>(100.0 * done / total);
  const int    bar_len        = static_cast<int>(done * scale / total);

  if (prev != cur_percentage) {
    printf("Making Double-Array: %3d%% |%.*s%*s| ", cur_percentage, bar_len, bar, scale - bar_len, "");
    if (cur_percentage == 100)  printf("\n");
    else                        printf("\r");
    fflush(stdout);
  }

  prev = cur_percentage;

  return 1;
}
55 
main(int argc,char ** argv)56 int main (int argc, char **argv)
57 {
58   using namespace std;
59 
60   if (argc < 3) {
61     std::cerr << "Usage: " << argv[0] << " File Index" << std::endl;
62     return 0;
63   }
64 
65   std::string file  = argv[argc-2];
66   std::string index = argv[argc-1];
67   std::istream *is;
68 
69   if (file == "-") is = &std::cin;
70   else             is = new std::ifstream (file.c_str());
71 
72   if (! *is) {
73     std::cerr << "Cannot open: " << file << std::endl;
74     return -1;
75   }
76 
77   char buf[8192];
78   std::vector <char *> str;
79   std::vector <int> id;
80 
81   while (is->getline (buf, 8192)) {
82      unsigned int p = 0;
83      while (isspace(buf[p])) { ++p; }
84      int i = std::atoi (&buf[p]);
85      while (! isspace(buf[p])) { ++p; }
86      while (isspace(buf[p])) { ++p; }
87      char *tmp = new char [strlen (buf+p) + 1];
88      strcpy (tmp, buf+p);
89      str.push_back (tmp);
90      id.push_back (i);
91   }
92 
93   Darts::DoubleArray da;
94   if (da.build (str.size(), &str[0], 0, &id[0], &progress_bar) < 0) {
95     std::cerr << "FATAL: cannot build Double-Array" << std::endl;
96     return -1;
97   }
98 
99   if (da.save  (index.c_str()) == -1) {
100     std::cerr << "FATAL: cannot open: " << index << std::endl;
101     return -1;
102   }
103 
104   for (unsigned int i = 0; i < str.size(); i++) delete [] str[i];
105   if (file != "-") delete is;
106 
107   return 0;
108 }
109