1 /*
2 YamCha -- Yet Another Multipurpose CHunk Annotator
3
4 $Id: mkdarts.cpp,v 1.7 2004/03/12 17:12:13 taku-ku Exp $;
5
6 Copyright (C) 2000-2004 Taku Kudo <taku-ku@is.aist-nara.ac.jp>
7 This is free software with ABSOLUTELY NO WARRANTY.
8
9 This library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
13
14 This library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
18
19 You should have received a copy of the GNU Lesser General Public
20 License along with this library; if not, write to the Free Software
21 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "darts.h"
32
33 using namespace std;
34
/*
 * Progress callback: redraws a one-line text progress bar on stdout.
 *
 * The line is only rewritten when the integer percentage differs from the
 * one seen on the previous call, so frequent calls do not flood the
 * terminal.  The line ends with '\r' while work is in progress and with
 * '\n' once 100% is reached.
 *
 *   current  amount of work finished so far
 *   total    overall amount of work
 *
 * Always returns 1.
 */
int progress_bar (size_t current, size_t total)
{
  static char bar[] = "*******************************************";
  static int  scale = sizeof(bar) - 1;   // number of '*' cells in the bar
  static int  prev  = 0;                 // percentage shown by the last call

  // Nothing to draw for an empty job; this also avoids dividing by zero
  // (the resulting NaN -> int conversion would be undefined behaviour).
  if (total == 0) return 1;

  // Clamp an overshooting caller so the bar never exceeds its width and
  // the padding width handed to printf never becomes negative.
  if (current > total) current = total;

  const int cur_percentage = static_cast<int>(100.0 * current / total);
  const int bar_len        = static_cast<int>(1.0 * current * scale / total);

  if (prev != cur_percentage) {
    std::printf ("Making Double-Array: %3d%% |%.*s%*s| ",
                 cur_percentage, bar_len, bar, scale - bar_len, "");
    if (cur_percentage == 100) std::printf ("\n");
    else                       std::printf ("\r");
    std::fflush (stdout);
  }

  prev = cur_percentage;

  return 1;
}
55
main(int argc,char ** argv)56 int main (int argc, char **argv)
57 {
58 using namespace std;
59
60 if (argc < 3) {
61 std::cerr << "Usage: " << argv[0] << " File Index" << std::endl;
62 return 0;
63 }
64
65 std::string file = argv[argc-2];
66 std::string index = argv[argc-1];
67 std::istream *is;
68
69 if (file == "-") is = &std::cin;
70 else is = new std::ifstream (file.c_str());
71
72 if (! *is) {
73 std::cerr << "Cannot open: " << file << std::endl;
74 return -1;
75 }
76
77 char buf[8192];
78 std::vector <char *> str;
79 std::vector <int> id;
80
81 while (is->getline (buf, 8192)) {
82 unsigned int p = 0;
83 while (isspace(buf[p])) { ++p; }
84 int i = std::atoi (&buf[p]);
85 while (! isspace(buf[p])) { ++p; }
86 while (isspace(buf[p])) { ++p; }
87 char *tmp = new char [strlen (buf+p) + 1];
88 strcpy (tmp, buf+p);
89 str.push_back (tmp);
90 id.push_back (i);
91 }
92
93 Darts::DoubleArray da;
94 if (da.build (str.size(), &str[0], 0, &id[0], &progress_bar) < 0) {
95 std::cerr << "FATAL: cannot build Double-Array" << std::endl;
96 return -1;
97 }
98
99 if (da.save (index.c_str()) == -1) {
100 std::cerr << "FATAL: cannot open: " << index << std::endl;
101 return -1;
102 }
103
104 for (unsigned int i = 0; i < str.size(); i++) delete [] str[i];
105 if (file != "-") delete is;
106
107 return 0;
108 }
109