1 //
2 // CRF++ -- Yet Another CRF toolkit
3 //
4 // $Id: tagger.cpp 1601 2007-03-31 09:47:18Z taku $;
5 //
6 // Copyright(C) 2005-2007 Taku Kudo <taku@chasen.org>
7 //
8 #include <iostream>
9 #include <vector>
10 #include <iterator>
11 #include <cmath>
12 #include <string>
13 #include <sstream>
14 #include "stream_wrapper.h"
15 #include "common.h"
16 #include "tagger.h"
17
namespace {
// Fallback text returned by the Windows getGlobalError() when the calling
// thread has no error buffer in its TLS slot.
const char kUnknownError[] = "Unknown Error";
// Size in bytes of each error buffer; setGlobalError() copies at most
// kErrorBufferSize - 1 characters and NUL-terminates the last byte.
const size_t kErrorBufferSize = 256;
}  // namespace
22
23 #if defined(_WIN32) && !defined(__CYGWIN__)
24 namespace {
25 DWORD g_tls_index = TLS_OUT_OF_INDEXES;
26
getGlobalError()27 const char *getGlobalError() {
28 LPVOID data = ::TlsGetValue(g_tls_index);
29 return data == NULL ? kUnknownError : reinterpret_cast<const char *>(data);
30 }
31
setGlobalError(const char * str)32 void setGlobalError(const char *str) {
33 char *data = reinterpret_cast<char *>(::TlsGetValue(g_tls_index));
34 if (data == NULL) {
35 return;
36 }
37 strncpy(data, str, kErrorBufferSize - 1);
38 data[kErrorBufferSize - 1] = '\0';
39 }
40 } // namespace
// Module handle of this DLL; DllMain records the first handle it is given.
HINSTANCE DllInstance = 0;

extern "C" {
// DLL entry point. Owns the lifetime of the TLS slot (g_tls_index) and of the
// per-thread error buffers read and written by getGlobalError() and
// setGlobalError().
//
// Returns FALSE only when the TLS slot cannot be allocated on process attach;
// every other notification returns TRUE.
BOOL WINAPI DllMain(HINSTANCE hinst, DWORD dwReason, LPVOID) {
  LPVOID data = 0;
  if (!DllInstance) {
    DllInstance = hinst;
  }
  switch (dwReason) {
    case DLL_PROCESS_ATTACH:
      if ((g_tls_index = ::TlsAlloc()) == TLS_OUT_OF_INDEXES) {
        return FALSE;
      }
      // Intentional fall-through: the attaching thread also needs its own
      // error buffer, which the DLL_THREAD_ATTACH case allocates.
    case DLL_THREAD_ATTACH:
      // LPTR yields zero-initialised memory, so the buffer starts out as an
      // empty string. An allocation failure is tolerated: the slot stays
      // empty and getGlobalError() falls back to kUnknownError.
      data = (LPVOID)::LocalAlloc(LPTR, kErrorBufferSize);
      if (data) {
        ::TlsSetValue(g_tls_index, data);
      }
      break;
    case DLL_THREAD_DETACH:
      // Release the buffer owned by the thread that is going away.
      data = ::TlsGetValue(g_tls_index);
      if (data) {
        ::LocalFree((HLOCAL)data);
      }
      break;
    case DLL_PROCESS_DETACH:
      // Release the detaching thread's buffer, then the TLS slot itself.
      data = ::TlsGetValue(g_tls_index);
      if (data) {
        ::LocalFree((HLOCAL)data);
      }
      ::TlsFree(g_tls_index);
      g_tls_index = TLS_OUT_OF_INDEXES;
      break;
    default:
      break;
  }
  return TRUE;
}
}
81 #else // _WIN32
82
namespace {
// Storage for the last error message on non-Windows builds. The buffer is
// declared per-thread (__thread) when HAVE_TLS_KEYWORD is defined; otherwise
// a single buffer is shared by every thread.
#ifdef HAVE_TLS_KEYWORD
__thread char kErrorBuffer[kErrorBufferSize];
#else
char kErrorBuffer[kErrorBufferSize];
#endif
}
90
91 namespace {
getGlobalError()92 const char *getGlobalError() {
93 return kErrorBuffer;
94 }
95
setGlobalError(const char * str)96 void setGlobalError(const char *str) {
97 strncpy(kErrorBuffer, str, kErrorBufferSize - 1);
98 kErrorBuffer[kErrorBufferSize - 1] = '\0';
99 }
100 } // namespace
101 #endif
102
namespace {
// Command line options accepted by the tagger and model openers in this
// file. The fields appear to be: long name, short name, default value,
// argument description, help text (confirm against CRFPP::Option).
// The all-zero entry terminates the table.
//
// NOTE(review): "verbose" and "version" both use the short name 'v'.
// Presumably only one of them can be reached through -v; confirm how
// Param::open resolves duplicate short names.
static const CRFPP::Option long_options[] = {
  {"model",  'm',  0,       "FILE",  "set FILE for model file"},
  {"nbest",  'n', "0",      "INT",   "output n-best results"},
  {"verbose" , 'v', "0",    "INT",   "set INT for verbose level"},
  {"cost-factor", 'c', "1.0", "FLOAT", "set cost factor"},
  {"output",         'o',  0,       "FILE",  "use FILE as output file"},
  {"version",        'v',  0,        0,       "show the version and exit" },
  {"help",   'h',  0,        0,       "show this help and exit" },
  {0, 0, 0, 0, 0}
};
}  // namespace
115
116 namespace CRFPP {
117
createTagger() const118 Tagger *ModelImpl::createTagger() const {
119 if (!feature_index_.get()) {
120 return 0;
121 }
122 scoped_ptr<TaggerImpl> tagger(new TaggerImpl);
123 tagger->open(feature_index_.get(), nbest_, vlevel_);
124 return tagger.release();
125 }
126
open(FeatureIndex * feature_index,Allocator * allocator)127 bool TaggerImpl::open(FeatureIndex *feature_index,
128 Allocator *allocator) {
129 close();
130 mode_ = LEARN;
131 feature_index_ = feature_index;
132 allocator_ = allocator;
133 ysize_ = feature_index_->ysize();
134 return true;
135 }
136
open(FeatureIndex * feature_index,unsigned int nbest,unsigned int vlevel)137 bool TaggerImpl::open(FeatureIndex *feature_index,
138 unsigned int nbest,
139 unsigned int vlevel) {
140 close();
141 mode_ = TEST_SHARED;
142 feature_index_ = feature_index;
143 nbest_ = nbest;
144 vlevel_ = vlevel;
145 allocator_ = new Allocator;
146 ysize_ = feature_index_->ysize();
147 return true;
148 }
149
openFromArray(const Param & param,const char * buf,size_t size)150 bool ModelImpl::openFromArray(const Param ¶m,
151 const char *buf,
152 size_t size) {
153 nbest_ = param.get<int>("nbest");
154 vlevel_ = param.get<int>("verbose");
155 feature_index_.reset(new DecoderFeatureIndex);
156 if (!feature_index_->openFromArray(buf, size)) {
157 WHAT << feature_index_->what();
158 feature_index_.reset(0);
159 return false;
160 }
161 const double c = param.get<double>("cost-factor");
162 feature_index_->set_cost_factor(c);
163 return true;
164 }
165
open(const Param & param)166 bool ModelImpl::open(const Param ¶m) {
167 nbest_ = param.get<int>("nbest");
168 vlevel_ = param.get<int>("verbose");
169 const std::string model = param.get<std::string>("model");
170 feature_index_.reset(new DecoderFeatureIndex);
171 if (!feature_index_->open(model.c_str())) {
172 WHAT << feature_index_->what();
173 feature_index_.reset(0);
174 return false;
175 }
176 const double c = param.get<double>("cost-factor");
177 feature_index_->set_cost_factor(c);
178 return true;
179 }
180
open(int argc,char ** argv)181 bool ModelImpl::open(int argc, char** argv) {
182 Param param;
183 CHECK_FALSE(param.open(argc, argv, long_options))
184 << param.what();
185 return open(param);
186 }
187
open(const char * arg)188 bool ModelImpl::open(const char* arg) {
189 Param param;
190 CHECK_FALSE(param.open(arg, long_options)) << param.what();
191 return open(param);
192 }
193
openFromArray(int argc,char ** argv,const char * buf,size_t size)194 bool ModelImpl::openFromArray(int argc, char** argv,
195 const char *buf, size_t size) {
196 Param param;
197 CHECK_FALSE(param.open(argc, argv, long_options))
198 << param.what();
199 return openFromArray(param, buf, size);
200 }
201
openFromArray(const char * arg,const char * buf,size_t size)202 bool ModelImpl::openFromArray(const char* arg,
203 const char *buf, size_t size) {
204 Param param;
205 CHECK_FALSE(param.open(arg, long_options)) << param.what();
206 return openFromArray(param, buf, size);
207 }
208
getTemplate() const209 const char *ModelImpl::getTemplate() const {
210 if (feature_index_.get()) {
211 return feature_index_->getTemplate();
212 }
213 return 0;
214 }
215
open(const Param & param)216 bool TaggerImpl::open(const Param ¶m) {
217 close();
218
219 if (!param.help_version()) {
220 close();
221 return false;
222 }
223
224 nbest_ = param.get<int>("nbest");
225 vlevel_ = param.get<int>("verbose");
226
227 std::string model = param.get<std::string>("model");
228
229 DecoderFeatureIndex *decoder_feature_index = new DecoderFeatureIndex;
230 feature_index_ = decoder_feature_index;
231 allocator_ = new Allocator;
232
233 if (!decoder_feature_index->open(model.c_str())) {
234 WHAT << feature_index_->what();
235 close();
236 return false;
237 }
238
239 const double c = param.get<double>("cost-factor");
240
241 if (c <= 0.0) {
242 WHAT << "cost factor must be positive";
243 close();
244 return false;
245 }
246
247 feature_index_->set_cost_factor(c);
248 ysize_ = feature_index_->ysize();
249
250 return true;
251 }
252
open(int argc,char ** argv)253 bool TaggerImpl::open(int argc, char **argv) {
254 Param param;
255 CHECK_FALSE(param.open(argc, argv, long_options))
256 << param.what();
257 return open(param);
258 }
259
open(const char * arg)260 bool TaggerImpl::open(const char *arg) {
261 Param param;
262 CHECK_FALSE(param.open(arg, long_options)) << param.what();
263 return open(param);
264 }
265
close()266 void TaggerImpl::close() {
267 if (mode_ == TEST) {
268 delete feature_index_;
269 delete allocator_;
270 feature_index_ = 0;
271 allocator_ = 0;
272 } else if (mode_ == TEST_SHARED) {
273 delete allocator_;
274 allocator_ = 0;
275 }
276 }
277
set_model(const Model & model)278 bool TaggerImpl::set_model(const Model &model) {
279 if (mode_ == TEST) {
280 // feature_index_ => took the owner
281 // allocator_ => reuse
282 delete feature_index_;
283 } else if (mode_ == LEARN) {
284 // feature_index_ => did not take the owner
285 // allocator_ => did not take the owner.
286 allocator_ = new Allocator;
287 } else if (mode_ == TEST_SHARED) {
288 // feature_index_ => did not take the owner
289 // allocator_ => reuse
290 }
291 mode_ = TEST_SHARED;
292 const ModelImpl *model_impl = static_cast<const ModelImpl *>(&model);
293 feature_index_ = model_impl->feature_index();
294 nbest_ = model_impl->nbest();
295 vlevel_ = model_impl->vlevel();
296 ysize_ = feature_index_->ysize();
297 return true;
298 }
299
add2(size_t size,const char ** column,bool copy)300 bool TaggerImpl::add2(size_t size, const char **column, bool copy) {
301 const size_t xsize = feature_index_->xsize();
302
303 if ((mode_ == LEARN && size < xsize + 1) ||
304 ((mode_ == TEST || mode_ == TEST_SHARED) && size < xsize)) {
305 CHECK_FALSE(false) << "# x is small: size="
306 << size << " xsize=" << xsize;
307 }
308
309 size_t s = x_.size() + 1;
310 x_.resize(s);
311 node_.resize(s);
312 answer_.resize(s);
313 result_.resize(s);
314 s = x_.size() - 1;
315
316 if (copy) {
317 for (size_t k = 0; k < size; ++k) {
318 x_[s].push_back(allocator_->strdup(column[k]));
319 }
320 } else {
321 for (size_t k = 0; k < size; ++k) {
322 x_[s].push_back(column[k]);
323 }
324 }
325
326 result_[s] = answer_[s] = 0; // dummy
327 if (mode_ == LEARN) {
328 size_t r = ysize_;
329 for (size_t k = 0; k < ysize_; ++k) {
330 if (std::strcmp(yname(k), column[xsize]) == 0) {
331 r = k;
332 }
333 }
334
335 CHECK_FALSE(r != ysize_) << "cannot find answer: " << column[xsize];
336 answer_[s] = r;
337 }
338
339 node_[s].resize(ysize_);
340
341 return true;
342 }
343
add(size_t size,const char ** column)344 bool TaggerImpl::add(size_t size, const char **column) {
345 return add2(size, column, true);
346 }
347
add(const char * line)348 bool TaggerImpl::add(const char* line) {
349 char *p = allocator_->strdup(line);
350 scoped_fixed_array<const char *, 8192> column;
351 const size_t size = tokenize2(p, "\t ", column.get(), column.size());
352 if (!add2(size, column.get(), false)) {
353 return false;
354 }
355 return true;
356 }
357
read(std::istream * is)358 bool TaggerImpl::read(std::istream *is) {
359 scoped_fixed_array<char, 8192> line;
360 clear();
361
362 for (;;) {
363 if (!is->getline(line.get(), line.size())) {
364 is->clear(std::ios::eofbit|std::ios::badbit);
365 return true;
366 }
367 if (line[0] == '\0' || line[0] == ' ' || line[0] == '\t') {
368 break;
369 }
370 if (!add(line.get())) {
371 return false;
372 }
373 }
374
375 return true;
376 }
377
set_penalty(size_t i,size_t j,double penalty)378 void TaggerImpl::set_penalty(size_t i, size_t j, double penalty) {
379 if (penalty_.empty()) {
380 penalty_.resize(node_.size());
381 for (size_t s = 0; s < penalty_.size(); ++s) {
382 penalty_[s].resize(ysize_);
383 }
384 }
385 penalty_[i][j] = penalty;
386 }
387
penalty(size_t i,size_t j) const388 double TaggerImpl::penalty(size_t i, size_t j) const {
389 return penalty_.empty() ? 0.0 : penalty_[i][j];
390 }
391
shrink()392 bool TaggerImpl::shrink() {
393 CHECK_FALSE(feature_index_->buildFeatures(this))
394 << feature_index_->what();
395 std::vector<std::vector<const char *> >(x_).swap(x_);
396 std::vector<std::vector<Node *> >(node_).swap(node_);
397 std::vector<unsigned short int>(answer_).swap(answer_);
398 std::vector<unsigned short int>(result_).swap(result_);
399
400 return true;
401 }
402
initNbest()403 bool TaggerImpl::initNbest() {
404 if (!agenda_.get()) {
405 agenda_.reset(new std::priority_queue <QueueElement*,
406 std::vector<QueueElement *>, QueueElementComp>);
407 nbest_freelist_.reset(new FreeList <QueueElement>(128));
408 }
409
410 nbest_freelist_->free();
411 while (!agenda_->empty()) {
412 agenda_->pop(); // make empty
413 }
414
415 const size_t k = x_.size()-1;
416 for (size_t i = 0; i < ysize_; ++i) {
417 QueueElement *eos = nbest_freelist_->alloc();
418 eos->node = node_[k][i];
419 eos->fx = -node_[k][i]->bestCost;
420 eos->gx = -node_[k][i]->cost;
421 eos->next = 0;
422 agenda_->push(eos);
423 }
424
425 return true;
426 }
427
next()428 bool TaggerImpl::next() {
429 while (!agenda_->empty()) {
430 QueueElement *top = agenda_->top();
431 agenda_->pop();
432 Node *rnode = top->node;
433
434 if (rnode->x == 0) {
435 for (QueueElement *n = top; n; n = n->next) {
436 result_[n->node->x] = n->node->y;
437 }
438 cost_ = top->gx;
439 return true;
440 }
441
442 for (const_Path_iterator it = rnode->lpath.begin();
443 it != rnode->lpath.end(); ++it) {
444 QueueElement *n =nbest_freelist_->alloc();
445 n->node = (*it)->lnode;
446 n->gx = -(*it)->lnode->cost -(*it)->cost + top->gx;
447 n->fx = -(*it)->lnode->bestCost -(*it)->cost + top->gx;
448 // | h(x) | | g(x) |
449 n->next = top;
450 agenda_->push(n);
451 }
452 }
453
454 return 0;
455 }
456
eval()457 int TaggerImpl::eval() {
458 int err = 0;
459 for (size_t i = 0; i < x_.size(); ++i) {
460 if (answer_[i] != result_[i]) {
461 ++err;
462 }
463 }
464 return err;
465 }
466
clear()467 bool TaggerImpl::clear() {
468 if (mode_ == TEST || mode_ == TEST_SHARED) {
469 allocator_->clear();
470 }
471 x_.clear();
472 node_.clear();
473 answer_.clear();
474 result_.clear();
475 Z_ = cost_ = 0.0;
476 return true;
477 }
478
buildLattice()479 void TaggerImpl::buildLattice() {
480 if (x_.empty()) {
481 return;
482 }
483
484 feature_index_->rebuildFeatures(this);
485
486 for (size_t i = 0; i < x_.size(); ++i) {
487 for (size_t j = 0; j < ysize_; ++j) {
488 feature_index_->calcCost(node_[i][j]);
489 const std::vector<Path *> &lpath = node_[i][j]->lpath;
490 for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
491 feature_index_->calcCost(*it);
492 }
493 }
494 }
495
496 // Add penalty for Dual decomposition.
497 if (!penalty_.empty()) {
498 for (size_t i = 0; i < x_.size(); ++i) {
499 for (size_t j = 0; j < ysize_; ++j) {
500 node_[i][j]->cost += penalty_[i][j];
501 }
502 }
503 }
504 }
505
forwardbackward()506 void TaggerImpl::forwardbackward() {
507 if (x_.empty()) {
508 return;
509 }
510
511 for (int i = 0; i < static_cast<int>(x_.size()); ++i) {
512 for (size_t j = 0; j < ysize_; ++j) {
513 node_[i][j]->calcAlpha();
514 }
515 }
516
517 for (int i = static_cast<int>(x_.size() - 1); i >= 0; --i) {
518 for (size_t j = 0; j < ysize_; ++j) {
519 node_[i][j]->calcBeta();
520 }
521 }
522
523 Z_ = 0.0;
524 for (size_t j = 0; j < ysize_; ++j) {
525 Z_ = logsumexp(Z_, node_[0][j]->beta, j == 0);
526 }
527
528 return;
529 }
530
viterbi()531 void TaggerImpl::viterbi() {
532 for (size_t i = 0; i < x_.size(); ++i) {
533 for (size_t j = 0; j < ysize_; ++j) {
534 double bestc = -1e37;
535 Node *best = 0;
536 const std::vector<Path *> &lpath = node_[i][j]->lpath;
537 for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
538 double cost = (*it)->lnode->bestCost +(*it)->cost +
539 node_[i][j]->cost;
540 if (cost > bestc) {
541 bestc = cost;
542 best = (*it)->lnode;
543 }
544 }
545 node_[i][j]->prev = best;
546 node_[i][j]->bestCost = best ? bestc : node_[i][j]->cost;
547 }
548 }
549
550 double bestc = -1e37;
551 Node *best = 0;
552 size_t s = x_.size()-1;
553 for (size_t j = 0; j < ysize_; ++j) {
554 if (bestc < node_[s][j]->bestCost) {
555 best = node_[s][j];
556 bestc = node_[s][j]->bestCost;
557 }
558 }
559
560 for (Node *n = best; n; n = n->prev) {
561 result_[n->x] = n->y;
562 }
563
564 cost_ = -node_[x_.size()-1][result_[x_.size()-1]]->bestCost;
565 }
566
gradient(double * expected)567 double TaggerImpl::gradient(double *expected) {
568 if (x_.empty()) return 0.0;
569
570 buildLattice();
571 forwardbackward();
572 double s = 0.0;
573
574 for (size_t i = 0; i < x_.size(); ++i) {
575 for (size_t j = 0; j < ysize_; ++j) {
576 node_[i][j]->calcExpectation(expected, Z_, ysize_);
577 }
578 }
579
580 for (size_t i = 0; i < x_.size(); ++i) {
581 for (const int *f = node_[i][answer_[i]]->fvector; *f != -1; ++f) {
582 --expected[*f + answer_[i]];
583 }
584 s += node_[i][answer_[i]]->cost; // UNIGRAM cost
585 const std::vector<Path *> &lpath = node_[i][answer_[i]]->lpath;
586 for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
587 if ((*it)->lnode->y == answer_[(*it)->lnode->x]) {
588 for (const int *f = (*it)->fvector; *f != -1; ++f) {
589 --expected[*f +(*it)->lnode->y * ysize_ +(*it)->rnode->y];
590 }
591 s += (*it)->cost; // BIGRAM COST
592 break;
593 }
594 }
595 }
596
597 viterbi(); // call for eval()
598
599 return Z_ - s ;
600 }
601
// Computes a perceptron-style update for the current sentence.
//
// The lattice is built and decoded with viterbi(). When the decoded labels
// equal the answer labels everywhere, nothing is updated and 0.0 is returned.
// Otherwise, for every position, the features on the answer path are
// incremented in `collins` and the features on the decoded path are
// decremented; s accumulates (answer cost - decoded cost).
//
// Returns -s, or 0.0 for an empty or correctly decoded sentence.
double TaggerImpl::collins(double *collins) {
  if (x_.empty()) {
    return 0.0;
  }

  buildLattice();
  viterbi();  // call for finding argmax y*
  double s = 0.0;

  // if correct parse, do not run forward + backward
  {
    size_t num = 0;
    for (size_t i = 0; i < x_.size(); ++i) {
      if (answer_[i] == result_[i]) {
        ++num;
      }
    }

    if (num == x_.size()) return 0.0;
  }

  for (size_t i = 0; i < x_.size(); ++i) {
    // answer
    {
      // Node (unigram) features of the answer label.
      s += node_[i][answer_[i]]->cost;
      for (const int *f = node_[i][answer_[i]]->fvector; *f != -1; ++f) {
        ++collins[*f + answer_[i]];
      }

      // Path (bigram) features: only the first incoming path whose left node
      // carries the answer label of its position is counted.
      const std::vector<Path *> &lpath = node_[i][answer_[i]]->lpath;
      for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
        if ((*it)->lnode->y == answer_[(*it)->lnode->x]) {
          for (const int *f = (*it)->fvector; *f != -1; ++f) {
            ++collins[*f +(*it)->lnode->y * ysize_ +(*it)->rnode->y];
          }
          s += (*it)->cost;
          break;
        }
      }
    }

    // result
    {
      // Same bookkeeping with the opposite sign for the decoded label.
      s -= node_[i][result_[i]]->cost;
      for (const int *f = node_[i][result_[i]]->fvector; *f != -1; ++f) {
        --collins[*f + result_[i]];
      }

      const std::vector<Path *> &lpath = node_[i][result_[i]]->lpath;
      for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
        if ((*it)->lnode->y == result_[(*it)->lnode->x]) {
          for (const int *f = (*it)->fvector; *f != -1; ++f) {
            --collins[*f +(*it)->lnode->y * ysize_ +(*it)->rnode->y];
          }
          s -= (*it)->cost;
          break;
        }
      }
    }
  }

  return -s;
}
665
parse()666 bool TaggerImpl::parse() {
667 CHECK_FALSE(feature_index_->buildFeatures(this))
668 << feature_index_->what();
669
670 if (x_.empty()) {
671 return true;
672 }
673 buildLattice();
674 if (nbest_ || vlevel_ >= 1) {
675 forwardbackward();
676 }
677 viterbi();
678 if (nbest_) {
679 initNbest();
680 }
681
682 return true;
683 }
684
parse(const char * input)685 const char* TaggerImpl::parse(const char* input) {
686 return parse(input, std::strlen(input));
687 }
688
parse(const char * input,size_t length)689 const char* TaggerImpl::parse(const char* input, size_t length) {
690 std::istringstream is(std::string(input, length));
691 if (!read(&is) || !parse()) {
692 return 0;
693 }
694 toString();
695 return os_.c_str();
696 }
697
parse(const char * input,size_t len1,char * output,size_t len2)698 const char* TaggerImpl::parse(const char*input, size_t len1,
699 char *output, size_t len2) {
700 std::istringstream is(std::string(input, len1));
701 if (x_.empty()) {
702 return 0;
703 }
704 toString();
705 if ((os_.size() + 1) < len2) {
706 memcpy(output, os_.data(), os_.size());
707 output[os_.size()] = '\0';
708 return output;
709 } else {
710 return 0;
711 }
712 }
713
parse_stream(std::istream * is,std::ostream * os)714 bool TaggerImpl::parse_stream(std::istream *is,
715 std::ostream *os) {
716 if (!read(is) || !parse()) {
717 return false;
718 }
719 if (x_.empty()) {
720 return true;
721 }
722 toString();
723 os->write(os_.data(), os_.size());
724 return true;
725 }
726
toString(char * output,size_t len)727 const char* TaggerImpl::toString(char *output,
728 size_t len) {
729 const char* p = toString();
730 const size_t l = std::min(std::strlen(p), len);
731 std::strncpy(output, p, l);
732 return output;
733 }
734
// Formats the current result into os_ and returns its C string.
//
// Each token is written as its columns separated by tabs, followed by the
// chosen label. With vlevel_ >= 1 the label is followed by '/' and prob(i);
// with vlevel_ >= 2 every label and its prob(i, j) are appended as well.
// A blank line ends each sentence.
//
// With nbest_ >= 1, next() is called up to nbest_ times and each result is
// preceded by a "# <n> <prob>" line; the loop stops early when next() returns
// false. Otherwise the current result is written once, preceded by a
// "# <prob>" line when vlevel_ >= 1.
const char* TaggerImpl::toString() {
  os_.assign("");

  // Writes the sentence held in x_ with the labels currently selected.
  // Defined as a macro so that both branches below share the same text.
#define PRINT                                                           \
  for (size_t i = 0; i < x_.size(); ++i) {                              \
    for (std::vector<const char*>::iterator it = x_[i].begin();         \
         it != x_[i].end(); ++it)                                       \
      os_ << *it << '\t';                                               \
    os_ << yname(y(i));                                                 \
    if (vlevel_ >= 1) os_ << '/' << prob(i);                            \
    if (vlevel_ >= 2) {                                                 \
      for (size_t j = 0; j < ysize_; ++j)                               \
        os_ << '\t' << yname(j) << '/' << prob(i, j);                   \
    }                                                                   \
    os_ << '\n';                                                        \
  }                                                                     \
  os_ << '\n';

  if (nbest_ >= 1) {
    for (size_t n = 0; n < nbest_; ++n) {
      // next() overwrites result_ with the n-th best label sequence.
      if (!next()) {
        break;
      }
      os_ << "# " << n << ' ' << prob() << '\n';
      PRINT;
    }
  } else {
    if (vlevel_ >= 1) {
      os_ << "# " << prob() << '\n';
    }
    PRINT;
  }

  return const_cast<const char*>(os_.c_str());

#undef PRINT
}
772
createTagger(int argc,char ** argv)773 Tagger *createTagger(int argc, char **argv) {
774 TaggerImpl *tagger = new TaggerImpl();
775 if (!tagger->open(argc, argv)) {
776 setGlobalError(tagger->what());
777 delete tagger;
778 return 0;
779 }
780 return tagger;
781 }
782
createTagger(const char * argv)783 Tagger *createTagger(const char *argv) {
784 TaggerImpl *tagger = new TaggerImpl();
785 if (!tagger->open(argv)) {
786 setGlobalError(tagger->what());
787 delete tagger;
788 return 0;
789 }
790 return tagger;
791 }
792
createModel(int argc,char ** argv)793 Model *createModel(int argc, char **argv) {
794 ModelImpl *model = new ModelImpl();
795 if (!model->open(argc, argv)) {
796 setGlobalError(model->what());
797 delete model;
798 return 0;
799 }
800 return model;
801 }
802
createModelFromArray(int argc,char ** argv,const char * buf,size_t size)803 Model *createModelFromArray(int argc, char **argv,
804 const char *buf, size_t size) {
805 ModelImpl *model = new ModelImpl();
806 if (!model->openFromArray(argc, argv, buf, size)) {
807 setGlobalError(model->what());
808 delete model;
809 return 0;
810 }
811 return model;
812 }
813
createModel(const char * argv)814 Model *createModel(const char *argv) {
815 ModelImpl *model = new ModelImpl();
816 if (!model->open(argv)) {
817 setGlobalError(model->what());
818 delete model;
819 return 0;
820 }
821 return model;
822 }
823
createModelFromArray(const char * arg,const char * buf,size_t size)824 Model *createModelFromArray(const char *arg,
825 const char *buf, size_t size) {
826 ModelImpl *model = new ModelImpl();
827 if (!model->openFromArray(arg, buf, size)) {
828 setGlobalError(model->what());
829 delete model;
830 return 0;
831 }
832 return model;
833 }
834
getTaggerError()835 const char *getTaggerError() {
836 return getGlobalError();
837 }
838
getLastError()839 const char *getLastError() {
840 return getGlobalError();
841 }
842
843 namespace {
crfpp_test(const Param & param)844 int crfpp_test(const Param ¶m) {
845 if (param.get<bool>("version")) {
846 std::cout << param.version();
847 return -1;
848 }
849
850 if (param.get<bool>("help")) {
851 std::cout << param.help();
852 return -1;
853 }
854
855 CRFPP::TaggerImpl tagger;
856 if (!tagger.open(param)) {
857 std::cerr << tagger.what() << std::endl;
858 return -1;
859 }
860
861 std::string output = param.get<std::string>("output");
862 if (output.empty()) {
863 output = "-";
864 }
865
866 CRFPP::ostream_wrapper os(output.c_str());
867 if (!*os) {
868 std::cerr << "no such file or directory: " << output << std::endl;
869 return -1;
870 }
871
872 const std::vector<std::string>& rest_ = param.rest_args();
873 std::vector<std::string> rest = rest_; // trivial copy
874 if (rest.empty()) {
875 rest.push_back("-");
876 }
877
878 for (size_t i = 0; i < rest.size(); ++i) {
879 CRFPP::istream_wrapper is(rest[i].c_str());
880 if (!*is) {
881 std::cerr << "no such file or directory: " << rest[i] << std::endl;
882 return -1;
883 }
884 while (*is) {
885 tagger.parse_stream(is.get(), os.get());
886 }
887 }
888
889 return 0;
890 }
}  // namespace
892 } // namespace CRFPP
893
crfpp_test(int argc,char ** argv)894 int crfpp_test(int argc, char **argv) {
895 CRFPP::Param param;
896 param.open(argc, argv, long_options);
897 return CRFPP::crfpp_test(param);
898 }
899
crfpp_test2(const char * arg)900 int crfpp_test2(const char *arg) {
901 CRFPP::Param param;
902 param.open(arg, long_options);
903 return CRFPP::crfpp_test(param);
904 }
905