1 //
2 //  CRF++ -- Yet Another CRF toolkit
3 //
4 //  $Id: tagger.cpp 1601 2007-03-31 09:47:18Z taku $;
5 //
6 //  Copyright(C) 2005-2007 Taku Kudo <taku@chasen.org>
7 //
8 #include <iostream>
9 #include <vector>
10 #include <iterator>
11 #include <cmath>
12 #include <string>
13 #include <sstream>
14 #include "stream_wrapper.h"
15 #include "common.h"
16 #include "tagger.h"
17 
namespace {
// Size in bytes of the buffer that holds the most recent error message,
// including the terminating NUL.
const size_t kErrorBufferSize = 256;

// Fallback text reported when no error buffer is available to read from.
const char kUnknownError[] = "Unknown Error";
}  // namespace
22 
23 #if defined(_WIN32) && !defined(__CYGWIN__)
24 namespace {
25 DWORD g_tls_index = TLS_OUT_OF_INDEXES;
26 
getGlobalError()27 const char *getGlobalError() {
28   LPVOID data = ::TlsGetValue(g_tls_index);
29   return data == NULL ? kUnknownError : reinterpret_cast<const char *>(data);
30 }
31 
setGlobalError(const char * str)32 void setGlobalError(const char *str) {
33   char *data = reinterpret_cast<char *>(::TlsGetValue(g_tls_index));
34   if (data == NULL) {
35     return;
36   }
37   strncpy(data, str, kErrorBufferSize - 1);
38   data[kErrorBufferSize - 1] = '\0';
39 }
40 }  // namespace
41 HINSTANCE DllInstance = 0;
42 
43 extern "C" {
DllMain(HINSTANCE hinst,DWORD dwReason,LPVOID)44   BOOL WINAPI DllMain(HINSTANCE hinst, DWORD dwReason, LPVOID) {
45     LPVOID data = 0;
46     if (!DllInstance) {
47       DllInstance = hinst;
48     }
49     switch (dwReason) {
50       case DLL_PROCESS_ATTACH:
51         if ((g_tls_index = ::TlsAlloc()) == TLS_OUT_OF_INDEXES) {
52           return FALSE;
53         }
54         // Not break in order to initialize the TLS.
55       case DLL_THREAD_ATTACH:
56         data = (LPVOID)::LocalAlloc(LPTR, kErrorBufferSize);
57         if (data) {
58           ::TlsSetValue(g_tls_index, data);
59         }
60         break;
61       case DLL_THREAD_DETACH:
62         data = ::TlsGetValue(g_tls_index);
63         if (data) {
64           ::LocalFree((HLOCAL)data);
65         }
66         break;
67       case DLL_PROCESS_DETACH:
68         data = ::TlsGetValue(g_tls_index);
69         if (data) {
70           ::LocalFree((HLOCAL)data);
71         }
72         ::TlsFree(g_tls_index);
73         g_tls_index = TLS_OUT_OF_INDEXES;
74         break;
75       default:
76         break;
77     }
78     return TRUE;
79   }
80 }
81 #else  // _WIN32
82 
83 namespace {
84 #ifdef HAVE_TLS_KEYWORD
85 __thread char kErrorBuffer[kErrorBufferSize];
86 #else
87 char kErrorBuffer[kErrorBufferSize];
88 #endif
89 }
90 
91 namespace {
getGlobalError()92 const char *getGlobalError() {
93   return kErrorBuffer;
94 }
95 
setGlobalError(const char * str)96 void setGlobalError(const char *str) {
97   strncpy(kErrorBuffer, str, kErrorBufferSize - 1);
98   kErrorBuffer[kErrorBufferSize - 1] = '\0';
99 }
100 }  // namespace
101 #endif
102 
103 namespace {
104 static const CRFPP::Option long_options[] = {
105   {"model",  'm',  0,       "FILE",  "set FILE for model file"},
106   {"nbest",  'n', "0",      "INT",   "output n-best results"},
107   {"verbose" , 'v', "0",    "INT",   "set INT for verbose level"},
108   {"cost-factor", 'c', "1.0", "FLOAT", "set cost factor"},
109   {"output",         'o',  0,       "FILE",  "use FILE as output file"},
110   {"version",        'v',  0,        0,       "show the version and exit" },
111   {"help",   'h',  0,        0,       "show this help and exit" },
112   {0, 0, 0, 0, 0}
113 };
114 }  // namespace
115 
116 namespace CRFPP {
117 
createTagger() const118 Tagger *ModelImpl::createTagger() const {
119   if (!feature_index_.get()) {
120     return 0;
121   }
122   scoped_ptr<TaggerImpl> tagger(new TaggerImpl);
123   tagger->open(feature_index_.get(), nbest_, vlevel_);
124   return tagger.release();
125 }
126 
open(FeatureIndex * feature_index,Allocator * allocator)127 bool TaggerImpl::open(FeatureIndex *feature_index,
128                       Allocator *allocator) {
129   close();
130   mode_ = LEARN;
131   feature_index_ = feature_index;
132   allocator_ = allocator;
133   ysize_ = feature_index_->ysize();
134   return true;
135 }
136 
open(FeatureIndex * feature_index,unsigned int nbest,unsigned int vlevel)137 bool TaggerImpl::open(FeatureIndex *feature_index,
138                       unsigned int nbest,
139                       unsigned int vlevel) {
140   close();
141   mode_ = TEST_SHARED;
142   feature_index_ = feature_index;
143   nbest_ = nbest;
144   vlevel_ = vlevel;
145   allocator_ = new Allocator;
146   ysize_ = feature_index_->ysize();
147   return true;
148 }
149 
openFromArray(const Param & param,const char * buf,size_t size)150 bool ModelImpl::openFromArray(const Param &param,
151                               const char *buf,
152                               size_t size) {
153   nbest_ = param.get<int>("nbest");
154   vlevel_ = param.get<int>("verbose");
155   feature_index_.reset(new DecoderFeatureIndex);
156   if (!feature_index_->openFromArray(buf, size)) {
157     WHAT << feature_index_->what();
158     feature_index_.reset(0);
159     return false;
160   }
161   const double c = param.get<double>("cost-factor");
162   feature_index_->set_cost_factor(c);
163   return true;
164 }
165 
open(const Param & param)166 bool ModelImpl::open(const Param &param) {
167   nbest_ = param.get<int>("nbest");
168   vlevel_ = param.get<int>("verbose");
169   const std::string model = param.get<std::string>("model");
170   feature_index_.reset(new DecoderFeatureIndex);
171   if (!feature_index_->open(model.c_str())) {
172     WHAT << feature_index_->what();
173     feature_index_.reset(0);
174     return false;
175   }
176   const double c = param.get<double>("cost-factor");
177   feature_index_->set_cost_factor(c);
178   return true;
179 }
180 
open(int argc,char ** argv)181 bool ModelImpl::open(int argc,  char** argv) {
182   Param param;
183   CHECK_FALSE(param.open(argc, argv, long_options))
184       << param.what();
185   return open(param);
186 }
187 
open(const char * arg)188 bool ModelImpl::open(const char* arg) {
189   Param param;
190   CHECK_FALSE(param.open(arg, long_options)) << param.what();
191   return open(param);
192 }
193 
openFromArray(int argc,char ** argv,const char * buf,size_t size)194 bool ModelImpl::openFromArray(int argc,  char** argv,
195                               const char *buf, size_t size) {
196   Param param;
197   CHECK_FALSE(param.open(argc, argv, long_options))
198       << param.what();
199   return openFromArray(param, buf, size);
200 }
201 
openFromArray(const char * arg,const char * buf,size_t size)202 bool ModelImpl::openFromArray(const char* arg,
203                               const char *buf, size_t size) {
204   Param param;
205   CHECK_FALSE(param.open(arg, long_options)) << param.what();
206   return openFromArray(param, buf, size);
207 }
208 
getTemplate() const209 const char *ModelImpl::getTemplate() const {
210   if (feature_index_.get()) {
211     return feature_index_->getTemplate();
212   }
213   return 0;
214 }
215 
open(const Param & param)216 bool TaggerImpl::open(const Param &param) {
217   close();
218 
219   if (!param.help_version()) {
220     close();
221     return false;
222   }
223 
224   nbest_ = param.get<int>("nbest");
225   vlevel_ = param.get<int>("verbose");
226 
227   std::string model = param.get<std::string>("model");
228 
229   DecoderFeatureIndex *decoder_feature_index = new DecoderFeatureIndex;
230   feature_index_ = decoder_feature_index;
231   allocator_ = new Allocator;
232 
233   if (!decoder_feature_index->open(model.c_str())) {
234     WHAT << feature_index_->what();
235     close();
236     return false;
237   }
238 
239   const double c = param.get<double>("cost-factor");
240 
241   if (c <= 0.0) {
242     WHAT << "cost factor must be positive";
243     close();
244     return false;
245   }
246 
247   feature_index_->set_cost_factor(c);
248   ysize_ = feature_index_->ysize();
249 
250   return true;
251 }
252 
open(int argc,char ** argv)253 bool TaggerImpl::open(int argc, char **argv) {
254   Param param;
255   CHECK_FALSE(param.open(argc, argv, long_options))
256       << param.what();
257   return open(param);
258 }
259 
open(const char * arg)260 bool TaggerImpl::open(const char *arg) {
261   Param param;
262   CHECK_FALSE(param.open(arg, long_options)) << param.what();
263   return open(param);
264 }
265 
close()266 void TaggerImpl::close() {
267   if (mode_ == TEST) {
268     delete feature_index_;
269     delete allocator_;
270     feature_index_ = 0;
271     allocator_ = 0;
272   } else if (mode_ == TEST_SHARED) {
273     delete allocator_;
274     allocator_ = 0;
275   }
276 }
277 
set_model(const Model & model)278 bool TaggerImpl::set_model(const Model &model) {
279   if (mode_ == TEST) {
280     // feature_index_ => took the owner
281     // allocator_ => reuse
282     delete feature_index_;
283   } else if (mode_ == LEARN) {
284     // feature_index_ => did not take the owner
285     // allocator_ => did not take the owner.
286     allocator_ = new Allocator;
287   } else if (mode_ == TEST_SHARED) {
288     // feature_index_ => did not take the owner
289     // allocator_ => reuse
290   }
291   mode_ = TEST_SHARED;
292   const ModelImpl *model_impl = static_cast<const ModelImpl *>(&model);
293   feature_index_ = model_impl->feature_index();
294   nbest_ = model_impl->nbest();
295   vlevel_ = model_impl->vlevel();
296   ysize_ = feature_index_->ysize();
297   return true;
298 }
299 
add2(size_t size,const char ** column,bool copy)300 bool TaggerImpl::add2(size_t size, const char **column, bool copy) {
301   const size_t xsize = feature_index_->xsize();
302 
303   if ((mode_ == LEARN && size < xsize + 1) ||
304       ((mode_ == TEST || mode_ == TEST_SHARED)  && size < xsize)) {
305     CHECK_FALSE(false) << "# x is small: size="
306                        << size << " xsize=" << xsize;
307   }
308 
309   size_t s = x_.size() + 1;
310   x_.resize(s);
311   node_.resize(s);
312   answer_.resize(s);
313   result_.resize(s);
314   s = x_.size() - 1;
315 
316   if (copy) {
317     for (size_t k = 0; k < size; ++k) {
318       x_[s].push_back(allocator_->strdup(column[k]));
319     }
320   } else {
321     for (size_t k = 0; k < size; ++k) {
322       x_[s].push_back(column[k]);
323     }
324   }
325 
326   result_[s] = answer_[s] = 0;  // dummy
327   if (mode_ == LEARN) {
328     size_t r = ysize_;
329     for (size_t k = 0; k < ysize_; ++k) {
330       if (std::strcmp(yname(k), column[xsize]) == 0) {
331         r = k;
332       }
333     }
334 
335     CHECK_FALSE(r != ysize_) << "cannot find answer: " << column[xsize];
336     answer_[s] = r;
337   }
338 
339   node_[s].resize(ysize_);
340 
341   return true;
342 }
343 
add(size_t size,const char ** column)344 bool TaggerImpl::add(size_t size, const char **column) {
345   return add2(size, column, true);
346 }
347 
add(const char * line)348 bool TaggerImpl::add(const char* line) {
349   char *p = allocator_->strdup(line);
350   scoped_fixed_array<const char *, 8192> column;
351   const size_t size = tokenize2(p, "\t ", column.get(), column.size());
352   if (!add2(size, column.get(), false)) {
353     return false;
354   }
355   return true;
356 }
357 
read(std::istream * is)358 bool TaggerImpl::read(std::istream *is) {
359   scoped_fixed_array<char, 8192> line;
360   clear();
361 
362   for (;;) {
363     if (!is->getline(line.get(), line.size())) {
364       is->clear(std::ios::eofbit|std::ios::badbit);
365       return true;
366     }
367     if (line[0] == '\0' || line[0] == ' ' || line[0] == '\t') {
368       break;
369     }
370     if (!add(line.get())) {
371       return false;
372     }
373   }
374 
375   return true;
376 }
377 
set_penalty(size_t i,size_t j,double penalty)378 void TaggerImpl::set_penalty(size_t i, size_t j, double penalty) {
379   if (penalty_.empty()) {
380     penalty_.resize(node_.size());
381     for (size_t s = 0; s < penalty_.size(); ++s) {
382       penalty_[s].resize(ysize_);
383     }
384   }
385   penalty_[i][j] = penalty;
386 }
387 
penalty(size_t i,size_t j) const388 double TaggerImpl::penalty(size_t i, size_t j) const {
389   return penalty_.empty() ? 0.0 : penalty_[i][j];
390 }
391 
shrink()392 bool TaggerImpl::shrink() {
393   CHECK_FALSE(feature_index_->buildFeatures(this))
394       << feature_index_->what();
395   std::vector<std::vector<const char *> >(x_).swap(x_);
396   std::vector<std::vector<Node *> >(node_).swap(node_);
397   std::vector<unsigned short int>(answer_).swap(answer_);
398   std::vector<unsigned short int>(result_).swap(result_);
399 
400   return true;
401 }
402 
initNbest()403 bool TaggerImpl::initNbest() {
404   if (!agenda_.get()) {
405     agenda_.reset(new std::priority_queue <QueueElement*,
406                   std::vector<QueueElement *>, QueueElementComp>);
407     nbest_freelist_.reset(new FreeList <QueueElement>(128));
408   }
409 
410   nbest_freelist_->free();
411   while (!agenda_->empty()) {
412     agenda_->pop();   // make empty
413   }
414 
415   const size_t k = x_.size()-1;
416   for (size_t i = 0; i < ysize_; ++i) {
417     QueueElement *eos = nbest_freelist_->alloc();
418     eos->node = node_[k][i];
419     eos->fx = -node_[k][i]->bestCost;
420     eos->gx = -node_[k][i]->cost;
421     eos->next = 0;
422     agenda_->push(eos);
423   }
424 
425   return true;
426 }
427 
next()428 bool TaggerImpl::next() {
429   while (!agenda_->empty()) {
430     QueueElement *top = agenda_->top();
431     agenda_->pop();
432     Node *rnode = top->node;
433 
434     if (rnode->x == 0) {
435       for (QueueElement *n = top; n; n = n->next) {
436         result_[n->node->x] = n->node->y;
437       }
438       cost_ = top->gx;
439       return true;
440     }
441 
442     for (const_Path_iterator it = rnode->lpath.begin();
443          it != rnode->lpath.end(); ++it) {
444       QueueElement *n =nbest_freelist_->alloc();
445       n->node = (*it)->lnode;
446       n->gx   = -(*it)->lnode->cost     -(*it)->cost +  top->gx;
447       n->fx   = -(*it)->lnode->bestCost -(*it)->cost +  top->gx;
448       //          |              h(x)                 |  |  g(x)  |
449       n->next = top;
450       agenda_->push(n);
451     }
452   }
453 
454   return 0;
455 }
456 
eval()457 int TaggerImpl::eval() {
458   int err = 0;
459   for (size_t i = 0; i < x_.size(); ++i) {
460     if (answer_[i] != result_[i]) {
461       ++err;
462     }
463   }
464   return err;
465 }
466 
clear()467 bool TaggerImpl::clear() {
468   if (mode_ == TEST || mode_ == TEST_SHARED) {
469     allocator_->clear();
470   }
471   x_.clear();
472   node_.clear();
473   answer_.clear();
474   result_.clear();
475   Z_ = cost_ = 0.0;
476   return true;
477 }
478 
buildLattice()479 void TaggerImpl::buildLattice() {
480   if (x_.empty()) {
481     return;
482   }
483 
484   feature_index_->rebuildFeatures(this);
485 
486   for (size_t i = 0; i < x_.size(); ++i) {
487     for (size_t j = 0; j < ysize_; ++j) {
488       feature_index_->calcCost(node_[i][j]);
489       const std::vector<Path *> &lpath = node_[i][j]->lpath;
490       for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
491         feature_index_->calcCost(*it);
492       }
493     }
494   }
495 
496   // Add penalty for Dual decomposition.
497   if (!penalty_.empty()) {
498     for (size_t i = 0; i < x_.size(); ++i) {
499       for (size_t j = 0; j < ysize_; ++j) {
500         node_[i][j]->cost += penalty_[i][j];
501       }
502     }
503   }
504 }
505 
forwardbackward()506 void TaggerImpl::forwardbackward() {
507   if (x_.empty()) {
508     return;
509   }
510 
511   for (int i = 0; i < static_cast<int>(x_.size()); ++i) {
512     for (size_t j = 0; j < ysize_; ++j) {
513       node_[i][j]->calcAlpha();
514     }
515   }
516 
517   for (int i = static_cast<int>(x_.size() - 1); i >= 0;  --i) {
518     for (size_t j = 0; j < ysize_; ++j) {
519       node_[i][j]->calcBeta();
520     }
521   }
522 
523   Z_ = 0.0;
524   for (size_t j = 0; j < ysize_; ++j) {
525     Z_ = logsumexp(Z_, node_[0][j]->beta, j == 0);
526   }
527 
528   return;
529 }
530 
viterbi()531 void TaggerImpl::viterbi() {
532   for (size_t i = 0;   i < x_.size(); ++i) {
533     for (size_t j = 0; j < ysize_; ++j) {
534       double bestc = -1e37;
535       Node *best = 0;
536       const std::vector<Path *> &lpath = node_[i][j]->lpath;
537       for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
538         double cost = (*it)->lnode->bestCost +(*it)->cost +
539             node_[i][j]->cost;
540         if (cost > bestc) {
541           bestc = cost;
542           best  = (*it)->lnode;
543         }
544       }
545       node_[i][j]->prev     = best;
546       node_[i][j]->bestCost = best ? bestc : node_[i][j]->cost;
547     }
548   }
549 
550   double bestc = -1e37;
551   Node *best = 0;
552   size_t s = x_.size()-1;
553   for (size_t j = 0; j < ysize_; ++j) {
554     if (bestc < node_[s][j]->bestCost) {
555       best  = node_[s][j];
556       bestc = node_[s][j]->bestCost;
557     }
558   }
559 
560   for (Node *n = best; n; n = n->prev) {
561     result_[n->x] = n->y;
562   }
563 
564   cost_ = -node_[x_.size()-1][result_[x_.size()-1]]->bestCost;
565 }
566 
gradient(double * expected)567 double TaggerImpl::gradient(double *expected) {
568   if (x_.empty()) return 0.0;
569 
570   buildLattice();
571   forwardbackward();
572   double s = 0.0;
573 
574   for (size_t i = 0;   i < x_.size(); ++i) {
575     for (size_t j = 0; j < ysize_; ++j) {
576       node_[i][j]->calcExpectation(expected, Z_, ysize_);
577     }
578   }
579 
580   for (size_t i = 0;   i < x_.size(); ++i) {
581     for (const int *f = node_[i][answer_[i]]->fvector; *f != -1; ++f) {
582       --expected[*f + answer_[i]];
583     }
584     s += node_[i][answer_[i]]->cost;  // UNIGRAM cost
585     const std::vector<Path *> &lpath = node_[i][answer_[i]]->lpath;
586     for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
587       if ((*it)->lnode->y == answer_[(*it)->lnode->x]) {
588         for (const int *f = (*it)->fvector; *f != -1; ++f) {
589           --expected[*f +(*it)->lnode->y * ysize_ +(*it)->rnode->y];
590         }
591         s += (*it)->cost;  // BIGRAM COST
592         break;
593       }
594     }
595   }
596 
597   viterbi();  // call for eval()
598 
599   return Z_ - s ;
600 }
601 
collins(double * collins)602 double TaggerImpl::collins(double *collins) {
603   if (x_.empty()) {
604     return 0.0;
605   }
606 
607   buildLattice();
608   viterbi();  // call for finding argmax y*
609   double s = 0.0;
610 
611   // if correct parse, do not run forward + backward
612   {
613     size_t num = 0;
614     for (size_t i = 0; i < x_.size(); ++i) {
615       if (answer_[i] == result_[i]) {
616         ++num;
617       }
618     }
619 
620     if (num == x_.size()) return 0.0;
621   }
622 
623   for (size_t i = 0; i < x_.size(); ++i) {
624     // answer
625     {
626       s += node_[i][answer_[i]]->cost;
627       for (const int *f = node_[i][answer_[i]]->fvector; *f != -1; ++f) {
628         ++collins[*f + answer_[i]];
629       }
630 
631       const std::vector<Path *> &lpath = node_[i][answer_[i]]->lpath;
632       for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
633         if ((*it)->lnode->y == answer_[(*it)->lnode->x]) {
634           for (const int *f = (*it)->fvector; *f != -1; ++f) {
635             ++collins[*f +(*it)->lnode->y * ysize_ +(*it)->rnode->y];
636           }
637           s += (*it)->cost;
638           break;
639         }
640       }
641     }
642 
643     // result
644     {
645       s -= node_[i][result_[i]]->cost;
646       for (const int *f = node_[i][result_[i]]->fvector; *f != -1; ++f) {
647         --collins[*f + result_[i]];
648       }
649 
650       const std::vector<Path *> &lpath = node_[i][result_[i]]->lpath;
651       for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
652         if ((*it)->lnode->y == result_[(*it)->lnode->x]) {
653           for (const int *f = (*it)->fvector; *f != -1; ++f) {
654             --collins[*f +(*it)->lnode->y * ysize_ +(*it)->rnode->y];
655           }
656           s -= (*it)->cost;
657           break;
658         }
659       }
660     }
661   }
662 
663   return -s;
664 }
665 
parse()666 bool TaggerImpl::parse() {
667   CHECK_FALSE(feature_index_->buildFeatures(this))
668       << feature_index_->what();
669 
670   if (x_.empty()) {
671     return true;
672   }
673   buildLattice();
674   if (nbest_ || vlevel_ >= 1) {
675     forwardbackward();
676   }
677   viterbi();
678   if (nbest_) {
679     initNbest();
680   }
681 
682   return true;
683 }
684 
parse(const char * input)685 const char* TaggerImpl::parse(const char* input) {
686   return parse(input, std::strlen(input));
687 }
688 
parse(const char * input,size_t length)689 const char* TaggerImpl::parse(const char* input, size_t length) {
690   std::istringstream is(std::string(input, length));
691   if (!read(&is) || !parse()) {
692     return 0;
693   }
694   toString();
695   return os_.c_str();
696 }
697 
parse(const char * input,size_t len1,char * output,size_t len2)698 const char* TaggerImpl::parse(const char*input, size_t len1,
699                               char *output, size_t len2) {
700   std::istringstream is(std::string(input, len1));
701   if (x_.empty()) {
702     return 0;
703   }
704   toString();
705   if ((os_.size() + 1) < len2) {
706     memcpy(output, os_.data(), os_.size());
707     output[os_.size()] = '\0';
708     return output;
709   } else {
710     return 0;
711   }
712 }
713 
parse_stream(std::istream * is,std::ostream * os)714 bool TaggerImpl::parse_stream(std::istream *is,
715                               std::ostream *os) {
716   if (!read(is) || !parse()) {
717     return false;
718   }
719   if (x_.empty()) {
720     return true;
721   }
722   toString();
723   os->write(os_.data(), os_.size());
724   return true;
725 }
726 
toString(char * output,size_t len)727 const char* TaggerImpl::toString(char *output,
728                                  size_t len) {
729   const char* p = toString();
730   const size_t l = std::min(std::strlen(p), len);
731   std::strncpy(output, p, l);
732   return output;
733 }
734 
toString()735 const char* TaggerImpl::toString() {
736   os_.assign("");
737 
738 #define PRINT                                                   \
739   for (size_t i = 0; i < x_.size(); ++i) {                      \
740     for (std::vector<const char*>::iterator it = x_[i].begin(); \
741          it != x_[i].end(); ++it)                               \
742       os_ << *it << '\t';                                       \
743     os_ << yname(y(i));                                         \
744     if (vlevel_ >= 1) os_ << '/' << prob(i);                    \
745     if (vlevel_ >= 2) {                                         \
746       for (size_t j = 0; j < ysize_; ++j)                       \
747         os_ << '\t' << yname(j) << '/' << prob(i, j);           \
748     }                                                           \
749     os_ << '\n';                                                \
750   }                                                             \
751   os_ << '\n';
752 
753   if (nbest_ >= 1) {
754     for (size_t n = 0; n < nbest_; ++n) {
755       if (!next()) {
756         break;
757       }
758       os_ << "# " << n << ' ' << prob() << '\n';
759       PRINT;
760     }
761   } else {
762     if (vlevel_ >= 1) {
763       os_ << "# " << prob() << '\n';
764     }
765     PRINT;
766   }
767 
768   return const_cast<const char*>(os_.c_str());
769 
770 #undef PRINT
771 }
772 
createTagger(int argc,char ** argv)773 Tagger *createTagger(int argc, char **argv) {
774   TaggerImpl *tagger = new TaggerImpl();
775   if (!tagger->open(argc, argv)) {
776     setGlobalError(tagger->what());
777     delete tagger;
778     return 0;
779   }
780   return tagger;
781 }
782 
createTagger(const char * argv)783 Tagger *createTagger(const char *argv) {
784   TaggerImpl *tagger = new TaggerImpl();
785   if (!tagger->open(argv)) {
786     setGlobalError(tagger->what());
787     delete tagger;
788     return 0;
789   }
790   return tagger;
791 }
792 
createModel(int argc,char ** argv)793 Model *createModel(int argc, char **argv) {
794   ModelImpl *model = new ModelImpl();
795   if (!model->open(argc, argv)) {
796     setGlobalError(model->what());
797     delete model;
798     return 0;
799   }
800   return model;
801 }
802 
createModelFromArray(int argc,char ** argv,const char * buf,size_t size)803 Model *createModelFromArray(int argc, char **argv,
804                             const char *buf, size_t size) {
805   ModelImpl *model = new ModelImpl();
806   if (!model->openFromArray(argc, argv, buf, size)) {
807     setGlobalError(model->what());
808     delete model;
809     return 0;
810   }
811   return model;
812 }
813 
createModel(const char * argv)814 Model *createModel(const char *argv) {
815   ModelImpl *model = new ModelImpl();
816   if (!model->open(argv)) {
817     setGlobalError(model->what());
818     delete model;
819     return 0;
820   }
821   return model;
822 }
823 
createModelFromArray(const char * arg,const char * buf,size_t size)824 Model *createModelFromArray(const char *arg,
825                            const char *buf, size_t size) {
826   ModelImpl *model = new ModelImpl();
827   if (!model->openFromArray(arg, buf, size)) {
828     setGlobalError(model->what());
829     delete model;
830     return 0;
831   }
832   return model;
833 }
834 
getTaggerError()835 const char *getTaggerError() {
836   return getGlobalError();
837 }
838 
getLastError()839 const char *getLastError() {
840   return getGlobalError();
841 }
842 
843 namespace {
crfpp_test(const Param & param)844 int crfpp_test(const Param &param) {
845   if (param.get<bool>("version")) {
846     std::cout <<  param.version();
847     return -1;
848   }
849 
850   if (param.get<bool>("help")) {
851     std::cout <<  param.help();
852     return -1;
853   }
854 
855   CRFPP::TaggerImpl tagger;
856   if (!tagger.open(param)) {
857     std::cerr << tagger.what() << std::endl;
858     return -1;
859   }
860 
861   std::string output = param.get<std::string>("output");
862   if (output.empty()) {
863     output = "-";
864   }
865 
866   CRFPP::ostream_wrapper os(output.c_str());
867   if (!*os) {
868     std::cerr << "no such file or directory: " << output << std::endl;
869     return -1;
870   }
871 
872   const std::vector<std::string>& rest_ = param.rest_args();
873   std::vector<std::string> rest = rest_;  // trivial copy
874   if (rest.empty()) {
875     rest.push_back("-");
876   }
877 
878   for (size_t i = 0; i < rest.size(); ++i) {
879     CRFPP::istream_wrapper is(rest[i].c_str());
880     if (!*is) {
881       std::cerr << "no such file or directory: " << rest[i] << std::endl;
882       return -1;
883     }
884     while (*is) {
885       tagger.parse_stream(is.get(), os.get());
886     }
887   }
888 
889   return 0;
890 }
891 }  // namepace
892 }  // namespace CRFPP
893 
crfpp_test(int argc,char ** argv)894 int crfpp_test(int argc, char **argv) {
895   CRFPP::Param param;
896   param.open(argc, argv, long_options);
897   return CRFPP::crfpp_test(param);
898 }
899 
crfpp_test2(const char * arg)900 int crfpp_test2(const char *arg) {
901   CRFPP::Param param;
902   param.open(arg, long_options);
903   return CRFPP::crfpp_test(param);
904 }
905