1 /* 2 MeCab -- Yet Another Part-of-Speech and Morphological Analyzer 3 4 Copyright(C) 2001-2011 Taku Kudo <taku@chasen.org> 5 Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation 6 */ 7 #ifndef MECAB_MECAB_H_ 8 #define MECAB_MECAB_H_ 9 10 /* C/C++ common data structures */ 11 12 /** 13 * DictionaryInfo structure 14 */ 15 struct mecab_dictionary_info_t { 16 /** 17 * filename of dictionary 18 * On Windows, filename is stored in UTF-8 encoding 19 */ 20 const char *filename; 21 22 /** 23 * character set of the dictionary. e.g., "SHIFT-JIS", "UTF-8" 24 */ 25 const char *charset; 26 27 /** 28 * How many words are registered in this dictionary. 29 */ 30 unsigned int size; 31 32 /** 33 * dictionary type 34 * this value should be MECAB_USR_DIC, MECAB_SYS_DIC, or MECAB_UNK_DIC. 35 */ 36 int type; 37 38 /** 39 * left attributes size 40 */ 41 unsigned int lsize; 42 43 /** 44 * right attributes size 45 */ 46 unsigned int rsize; 47 48 /** 49 * version of this dictionary 50 */ 51 unsigned short version; 52 53 /** 54 * pointer to the next dictionary info. 55 */ 56 struct mecab_dictionary_info_t *next; 57 }; 58 59 /** 60 * Path structure 61 */ 62 struct mecab_path_t { 63 /** 64 * pointer to the right node 65 */ 66 struct mecab_node_t* rnode; 67 68 /** 69 * pointer to the next right path 70 */ 71 struct mecab_path_t* rnext; 72 73 /** 74 * pointer to the left node 75 */ 76 struct mecab_node_t* lnode; 77 78 /** 79 * pointer to the next left path 80 */ 81 82 struct mecab_path_t* lnext; 83 84 /** 85 * local cost 86 */ 87 int cost; 88 89 /** 90 * marginal probability 91 */ 92 float prob; 93 }; 94 95 /** 96 * Node structure 97 */ 98 struct mecab_node_t { 99 /** 100 * pointer to the previous node. 101 */ 102 struct mecab_node_t *prev; 103 104 /** 105 * pointer to the next node. 106 */ 107 struct mecab_node_t *next; 108 109 /** 110 * pointer to the node which ends at the same position. 111 */ 112 struct mecab_node_t *enext; 113 114 /** 115 * pointer to the node which starts at the same position. 116 */ 117 struct mecab_node_t *bnext; 118 119 /** 120 * pointer to the right path. 121 * this value is NULL if MECAB_ONE_BEST mode. 122 */ 123 struct mecab_path_t *rpath; 124 125 /** 126 * pointer to the right path. 127 * this value is NULL if MECAB_ONE_BEST mode. 128 */ 129 struct mecab_path_t *lpath; 130 131 /** 132 * surface string. 133 * this value is not 0 terminated. 134 * You can get the length with length/rlength members. 135 */ 136 const char *surface; 137 138 /** 139 * feature string 140 */ 141 const char *feature; 142 143 /** 144 * unique node id 145 */ 146 unsigned int id; 147 148 /** 149 * length of the surface form. 150 */ 151 unsigned short length; 152 153 /** 154 * length of the surface form including white space before the morph. 155 */ 156 unsigned short rlength; 157 158 /** 159 * right attribute id 160 */ 161 unsigned short rcAttr; 162 163 /** 164 * left attribute id 165 */ 166 unsigned short lcAttr; 167 168 /** 169 * unique part of speech id. This value is defined in "pos.def" file. 170 */ 171 unsigned short posid; 172 173 /** 174 * character type 175 */ 176 unsigned char char_type; 177 178 /** 179 * status of this model. 180 * This value is MECAB_NOR_NODE, MECAB_UNK_NODE, MECAB_BOS_NODE, MECAB_EOS_NODE, or MECAB_EON_NODE. 181 */ 182 unsigned char stat; 183 184 /** 185 * set 1 if this node is best node. 186 */ 187 unsigned char isbest; 188 189 /** 190 * forward accumulative log summation. 191 * This value is only available when MECAB_MARGINAL_PROB is passed. 192 */ 193 float alpha; 194 195 /** 196 * backward accumulative log summation. 197 * This value is only available when MECAB_MARGINAL_PROB is passed. 198 */ 199 float beta; 200 201 /** 202 * marginal probability. 203 * This value is only available when MECAB_MARGINAL_PROB is passed. 204 */ 205 float prob; 206 207 /** 208 * word cost. 209 */ 210 short wcost; 211 212 /** 213 * best accumulative cost from bos node to this node. 214 */ 215 long cost; 216 }; 217 218 /** 219 * Parameters for MeCab::Node::stat 220 */ 221 enum { 222 /** 223 * Normal node defined in the dictionary. 224 */ 225 MECAB_NOR_NODE = 0, 226 /** 227 * Unknown node not defined in the dictionary. 228 */ 229 MECAB_UNK_NODE = 1, 230 /** 231 * Virtual node representing a beginning of the sentence. 232 */ 233 MECAB_BOS_NODE = 2, 234 /** 235 * Virtual node representing a end of the sentence. 236 */ 237 MECAB_EOS_NODE = 3, 238 239 /** 240 * Virtual node representing a end of the N-best enumeration. 241 */ 242 MECAB_EON_NODE = 4 243 }; 244 245 /** 246 * Parameters for MeCab::DictionaryInfo::type 247 */ 248 enum { 249 /** 250 * This is a system dictionary. 251 */ 252 MECAB_SYS_DIC = 0, 253 254 /** 255 * This is a user dictionary. 256 */ 257 MECAB_USR_DIC = 1, 258 259 /** 260 * This is a unknown word dictionary. 261 */ 262 MECAB_UNK_DIC = 2 263 }; 264 265 /** 266 * Parameters for MeCab::Lattice::request_type 267 */ 268 enum { 269 /** 270 * One best result is obtained (default mode) 271 */ 272 MECAB_ONE_BEST = 1, 273 /** 274 * Set this flag if you want to obtain N best results. 275 */ 276 MECAB_NBEST = 2, 277 /** 278 * Set this flag if you want to enable a partial parsing mode. 279 * When this flag is set, the input |sentence| needs to be written 280 * in partial parsing format. 281 */ 282 MECAB_PARTIAL = 4, 283 /** 284 * Set this flag if you want to obtain marginal probabilities. 285 * Marginal probability is set in MeCab::Node::prob. 286 * The parsing speed will get 3-5 times slower than the default mode. 287 */ 288 MECAB_MARGINAL_PROB = 8, 289 /** 290 * Set this flag if you want to obtain alternative results. 291 * Not implemented. 292 */ 293 MECAB_ALTERNATIVE = 16, 294 /** 295 * When this flag is set, the result linked-list (Node::next/prev) 296 * traverses all nodes in the lattice. 297 */ 298 MECAB_ALL_MORPHS = 32, 299 300 /** 301 * When this flag is set, tagger internally copies the body of passed 302 * sentence into internal buffer. 303 */ 304 MECAB_ALLOCATE_SENTENCE = 64 305 }; 306 307 /** 308 * Parameters for MeCab::Lattice::boundary_constraint_type 309 */ 310 enum { 311 /** 312 * The token boundary is not specified. 313 */ 314 MECAB_ANY_BOUNDARY = 0, 315 316 /** 317 * The position is a strong token boundary. 318 */ 319 MECAB_TOKEN_BOUNDARY = 1, 320 321 /** 322 * The position is not a token boundary. 323 */ 324 MECAB_INSIDE_TOKEN = 2 325 }; 326 327 /* C interface */ 328 #ifdef __cplusplus 329 #include <cstdio> 330 #else 331 #include <stdio.h> 332 #endif 333 334 #ifdef __cplusplus 335 extern "C" { 336 #endif 337 338 #ifdef _WIN32 339 #include <windows.h> 340 # ifdef DLL_EXPORT 341 # define MECAB_DLL_EXTERN __declspec(dllexport) 342 # define MECAB_DLL_CLASS_EXTERN __declspec(dllexport) 343 # else 344 # define MECAB_DLL_EXTERN __declspec(dllimport) 345 # endif 346 #endif 347 348 #ifndef MECAB_DLL_EXTERN 349 # define MECAB_DLL_EXTERN extern 350 #endif 351 352 #ifndef MECAB_DLL_CLASS_EXTERN 353 # define MECAB_DLL_CLASS_EXTERN 354 #endif 355 356 typedef struct mecab_t mecab_t; 357 typedef struct mecab_model_t mecab_model_t; 358 typedef struct mecab_lattice_t mecab_lattice_t; 359 typedef struct mecab_dictionary_info_t mecab_dictionary_info_t; 360 typedef struct mecab_node_t mecab_node_t; 361 typedef struct mecab_path_t mecab_path_t; 362 363 #ifndef SWIG 364 /* C interface */ 365 366 /* old mecab interface */ 367 /** 368 * C wrapper of MeCab::Tagger::create(argc, argv) 369 */ 370 MECAB_DLL_EXTERN mecab_t* mecab_new(int argc, char **argv); 371 372 /** 373 * C wrapper of MeCab::Tagger::create(arg) 374 */ 375 MECAB_DLL_EXTERN mecab_t* mecab_new2(const char *arg); 376 377 /** 378 * C wrapper of MeCab::Tagger::version() 379 */ 380 MECAB_DLL_EXTERN const char* mecab_version(); 381 382 /** 383 * C wrapper of MeCab::getLastError() 384 */ 385 MECAB_DLL_EXTERN const char* mecab_strerror(mecab_t *mecab); 386 387 /** 388 * C wrapper of MeCab::deleteTagger(tagger) 389 */ 390 MECAB_DLL_EXTERN void mecab_destroy(mecab_t *mecab); 391 392 /** 393 * C wrapper of MeCab::Tagger:set_partial() 394 */ 395 MECAB_DLL_EXTERN int mecab_get_partial(mecab_t *mecab); 396 397 /** 398 * C wrapper of MeCab::Tagger::partial() 399 */ 400 MECAB_DLL_EXTERN void mecab_set_partial(mecab_t *mecab, int partial); 401 402 /** 403 * C wrapper of MeCab::Tagger::theta() 404 */ 405 MECAB_DLL_EXTERN float mecab_get_theta(mecab_t *mecab); 406 407 /** 408 * C wrapper of MeCab::Tagger::set_theta() 409 */ 410 MECAB_DLL_EXTERN void mecab_set_theta(mecab_t *mecab, float theta); 411 412 /** 413 * C wrapper of MeCab::Tagger::lattice_level() 414 */ 415 MECAB_DLL_EXTERN int mecab_get_lattice_level(mecab_t *mecab); 416 417 /** 418 * C wrapper of MeCab::Tagger::set_lattice_level() 419 */ 420 MECAB_DLL_EXTERN void mecab_set_lattice_level(mecab_t *mecab, int level); 421 422 /** 423 * C wrapper of MeCab::Tagger::all_morphs() 424 */ 425 MECAB_DLL_EXTERN int mecab_get_all_morphs(mecab_t *mecab); 426 427 /** 428 * C wrapper of MeCab::Tagger::set_all_moprhs() 429 */ 430 MECAB_DLL_EXTERN void mecab_set_all_morphs(mecab_t *mecab, int all_morphs); 431 432 /** 433 * C wrapper of MeCab::Tagger::parse(MeCab::Lattice *lattice) 434 */ 435 MECAB_DLL_EXTERN int mecab_parse_lattice(mecab_t *mecab, mecab_lattice_t *lattice); 436 437 /** 438 * C wrapper of MeCab::Tagger::parse(const char *str) 439 */ 440 MECAB_DLL_EXTERN const char* mecab_sparse_tostr(mecab_t *mecab, const char *str); 441 442 /** 443 * C wrapper of MeCab::Tagger::parse(const char *str, size_t len) 444 */ 445 MECAB_DLL_EXTERN const char* mecab_sparse_tostr2(mecab_t *mecab, const char *str, size_t len); 446 447 /** 448 * C wrapper of MeCab::Tagger::parse(const char *str, char *ostr, size_t olen) 449 */ 450 MECAB_DLL_EXTERN char* mecab_sparse_tostr3(mecab_t *mecab, const char *str, size_t len, 451 char *ostr, size_t olen); 452 453 /** 454 * C wrapper of MeCab::Tagger::parseToNode(const char *str) 455 */ 456 MECAB_DLL_EXTERN const mecab_node_t* mecab_sparse_tonode(mecab_t *mecab, const char*); 457 458 /** 459 * C wrapper of MeCab::Tagger::parseToNode(const char *str, size_t len) 460 */ 461 MECAB_DLL_EXTERN const mecab_node_t* mecab_sparse_tonode2(mecab_t *mecab, const char*, size_t); 462 463 /** 464 * C wrapper of MeCab::Tagger::parseNBest(size_t N, const char *str) 465 */ 466 MECAB_DLL_EXTERN const char* mecab_nbest_sparse_tostr(mecab_t *mecab, size_t N, const char *str); 467 468 /** 469 * C wrapper of MeCab::Tagger::parseNBest(size_t N, const char *str, size_t len) 470 */ 471 MECAB_DLL_EXTERN const char* mecab_nbest_sparse_tostr2(mecab_t *mecab, size_t N, 472 const char *str, size_t len); 473 474 /** 475 * C wrapper of MeCab::Tagger::parseNBest(size_t N, const char *str, char *ostr, size_t olen) 476 */ 477 MECAB_DLL_EXTERN char* mecab_nbest_sparse_tostr3(mecab_t *mecab, size_t N, 478 const char *str, size_t len, 479 char *ostr, size_t olen); 480 481 /** 482 * C wrapper of MeCab::Tagger::parseNBestInit(const char *str) 483 */ 484 MECAB_DLL_EXTERN int mecab_nbest_init(mecab_t *mecab, const char *str); 485 486 /** 487 * C wrapper of MeCab::Tagger::parseNBestInit(const char *str, size_t len) 488 */ 489 MECAB_DLL_EXTERN int mecab_nbest_init2(mecab_t *mecab, const char *str, size_t len); 490 491 /** 492 * C wrapper of MeCab::Tagger::next() 493 */ 494 MECAB_DLL_EXTERN const char* mecab_nbest_next_tostr(mecab_t *mecab); 495 496 /** 497 * C wrapper of MeCab::Tagger::next(char *ostr, size_t olen) 498 */ 499 MECAB_DLL_EXTERN char* mecab_nbest_next_tostr2(mecab_t *mecab, char *ostr, size_t olen); 500 501 /** 502 * C wrapper of MeCab::Tagger::nextNode() 503 */ 504 MECAB_DLL_EXTERN const mecab_node_t* mecab_nbest_next_tonode(mecab_t *mecab); 505 506 /** 507 * C wrapper of MeCab::Tagger::formatNode(const Node *node) 508 */ 509 MECAB_DLL_EXTERN const char* mecab_format_node(mecab_t *mecab, const mecab_node_t *node); 510 511 /** 512 * C wrapper of MeCab::Tagger::dictionary_info() 513 */ 514 MECAB_DLL_EXTERN const mecab_dictionary_info_t* mecab_dictionary_info(mecab_t *mecab); 515 516 /* lattice interface */ 517 /** 518 * C wrapper of MeCab::createLattice() 519 */ 520 MECAB_DLL_EXTERN mecab_lattice_t *mecab_lattice_new(); 521 522 /** 523 * C wrapper of MeCab::deleteLattice(lattice) 524 */ 525 MECAB_DLL_EXTERN void mecab_lattice_destroy(mecab_lattice_t *lattice); 526 527 /** 528 * C wrapper of MeCab::Lattice::clear() 529 */ 530 MECAB_DLL_EXTERN void mecab_lattice_clear(mecab_lattice_t *lattice); 531 532 /** 533 * C wrapper of MeCab::Lattice::is_available() 534 */ 535 536 MECAB_DLL_EXTERN int mecab_lattice_is_available(mecab_lattice_t *lattice); 537 538 /** 539 * C wrapper of MeCab::Lattice::bos_node() 540 */ 541 MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_bos_node(mecab_lattice_t *lattice); 542 543 /** 544 * C wrapper of MeCab::Lattice::eos_node() 545 */ 546 MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_eos_node(mecab_lattice_t *lattice); 547 548 /** 549 * C wrapper of MeCab::Lattice::begin_nodes() 550 */ 551 552 MECAB_DLL_EXTERN mecab_node_t **mecab_lattice_get_all_begin_nodes(mecab_lattice_t *lattice); 553 /** 554 * C wrapper of MeCab::Lattice::end_nodes() 555 */ 556 MECAB_DLL_EXTERN mecab_node_t **mecab_lattice_get_all_end_nodes(mecab_lattice_t *lattice); 557 558 /** 559 * C wrapper of MeCab::Lattice::begin_nodes(pos) 560 */ 561 MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_begin_nodes(mecab_lattice_t *lattice, size_t pos); 562 563 /** 564 * C wrapper of MeCab::Lattice::end_nodes(pos) 565 */ 566 MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_end_nodes(mecab_lattice_t *lattice, size_t pos); 567 568 /** 569 * C wrapper of MeCab::Lattice::sentence() 570 */ 571 MECAB_DLL_EXTERN const char *mecab_lattice_get_sentence(mecab_lattice_t *lattice); 572 573 /** 574 * C wrapper of MeCab::Lattice::set_sentence(sentence) 575 */ 576 MECAB_DLL_EXTERN void mecab_lattice_set_sentence(mecab_lattice_t *lattice, const char *sentence); 577 578 /** 579 * C wrapper of MeCab::Lattice::set_sentence(sentence, len) 580 */ 581 582 MECAB_DLL_EXTERN void mecab_lattice_set_sentence2(mecab_lattice_t *lattice, const char *sentence, size_t len); 583 584 /** 585 * C wrapper of MeCab::Lattice::size() 586 */ 587 MECAB_DLL_EXTERN size_t mecab_lattice_get_size(mecab_lattice_t *lattice); 588 589 /** 590 * C wrapper of MeCab::Lattice::Z() 591 */ 592 MECAB_DLL_EXTERN double mecab_lattice_get_z(mecab_lattice_t *lattice); 593 594 /** 595 * C wrapper of MeCab::Lattice::set_Z() 596 */ 597 MECAB_DLL_EXTERN void mecab_lattice_set_z(mecab_lattice_t *lattice, double Z); 598 599 /** 600 * C wrapper of MeCab::Lattice::theta() 601 */ 602 MECAB_DLL_EXTERN double mecab_lattice_get_theta(mecab_lattice_t *lattice); 603 604 /** 605 * C wrapper of MeCab::Lattice::set_theta() 606 */ 607 608 MECAB_DLL_EXTERN void mecab_lattice_set_theta(mecab_lattice_t *lattice, double theta); 609 610 /** 611 * C wrapper of MeCab::Lattice::next() 612 */ 613 MECAB_DLL_EXTERN int mecab_lattice_next(mecab_lattice_t *lattice); 614 615 /** 616 * C wrapper of MeCab::Lattice::request_type() 617 */ 618 MECAB_DLL_EXTERN int mecab_lattice_get_request_type(mecab_lattice_t *lattice); 619 620 /** 621 * C wrapper of MeCab::Lattice::has_request_type() 622 */ 623 MECAB_DLL_EXTERN int mecab_lattice_has_request_type(mecab_lattice_t *lattice, int request_type); 624 625 /** 626 * C wrapper of MeCab::Lattice::set_request_type() 627 */ 628 MECAB_DLL_EXTERN void mecab_lattice_set_request_type(mecab_lattice_t *lattice, int request_type); 629 630 /** 631 * C wrapper of MeCab::Lattice::add_request_type() 632 */ 633 634 MECAB_DLL_EXTERN void mecab_lattice_add_request_type(mecab_lattice_t *lattice, int request_type); 635 636 /** 637 * C wrapper of MeCab::Lattice::remove_request_type() 638 */ 639 MECAB_DLL_EXTERN void mecab_lattice_remove_request_type(mecab_lattice_t *lattice, int request_type); 640 641 /** 642 * C wrapper of MeCab::Lattice::newNode(); 643 */ 644 MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_new_node(mecab_lattice_t *lattice); 645 646 /** 647 * C wrapper of MeCab::Lattice::toString() 648 */ 649 MECAB_DLL_EXTERN const char *mecab_lattice_tostr(mecab_lattice_t *lattice); 650 651 /** 652 * C wrapper of MeCab::Lattice::toString(buf, size) 653 */ 654 MECAB_DLL_EXTERN const char *mecab_lattice_tostr2(mecab_lattice_t *lattice, char *buf, size_t size); 655 656 /** 657 * C wrapper of MeCab::Lattice::enumNBestAsString(N) 658 */ 659 MECAB_DLL_EXTERN const char *mecab_lattice_nbest_tostr(mecab_lattice_t *lattice, size_t N); 660 661 /** 662 * C wrapper of MeCab::Lattice::enumNBestAsString(N, buf, size) 663 */ 664 665 MECAB_DLL_EXTERN const char *mecab_lattice_nbest_tostr2(mecab_lattice_t *lattice, size_t N, char *buf, size_t size); 666 667 /** 668 * C wrapper of MeCab::Lattice::has_constraint() 669 */ 670 MECAB_DLL_EXTERN int mecab_lattice_has_constraint(mecab_lattice_t *lattice); 671 672 /** 673 * C wrapper of MeCab::Lattice::boundary_constraint(pos) 674 */ 675 MECAB_DLL_EXTERN int mecab_lattice_get_boundary_constraint(mecab_lattice_t *lattice, size_t pos); 676 677 678 /** 679 * C wrapper of MeCab::Lattice::feature_constraint(pos) 680 */ 681 MECAB_DLL_EXTERN const char *mecab_lattice_get_feature_constraint(mecab_lattice_t *lattice, size_t pos); 682 683 /** 684 * C wrapper of MeCab::Lattice::boundary_constraint(pos, type) 685 */ 686 MECAB_DLL_EXTERN void mecab_lattice_set_boundary_constraint(mecab_lattice_t *lattice, size_t pos, int boundary_type); 687 688 /** 689 * C wrapper of MeCab::Lattice::set_feature_constraint(begin_pos, end_pos, feature) 690 */ 691 MECAB_DLL_EXTERN void mecab_lattice_set_feature_constraint(mecab_lattice_t *lattice, size_t begin_pos, size_t end_pos, const char *feature); 692 693 /** 694 * C wrapper of MeCab::Lattice::set_result(result); 695 */ 696 MECAB_DLL_EXTERN void mecab_lattice_set_result(mecab_lattice_t *lattice, const char *result); 697 698 /** 699 * C wrapper of MeCab::Lattice::what() 700 */ 701 MECAB_DLL_EXTERN const char *mecab_lattice_strerror(mecab_lattice_t *lattice); 702 703 704 /* model interface */ 705 /** 706 * C wapper of MeCab::Model::create(argc, argv) 707 */ 708 MECAB_DLL_EXTERN mecab_model_t *mecab_model_new(int argc, char **argv); 709 710 /** 711 * C wapper of MeCab::Model::create(arg) 712 */ 713 MECAB_DLL_EXTERN mecab_model_t *mecab_model_new2(const char *arg); 714 715 /** 716 * C wapper of MeCab::deleteModel(model) 717 */ 718 719 MECAB_DLL_EXTERN void mecab_model_destroy(mecab_model_t *model); 720 721 /** 722 * C wapper of MeCab::Model::createTagger() 723 */ 724 MECAB_DLL_EXTERN mecab_t *mecab_model_new_tagger(mecab_model_t *model); 725 726 /** 727 * C wapper of MeCab::Model::createLattice() 728 */ 729 MECAB_DLL_EXTERN mecab_lattice_t *mecab_model_new_lattice(mecab_model_t *model); 730 731 /** 732 * C wrapper of MeCab::Model::swap() 733 */ 734 MECAB_DLL_EXTERN int mecab_model_swap(mecab_model_t *model, mecab_model_t *new_model); 735 736 /** 737 * C wapper of MeCab::Model::dictionary_info() 738 */ 739 MECAB_DLL_EXTERN const mecab_dictionary_info_t* mecab_model_dictionary_info(mecab_model_t *model); 740 741 /** 742 * C wrapper of MeCab::Model::transition_cost() 743 */ 744 MECAB_DLL_EXTERN int mecab_model_transition_cost(mecab_model_t *model, 745 unsigned short rcAttr, 746 unsigned short lcAttr); 747 748 /** 749 * C wrapper of MeCab::Model::lookup() 750 */ 751 MECAB_DLL_EXTERN mecab_node_t *mecab_model_lookup(mecab_model_t *model, 752 const char *begin, 753 const char *end, 754 mecab_lattice_t *lattice); 755 756 /* static functions */ 757 MECAB_DLL_EXTERN int mecab_do(int argc, char **argv); 758 MECAB_DLL_EXTERN int mecab_dict_index(int argc, char **argv); 759 MECAB_DLL_EXTERN int mecab_dict_gen(int argc, char **argv); 760 MECAB_DLL_EXTERN int mecab_cost_train(int argc, char **argv); 761 MECAB_DLL_EXTERN int mecab_system_eval(int argc, char **argv); 762 MECAB_DLL_EXTERN int mecab_test_gen(int argc, char **argv); 763 #endif 764 765 #ifdef __cplusplus 766 } 767 #endif 768 769 /* C++ interface */ 770 #ifdef __cplusplus 771 772 namespace MeCab { 773 typedef struct mecab_dictionary_info_t DictionaryInfo; 774 typedef struct mecab_path_t Path; 775 typedef struct mecab_node_t Node; 776 777 template <typename N, typename P> class Allocator; 778 class Tagger; 779 780 /** 781 * Lattice class 782 */ 783 class MECAB_DLL_CLASS_EXTERN Lattice { 784 public: 785 /** 786 * Clear all internal lattice data. 787 */ 788 virtual void clear() = 0; 789 790 /** 791 * Return true if result object is available. 792 * @return boolean 793 */ 794 virtual bool is_available() const = 0; 795 796 /** 797 * Return bos (begin of sentence) node. 798 * You can obtain all nodes via "for (const Node *node = lattice->bos_node(); node; node = node->next) {}" 799 * @return bos node object 800 */ 801 virtual Node *bos_node() const = 0; 802 803 /** 804 * Return eos (end of sentence) node. 805 * @return eos node object 806 */ 807 virtual Node *eos_node() const = 0; 808 809 #ifndef SWIG 810 /** 811 * This method is used internally. 812 */ 813 virtual Node **begin_nodes() const = 0; 814 815 /** 816 * This method is used internally. 817 */ 818 virtual Node **end_nodes() const = 0; 819 #endif 820 821 /** 822 * Return node linked list ending at |pos|. 823 * You can obtain all nodes via "for (const Node *node = lattice->end_nodes(pos); node; node = node->enext) {}" 824 * @param pos position of nodes. 0 <= pos < size() 825 * @return node linked list 826 */ 827 virtual Node *end_nodes(size_t pos) const = 0; 828 829 /** 830 * Return node linked list starting at |pos|. 831 * You can obtain all nodes via "for (const Node *node = lattice->begin_nodes(pos); node; node = node->bnext) {}" 832 * @param pos position of nodes. 0 <= pos < size() 833 * @return node linked list 834 */ 835 virtual Node *begin_nodes(size_t pos) const = 0; 836 837 /** 838 * Return sentence. 839 * If MECAB_NBEST or MECAB_PARTIAL mode is off, the returned poiner is the same as the one set by set_sentence(). 840 * @return sentence 841 */ 842 virtual const char *sentence() const = 0; 843 844 /** 845 * Set sentence. This method does not take the ownership of the object. 846 * @param sentence sentence 847 */ 848 virtual void set_sentence(const char *sentence) = 0; 849 850 #ifndef SWIG 851 /** 852 * Set sentence. This method does not take the ownership of the object. 853 * @param sentence sentence 854 * @param len length of the sentence 855 */ 856 virtual void set_sentence(const char *sentence, size_t len) = 0; 857 #endif 858 859 /** 860 * Return sentence size. 861 * @return sentence size 862 */ 863 virtual size_t size() const = 0; 864 865 /** 866 * Set normalization factor of CRF. 867 * @param Z new normalization factor. 868 */ 869 virtual void set_Z(double Z) = 0; 870 871 /** 872 * return normalization factor of CRF. 873 * @return normalization factor. 874 */ 875 virtual double Z() const = 0; 876 877 /** 878 * Set temparature parameter theta. 879 * @param theta temparature parameter. 880 */ 881 virtual void set_theta(float theta) = 0; 882 883 /** 884 * Return temparature parameter theta. 885 * @return temparature parameter. 886 */ 887 virtual float theta() const = 0; 888 889 /** 890 * Obtain next-best result. The internal linked list structure is updated. 891 * You should set MECAB_NBEST reques_type in advance. 892 * Return false if no more results are available or request_type is invalid. 893 * @return boolean 894 */ 895 virtual bool next() = 0; 896 897 /** 898 * Return the current request type. 899 * @return request type 900 */ 901 virtual int request_type() const = 0; 902 903 /** 904 * Return true if the object has a specified request type. 905 * @return boolean 906 */ 907 virtual bool has_request_type(int request_type) const = 0; 908 909 /** 910 * Set request type. 911 * @param request_type new request type assigned 912 */ 913 virtual void set_request_type(int request_type) = 0; 914 915 /** 916 * Add request type. 917 * @param request_type new request type added 918 */ 919 virtual void add_request_type(int request_type) = 0; 920 921 /** 922 * Remove request type. 923 * @param request_type new request type removed 924 */ 925 virtual void remove_request_type(int request_type) = 0; 926 927 #ifndef SWIG 928 /** 929 * This method is used internally. 930 */ 931 virtual Allocator<Node, Path> *allocator() const = 0; 932 #endif 933 934 /** 935 * Return new node. Lattice objects has the ownership of the node. 936 * @return new node object 937 */ 938 virtual Node *newNode() = 0; 939 940 /** 941 * Return string representation of the lattice. 942 * Returned object is managed by this instance. When clear/set_sentence() method 943 * is called, the returned buffer is initialized. 944 * @return string representation of the lattice 945 */ 946 virtual const char *toString() = 0; 947 948 /** 949 * Return string representation of the node. 950 * Returned object is managed by this instance. When clear/set_sentence() method 951 * is called, the returned buffer is initialized. 952 * @return string representation of the node 953 * @param node node object 954 */ 955 virtual const char *toString(const Node *node) = 0; 956 957 /** 958 * Return string representation of the N-best results. 959 * Returned object is managed by this instance. When clear/set_sentence() method 960 * is called, the returned buffer is initialized. 961 * @return string representation of the node 962 * @param N how many results you want to obtain 963 */ 964 virtual const char *enumNBestAsString(size_t N) = 0; 965 966 #ifndef SWIG 967 /** 968 * Return string representation of the lattice. 969 * Result is saved in the specified buffer. 970 * @param buf output buffer 971 * @param size output buffer size 972 * @return string representation of the lattice 973 */ 974 virtual const char *toString(char *buf, size_t size) = 0; 975 976 /** 977 * Return string representation of the node. 978 * Result is saved in the specified buffer. 979 * @param node node object 980 * @param buf output buffer 981 * @param size output buffer size 982 * @return string representation of the lattice 983 */ 984 virtual const char *toString(const Node *node, 985 char *buf, size_t size) = 0; 986 987 /** 988 * Return string representation of the N-best result. 989 * Result is saved in the specified. 990 * @param N how many results you want to obtain 991 * @param buf output buffer 992 * @param size output buffer size 993 * @return string representation of the lattice 994 */ 995 virtual const char *enumNBestAsString(size_t N, char *buf, size_t size) = 0; 996 #endif 997 998 /** 999 * Returns true if any parsing constraint is set 1000 */ 1001 virtual bool has_constraint() const = 0; 1002 1003 /** 1004 * Returns the boundary constraint at the position. 1005 * @param pos the position of constraint 1006 * @return boundary constraint type 1007 */ 1008 virtual int boundary_constraint(size_t pos) const = 0; 1009 1010 /** 1011 * Returns the token constraint at the position. 1012 * @param pos the beginning position of constraint. 1013 * @return constrained node starting at the position. 1014 */ 1015 virtual const char *feature_constraint(size_t pos) const = 0; 1016 1017 /** 1018 * Set parsing constraint for partial parsing mode. 1019 * @param pos the position of the boundary 1020 * @param boundary_constraint_type the type of boundary 1021 */ 1022 virtual void set_boundary_constraint(size_t pos, 1023 int boundary_constraint_type) = 0; 1024 1025 /** 1026 * Set parsing constraint for partial parsing mode. 1027 * @param begin_pos the starting position of the constrained token. 1028 * @param end_pos the the ending position of the constrained token. 1029 * @param feature the feature of the constrained token. 1030 */ 1031 virtual void set_feature_constraint( 1032 size_t begin_pos, size_t end_pos, 1033 const char *feature) = 0; 1034 1035 /** 1036 * Set golden parsing results for unittesting. 1037 * @param result the parsing result written in the standard mecab output. 1038 */ 1039 virtual void set_result(const char *result) = 0; 1040 1041 /** 1042 * Return error string. 1043 * @return error string 1044 */ 1045 virtual const char *what() const = 0; 1046 1047 /** 1048 * Set error string. given string is copied to the internal buffer. 1049 * @param str new error string 1050 */ 1051 virtual void set_what(const char *str) = 0; 1052 1053 #ifndef SWIG 1054 /** 1055 * Create new Lattice object 1056 * @return new Lattice object 1057 */ 1058 static Lattice *create(); 1059 #endif 1060 ~Lattice()1061 virtual ~Lattice() {} 1062 }; 1063 1064 /** 1065 * Model class 1066 */ 1067 class MECAB_DLL_CLASS_EXTERN Model { 1068 public: 1069 /** 1070 * Return DictionaryInfo linked list. 1071 * @return DictionaryInfo linked list 1072 */ 1073 virtual const DictionaryInfo *dictionary_info() const = 0; 1074 1075 /** 1076 * Return transtion cost from rcAttr to lcAttr. 1077 * @return transtion cost 1078 */ 1079 virtual int transition_cost(unsigned short rcAttr, 1080 unsigned short lcAttr) const = 0; 1081 1082 /** 1083 * perform common prefix search from the range [begin, end). 1084 * |lattice| takes the ownership of return value. 1085 * @return node linked list. 1086 */ 1087 virtual Node *lookup(const char *begin, const char *end, 1088 Lattice *lattice) const = 0; 1089 1090 /** 1091 * Create a new Tagger object. 1092 * All returned tagger object shares this model object as a parsing model. 1093 * Never delete this model object before deleting tagger object. 1094 * @return new Tagger object 1095 */ 1096 virtual Tagger *createTagger() const = 0; 1097 1098 /** 1099 * Create a new Lattice object. 1100 * @return new Lattice object 1101 */ 1102 virtual Lattice *createLattice() const = 0; 1103 1104 /** 1105 * Swap the instance with |model|. 1106 * The ownership of |model| always moves to this instance, 1107 * meaning that passed |model| will no longer be accessible after calling this method. 1108 * return true if new model is swapped successfully. 1109 * This method is thread safe. All taggers created by 1110 * Model::createTagger() method will also be updated asynchronously. 1111 * No need to stop the parsing thread excplicitly before swapping model object. 1112 * @return boolean 1113 * @param model new model which is going to be swapped with the current model. 1114 */ 1115 virtual bool swap(Model *model) = 0; 1116 1117 /** 1118 * Return a version string 1119 * @return version string 1120 */ 1121 static const char *version(); 1122 ~Model()1123 virtual ~Model() {} 1124 1125 #ifndef SIWG 1126 /** 1127 * Factory method to create a new Model with a specified main's argc/argv-style parameters. 1128 * Return NULL if new model cannot be initialized. Use MeCab::getLastError() to obtain the 1129 * cause of the errors. 1130 * @return new Model object 1131 * @param argc number of parameters 1132 * @param argv parameter list 1133 */ 1134 static Model* create(int argc, char **argv); 1135 1136 /** 1137 * Factory method to create a new Model with a string parameter representation, i.e., 1138 * "-d /user/local/mecab/dic/ipadic -Ochasen". 1139 * Return NULL if new model cannot be initialized. Use MeCab::getLastError() to obtain the 1140 * cause of the errors. 1141 * @return new Model object 1142 * @param arg single string representation of the argment. 1143 */ 1144 static Model* create(const char *arg); 1145 #endif 1146 }; 1147 1148 /** 1149 * Tagger class 1150 */ 1151 class MECAB_DLL_CLASS_EXTERN Tagger { 1152 public: 1153 /** 1154 * Handy static method. 1155 * Return true if lattice is parsed successfully. 1156 * This function is equivalent to 1157 * { 1158 * Tagger *tagger = model.createModel(); 1159 * cosnt bool result = tagger->parse(lattice); 1160 * delete tagger; 1161 * return result; 1162 * } 1163 * @return boolean 1164 */ 1165 static bool parse(const Model &model, Lattice *lattice); 1166 1167 /** 1168 * Parse lattice object. 1169 * Return true if lattice is parsed successfully. 1170 * A sentence must be set to the lattice with Lattice:set_sentence object before calling this method. 1171 * Parsed node object can be obtained with Lattice:bos_node. 1172 * This method is thread safe. 1173 * @return lattice lattice object 1174 * @return boolean 1175 */ 1176 virtual bool parse(Lattice *lattice) const = 0; 1177 1178 /** 1179 * Parse given sentence and return parsed result as string. 1180 * You should not delete the returned string. The returned buffer 1181 * is overwritten when parse method is called again. 1182 * This method is NOT thread safe. 1183 * @param str sentence 1184 * @return parsed result 1185 */ 1186 virtual const char* parse(const char *str) = 0; 1187 1188 /** 1189 * Parse given sentence and return Node object. 1190 * You should not delete the returned node object. The returned buffer 1191 * is overwritten when parse method is called again. 1192 * You can traverse all nodes via Node::next member. 1193 * This method is NOT thread safe. 1194 * @param str sentence 1195 * @return bos node object 1196 */ 1197 virtual const Node* parseToNode(const char *str) = 0; 1198 1199 /** 1200 * Parse given sentence and obtain N-best results as a string format. 1201 * Currently, N must be 1 <= N <= 512 due to the limitation of the buffer size. 1202 * You should not delete the returned string. The returned buffer 1203 * is overwritten when parse method is called again. 1204 * This method is DEPRECATED. Use Lattice class. 1205 * @param N how many results you want to obtain 1206 * @param str sentence 1207 * @return parsed result 1208 */ 1209 virtual const char* parseNBest(size_t N, const char *str) = 0; 1210 1211 /** 1212 * Initialize N-best enumeration with a sentence. 1213 * Return true if initialization finishes successfully. 1214 * N-best result is obtained by calling next() or nextNode() in sequence. 1215 * This method is NOT thread safe. 1216 * This method is DEPRECATED. Use Lattice class. 1217 * @param str sentence 1218 * @return boolean 1219 */ 1220 virtual bool parseNBestInit(const char *str) = 0; 1221 1222 /** 1223 * Return next-best parsed result. You must call parseNBestInit() in advance. 1224 * Return NULL if no more reuslt is available. 1225 * This method is NOT thread safe. 1226 * This method is DEPRECATED. Use Lattice class. 1227 * @return node object 1228 */ 1229 virtual const Node* nextNode() = 0; 1230 1231 /** 1232 * Return next-best parsed result. You must call parseNBestInit() in advance. 1233 * Return NULL if no more reuslt is available. 1234 * This method is NOT thread safe. 1235 * This method is DEPRECATED. Use Lattice class. 1236 * @return parsed result 1237 */ 1238 virtual const char* next() = 0; 1239 1240 /** 1241 * Return formatted node object. The format is specified with 1242 * --unk-format, --bos-format, --eos-format, and --eon-format respectively. 1243 * You should not delete the returned string. The returned buffer 1244 * is overwritten when parse method is called again. 1245 * This method is NOT thread safe. 1246 * This method is DEPRECATED. Use Lattice class. 1247 * @param node node object. 1248 * @return parsed result 1249 */ 1250 virtual const char* formatNode(const Node *node) = 0; 1251 1252 #ifndef SWIG 1253 /** 1254 * The same as parse() method, but input length and output buffer are passed. 1255 * Return parsed result as string. The result pointer is the same as |ostr|. 1256 * Return NULL, if parsed result string cannot be stored within |olen| bytes. 1257 * @param str sentence 1258 * @param len sentence length 1259 * @param ostr output buffer 1260 * @param olen output buffer length 1261 * @return parsed result 1262 */ 1263 virtual const char* parse(const char *str, size_t len, char *ostr, size_t olen) = 0; 1264 1265 /** 1266 * The same as parse() method, but input length can be passed. 1267 * @param str sentence 1268 * @param len sentence length 1269 * @return parsed result 1270 */ 1271 virtual const char* parse(const char *str, size_t len) = 0; 1272 1273 /** 1274 * The same as parseToNode(), but input lenth can be passed. 1275 * @param str sentence 1276 * @param len sentence length 1277 * @return node object 1278 */ 1279 virtual const Node* parseToNode(const char *str, size_t len) = 0; 1280 1281 /** 1282 * The same as parseNBest(), but input length can be passed. 1283 * @param N how many results you want to obtain 1284 * @param str sentence 1285 * @param len sentence length 1286 * @return parsed result 1287 */ 1288 virtual const char* parseNBest(size_t N, const char *str, size_t len) = 0; 1289 1290 /** 1291 * The same as parseNBestInit(), but input length can be passed. 1292 * @param str sentence 1293 * @param len sentence length 1294 * @return boolean 1295 * @return parsed result 1296 */ 1297 virtual bool parseNBestInit(const char *str, size_t len) = 0; 1298 1299 /** 1300 * The same as next(), but output buffer can be passed. 1301 * Return NULL if more than |olen| buffer is required to store output string. 1302 * @param ostr output buffer 1303 * @param olen output buffer length 1304 * @return parsed result 1305 */ 1306 virtual const char* next(char *ostr , size_t olen) = 0; 1307 1308 /** 1309 * The same as parseNBest(), but input length and output buffer can be passed. 1310 * Return NULL if more than |olen| buffer is required to store output string. 1311 * @param N how many results you want to obtain 1312 * @param str input sentence 1313 * @param len input sentence length 1314 * @param ostr output buffer 1315 * @param olen output buffer length 1316 * @return parsed result 1317 */ 1318 virtual const char* parseNBest(size_t N, const char *str, 1319 size_t len, char *ostr, size_t olen) = 0; 1320 1321 /** 1322 * The same as formatNode(), but output buffer can be passed. 1323 * Return NULL if more than |olen| buffer is required to store output string. 1324 * @param node node object 1325 * @param ostr output buffer 1326 * @param olen output buffer length 1327 * @return parsed result 1328 */ 1329 virtual const char* formatNode(const Node *node, char *ostr, size_t olen) = 0; 1330 #endif 1331 1332 /** 1333 * Set request type. 1334 * This method is DEPRECATED. Use Lattice::set_request_type(MECAB_PARTIAL). 1335 * @param request_type new request type assigned 1336 */ 1337 virtual void set_request_type(int request_type) = 0; 1338 1339 /** 1340 * Return the current request type. 1341 * This method is DEPRECATED. Use Lattice class. 1342 * @return request type 1343 */ 1344 virtual int request_type() const = 0; 1345 1346 /** 1347 * Return true if partial parsing mode is on. 1348 * This method is DEPRECATED. Use Lattice::has_request_type(MECAB_PARTIAL). 1349 * @return boolean 1350 */ 1351 virtual bool partial() const = 0; 1352 1353 /** 1354 * set partial parsing mode. 1355 * This method is DEPRECATED. Use Lattice::add_request_type(MECAB_PARTIAL) or Lattice::remove_request_type(MECAB_PARTIAL) 1356 * @param partial partial mode 1357 */ 1358 virtual void set_partial(bool partial) = 0; 1359 1360 /** 1361 * Return lattice level. 1362 * This method is DEPRECATED. Use Lattice::*_request_type() 1363 * @return int lattice level 1364 */ 1365 virtual int lattice_level() const = 0; 1366 1367 /** 1368 * Set lattice level. 1369 * This method is DEPRECATED. Use Lattice::*_request_type() 1370 * @param level lattice level 1371 */ 1372 virtual void set_lattice_level(int level) = 0; 1373 1374 /** 1375 * Return true if all morphs output mode is on. 1376 * This method is DEPRECATED. Use Lattice::has_request_type(MECAB_ALL_MORPHS). 1377 * @return boolean 1378 */ 1379 virtual bool all_morphs() const = 0; 1380 1381 /** 1382 * set all-morphs output mode. 1383 * This method is DEPRECATED. Use Lattice::add_request_type(MECAB_ALL_MORPHS) or Lattice::remove_request_type(MECAB_ALL_MORPHS) 1384 * @param all_morphs 1385 */ 1386 virtual void set_all_morphs(bool all_morphs) = 0; 1387 1388 /** 1389 * Set temparature parameter theta. 1390 * @param theta temparature parameter. 1391 */ 1392 virtual void set_theta(float theta) = 0; 1393 1394 /** 1395 * Return temparature parameter theta. 1396 * @return temparature parameter. 1397 */ 1398 virtual float theta() const = 0; 1399 1400 /** 1401 * Return DictionaryInfo linked list. 1402 * @return DictionaryInfo linked list 1403 */ 1404 virtual const DictionaryInfo* dictionary_info() const = 0; 1405 1406 /** 1407 * Return error string. 1408 * @return error string 1409 */ 1410 virtual const char* what() const = 0; 1411 ~Tagger()1412 virtual ~Tagger() {} 1413 1414 #ifndef SIWG 1415 /** 1416 * Factory method to create a new Tagger with a specified main's argc/argv-style parameters. 1417 * Return NULL if new model cannot be initialized. Use MeCab::getLastError() to obtain the 1418 * cause of the errors. 1419 * @return new Tagger object 1420 * @param argc number of parameters 1421 * @param argv parameter list 1422 */ 1423 static Tagger *create(int argc, char **argv); 1424 1425 /** 1426 * Factory method to create a new Tagger with a string parameter representation, i.e., 1427 * "-d /user/local/mecab/dic/ipadic -Ochasen". 1428 * Return NULL if new model cannot be initialized. Use MeCab::getLastError() to obtain the 1429 * cause of the errors. 1430 * @return new Model object 1431 * @param arg single string representation of the argment. 1432 */ 1433 static Tagger *create(const char *arg); 1434 #endif 1435 1436 /** 1437 * Return a version string 1438 * @return version string 1439 */ 1440 static const char *version(); 1441 }; 1442 1443 #ifndef SWIG 1444 /** 1445 * Alias of Lattice::create() 1446 */ 1447 MECAB_DLL_EXTERN Lattice *createLattice(); 1448 1449 /** 1450 * Alias of Mode::create(argc, argv) 1451 */ 1452 MECAB_DLL_EXTERN Model *createModel(int argc, char **argv); 1453 1454 /** 1455 * Alias of Mode::create(arg) 1456 */ 1457 MECAB_DLL_EXTERN Model *createModel(const char *arg); 1458 1459 /** 1460 * Alias of Tagger::create(argc, argv) 1461 */ 1462 MECAB_DLL_EXTERN Tagger *createTagger(int argc, char **argv); 1463 1464 /** 1465 * Alias of Tagger::create(arg) 1466 */ 1467 MECAB_DLL_EXTERN Tagger *createTagger(const char *arg); 1468 1469 /** 1470 * delete Lattice object. 1471 * This method calles "delete lattice". 1472 * In some environment, e.g., MS-Windows, an object allocated inside a DLL must be deleted in the same DLL too. 1473 * @param lattice lattice object 1474 */ 1475 MECAB_DLL_EXTERN void deleteLattice(Lattice *lattice); 1476 1477 1478 /** 1479 * delete Model object. 1480 * This method calles "delete model". 1481 * In some environment, e.g., MS-Windows, an object allocated inside a DLL must be deleted in the same DLL too. 1482 * @param model model object 1483 */ 1484 MECAB_DLL_EXTERN void deleteModel(Model *model); 1485 1486 /** 1487 * delete Tagger object. 1488 * This method calles "delete tagger". 1489 * In some environment, e.g., MS-Windows, an object allocated inside a DLL must be deleted in the same DLL too. 1490 * @param tagger tagger object 1491 */ 1492 MECAB_DLL_EXTERN void deleteTagger(Tagger *tagger); 1493 1494 /** 1495 * Return last error string. 1496 * @return error string 1497 */ 1498 MECAB_DLL_EXTERN const char* getLastError(); 1499 1500 /** 1501 * An alias of getLastError. 1502 * It is kept for backward compatibility. 1503 * @return error string 1504 */ 1505 MECAB_DLL_EXTERN const char* getTaggerError(); 1506 #endif 1507 } 1508 #endif 1509 #endif /* MECAB_MECAB_H_ */ 1510