1 /* $NetBSD: plural_parser.c,v 1.2 2007/01/17 23:24:22 hubertf Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
27 * 28 */ 29 30 #include <sys/cdefs.h> 31 __RCSID("$NetBSD: plural_parser.c,v 1.2 2007/01/17 23:24:22 hubertf Exp $"); 32 33 #include <assert.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <string.h> 37 #include <citrus/citrus_namespace.h> 38 #include <citrus/citrus_region.h> 39 #include <citrus/citrus_memstream.h> 40 #include <citrus/citrus_bcs.h> 41 #include "plural_parser.h" 42 43 #if defined(TEST_TOKENIZER) || defined(TEST_PARSER) 44 #define ALLOW_EMPTY 45 #define ALLOW_ARBITRARY_IDENTIFIER 46 #endif 47 48 #define MAX_LEN_ATOM 10 49 #define MAX_NUM_OPERANDS 3 50 51 #define T_EOF EOF 52 #define T_NONE 0x100 53 #define T_LAND 0x101 /* && */ 54 #define T_LOR 0x102 /* || */ 55 #define T_EQUALITY 0x103 /* == or != */ 56 #define T_RELATIONAL 0x104 /* <, >, <= or >= */ 57 #define T_ADDITIVE 0x105 /* + or - */ 58 #define T_MULTIPLICATIVE 0x106 /* *, / or % */ 59 #define T_IDENTIFIER 0x200 60 #define T_CONSTANT 0x201 61 #define T_ILCHAR 0x300 62 #define T_TOOLONG 0x301 63 #define T_ILTOKEN 0x302 64 #define T_ILEND 0x303 65 #define T_NOMEM 0x304 66 #define T_NOTFOUND 0x305 67 #define T_ILPLURAL 0x306 68 #define T_IS_OPERATOR(t) ((t) < 0x200) 69 #define T_IS_ERROR(t) ((t) >= 0x300) 70 71 #define OP_EQ ('='+'=') 72 #define OP_NEQ ('!'+'=') 73 #define OP_LTEQ ('<'+'=') 74 #define OP_GTEQ ('>'+'=') 75 76 #define PLURAL_NUMBER_SYMBOL "n" 77 #define NPLURALS_SYMBOL "nplurals" 78 #define LEN_NPLURAL_SYMBOL (sizeof (NPLURALS_SYMBOL) -1) 79 #define PLURAL_SYMBOL "plural" 80 #define LEN_PLURAL_SYMBOL (sizeof (PLURAL_SYMBOL) -1) 81 #define PLURAL_FORMS "Plural-Forms:" 82 #define LEN_PLURAL_FORMS (sizeof (PLURAL_FORMS) -1) 83 84 /* ---------------------------------------------------------------------- 85 * tokenizer part 86 */ 87 88 union token_data 89 { 90 unsigned long constant; 91 #ifdef ALLOW_ARBITRARY_IDENTIFIER 92 char identifier[MAX_LEN_ATOM+1]; 93 #endif 94 char op; 95 }; 96 97 struct tokenizer_context 98 { 99 struct _memstream memstream; 100 struct { 101 int 
token; 102 union token_data token_data; 103 } token0; 104 }; 105 106 /* initialize a tokenizer context */ 107 static void 108 init_tokenizer_context(struct tokenizer_context *tcx) 109 { 110 tcx->token0.token = T_NONE; 111 } 112 113 /* get an atom (identifier or constant) */ 114 static int 115 tokenize_atom(struct tokenizer_context *tcx, union token_data *token_data) 116 { 117 int ch, len; 118 char buf[MAX_LEN_ATOM+1]; 119 120 len = 0; 121 while (/*CONSTCOND*/1) { 122 ch = _memstream_getc(&tcx->memstream); 123 if (!(_bcs_isalnum(ch) || ch == '_')) { 124 _memstream_ungetc(&tcx->memstream, ch); 125 break; 126 } 127 if (len == MAX_LEN_ATOM) 128 return T_TOOLONG; 129 buf[len++] = ch; 130 } 131 buf[len] = '\0'; 132 if (len == 0) 133 return T_ILCHAR; 134 135 if (_bcs_isdigit((int)(unsigned char)buf[0])) { 136 unsigned long ul; 137 char *post; 138 ul = strtoul(buf, &post, 0); 139 if (buf+len != post) 140 return T_ILCHAR; 141 token_data->constant = ul; 142 return T_CONSTANT; 143 } 144 145 #ifdef ALLOW_ARBITRARY_IDENTIFIER 146 strcpy(token_data->identifier, buf); 147 return T_IDENTIFIER; 148 #else 149 if (!strcmp(buf, PLURAL_NUMBER_SYMBOL)) 150 return T_IDENTIFIER; 151 return T_ILCHAR; 152 #endif 153 } 154 155 /* tokenizer main routine */ 156 static int 157 tokenize(struct tokenizer_context *tcx, union token_data *token_data) 158 { 159 int ch, prevch; 160 161 retry: 162 ch = _memstream_getc(&tcx->memstream); 163 if (_bcs_isspace(ch)) 164 goto retry; 165 166 switch (ch) { 167 case T_EOF: 168 return ch; 169 case '+': case '-': 170 token_data->op = ch; 171 return T_ADDITIVE; 172 case '*': case '/': case '%': 173 token_data->op = ch; 174 return T_MULTIPLICATIVE; 175 case '?': case ':': case '(': case ')': 176 token_data->op = ch; 177 return ch; 178 case '&': case '|': 179 prevch = ch; 180 ch = _memstream_getc(&tcx->memstream); 181 if (ch != prevch) { 182 _memstream_ungetc(&tcx->memstream, ch); 183 return T_ILCHAR; 184 } 185 token_data->op = ch; 186 switch (ch) { 187 case '&': 
188 return T_LAND; 189 case '|': 190 return T_LOR; 191 } 192 /*NOTREACHED*/ 193 case '=': case '!': case '<': case '>': 194 prevch = ch; 195 ch = _memstream_getc(&tcx->memstream); 196 if (ch != '=') { 197 _memstream_ungetc(&tcx->memstream, ch); 198 switch (prevch) { 199 case '=': 200 return T_ILCHAR; 201 case '!': 202 return '!'; 203 case '<': 204 case '>': 205 token_data->op = prevch; /* OP_LT or OP_GT */ 206 return T_RELATIONAL; 207 } 208 } 209 /* '==', '!=', '<=' or '>=' */ 210 token_data->op = ch+prevch; 211 switch (prevch) { 212 case '=': 213 case '!': 214 return T_EQUALITY; 215 case '<': 216 case '>': 217 return T_RELATIONAL; 218 } 219 /*NOTREACHED*/ 220 } 221 222 _memstream_ungetc(&tcx->memstream, ch); 223 return tokenize_atom(tcx, token_data); 224 } 225 226 /* get the next token */ 227 static int 228 get_token(struct tokenizer_context *tcx, union token_data *token_data) 229 { 230 if (tcx->token0.token != T_NONE) { 231 int token = tcx->token0.token; 232 tcx->token0.token = T_NONE; 233 *token_data = tcx->token0.token_data; 234 return token; 235 } 236 return tokenize(tcx, token_data); 237 } 238 239 /* push back the last token */ 240 static void 241 unget_token(struct tokenizer_context *tcx, 242 int token, union token_data *token_data) 243 { 244 tcx->token0.token = token; 245 tcx->token0.token_data = *token_data; 246 } 247 248 #ifdef TEST_TOKENIZER 249 250 int 251 main(int argc, char **argv) 252 { 253 struct tokenizer_context tcx; 254 union token_data token_data; 255 int token; 256 257 if (argc != 2) { 258 fprintf(stderr, "usage: %s <expression>\n", argv[0]); 259 return EXIT_FAILURE; 260 } 261 262 init_tokenizer_context(&tcx); 263 _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1])); 264 265 while (1) { 266 token = get_token(&tcx, &token_data); 267 switch (token) { 268 case T_EOF: 269 goto quit; 270 case T_ILCHAR: 271 printf("illegal character.\n"); 272 goto quit; 273 case T_TOOLONG: 274 printf("too long atom.\n"); 275 goto quit; 276 case T_CONSTANT: 
277 printf("constant: %lu\n", token_data.constant); 278 break; 279 case T_IDENTIFIER: 280 printf("symbol: %s\n", token_data.identifier); 281 break; 282 default: 283 printf("operator: "); 284 switch (token) { 285 case T_LAND: 286 printf("&&\n"); 287 break; 288 case T_LOR: 289 printf("||\n"); 290 break; 291 case T_EQUALITY: 292 printf("%c=\n", token_data.op-'='); 293 break; 294 case T_RELATIONAL: 295 switch(token_data.op) { 296 case OP_LTEQ: 297 case OP_GTEQ: 298 printf("%c=\n", token_data.op-'='); 299 break; 300 default: 301 printf("%c\n", token_data.op); 302 break; 303 } 304 break; 305 case T_ADDITIVE: 306 case T_MULTIPLICATIVE: 307 printf("%c\n", token_data.op); 308 break; 309 default: 310 printf("operator: %c\n", token); 311 } 312 } 313 } 314 quit: 315 return 0; 316 } 317 #endif /* TEST_TOKENIZER */ 318 319 320 /* ---------------------------------------------------------------------- 321 * parser part 322 * 323 * exp := cond 324 * 325 * cond := lor | lor '?' cond ':' cond 326 * 327 * lor := land ( '||' land )* 328 * 329 * land := equality ( '&&' equality )* 330 * 331 * equality := relational ( equalityops relational )* 332 * equalityops := '==' | '!=' 333 * 334 * relational := additive ( relationalops additive )* 335 * relationalops := '<' | '>' | '<=' | '>=' 336 * 337 * additive := multiplicative ( additiveops multiplicative )* 338 * additiveops := '+' | '-' 339 * 340 * multiplicative := lnot ( multiplicativeops lnot )* 341 * multiplicativeops := '*' | '/' | '%' 342 * 343 * lnot := '!' 
lnot | term 344 * 345 * term := literal | identifier | '(' exp ')' 346 * 347 */ 348 349 #define T_ENSURE_OK(token, label) \ 350 do { \ 351 if (T_IS_ERROR(token)) \ 352 goto label; \ 353 } while (/*CONSTCOND*/0) 354 #define T_ENSURE_SOMETHING(token, label) \ 355 do { \ 356 if ((token) == T_EOF) { \ 357 token = T_ILEND; \ 358 goto label; \ 359 } else if (T_IS_ERROR(token)) \ 360 goto label; \ 361 } while (/*CONSTCOND*/0) 362 363 #define parser_element plural_element 364 365 struct parser_element; 366 struct parser_op 367 { 368 char op; 369 struct parser_element *operands[MAX_NUM_OPERANDS]; 370 }; 371 struct parser_element 372 { 373 int kind; 374 union 375 { 376 struct parser_op parser_op; 377 union token_data token_data; 378 } u; 379 }; 380 381 struct parser_op2_transition 382 { 383 int kind; 384 const struct parser_op2_transition *next; 385 }; 386 387 /* prototypes */ 388 static int parse_cond(struct tokenizer_context *, struct parser_element *); 389 390 391 /* transition table for the 2-operand operators */ 392 #define DEF_TR(t, k, n) \ 393 static struct parser_op2_transition exp_tr_##t = { \ 394 k, &exp_tr_##n \ 395 } 396 #define DEF_TR0(t, k) \ 397 static struct parser_op2_transition exp_tr_##t = { \ 398 k, NULL /* expect lnot */ \ 399 } 400 401 DEF_TR0(multiplicative, T_MULTIPLICATIVE); 402 DEF_TR(additive, T_ADDITIVE, multiplicative); 403 DEF_TR(relational, T_RELATIONAL, additive); 404 DEF_TR(equality, T_EQUALITY, relational); 405 DEF_TR(land, T_LAND, equality); 406 DEF_TR(lor, T_LOR, land); 407 408 /* init a parser element structure */ 409 static void 410 init_parser_element(struct parser_element *pe) 411 { 412 int i; 413 414 pe->kind = T_NONE; 415 for (i=0; i<MAX_NUM_OPERANDS; i++) 416 pe->u.parser_op.operands[i] = NULL; 417 } 418 419 /* uninitialize a parser element structure with freeing children */ 420 static void free_parser_element(struct parser_element *); 421 static void 422 uninit_parser_element(struct parser_element *pe) 423 { 424 int i; 425 426 if 
(T_IS_OPERATOR(pe->kind)) 427 for (i=0; i<MAX_NUM_OPERANDS; i++) 428 if (pe->u.parser_op.operands[i]) 429 free_parser_element( 430 pe->u.parser_op.operands[i]); 431 } 432 433 /* free a parser element structure with freeing children */ 434 static void 435 free_parser_element(struct parser_element *pe) 436 { 437 if (pe) { 438 uninit_parser_element(pe); 439 free(pe); 440 } 441 } 442 443 444 /* copy a parser element structure shallowly */ 445 static void 446 copy_parser_element(struct parser_element *dpe, 447 const struct parser_element *spe) 448 { 449 memcpy(dpe, spe, sizeof *dpe); 450 } 451 452 /* duplicate a parser element structure shallowly */ 453 static struct parser_element * 454 dup_parser_element(const struct parser_element *pe) 455 { 456 struct parser_element *dpe = malloc(sizeof *dpe); 457 if (dpe) 458 copy_parser_element(dpe, pe); 459 return dpe; 460 } 461 462 /* term := identifier | constant | '(' exp ')' */ 463 static int 464 parse_term(struct tokenizer_context *tcx, struct parser_element *pelem) 465 { 466 struct parser_element pe0; 467 int token; 468 union token_data token_data; 469 470 token = get_token(tcx, &token_data); 471 switch (token) { 472 case '(': 473 /* '(' exp ')' */ 474 init_parser_element(&pe0); 475 /* expect exp */ 476 token = parse_cond(tcx, &pe0); 477 T_ENSURE_OK(token, err); 478 /* expect ')' */ 479 token = get_token(tcx, &token_data); 480 T_ENSURE_SOMETHING(token, err); 481 if (token != ')') { 482 unget_token(tcx, token, &token_data); 483 token = T_ILTOKEN; 484 goto err; 485 } 486 copy_parser_element(pelem, &pe0); 487 return token; 488 err: 489 uninit_parser_element(&pe0); 490 return token; 491 case T_IDENTIFIER: 492 case T_CONSTANT: 493 pelem->kind = token; 494 pelem->u.token_data = token_data; 495 return token; 496 case T_EOF: 497 return T_ILEND; 498 default: 499 return T_ILTOKEN; 500 } 501 } 502 503 /* lnot := '!' 
lnot | term */ 504 static int 505 parse_lnot(struct tokenizer_context *tcx, struct parser_element *pelem) 506 { 507 struct parser_element pe0; 508 int token; 509 union token_data token_data; 510 511 init_parser_element(&pe0); 512 513 /* '!' or not */ 514 token = get_token(tcx, &token_data); 515 if (token != '!') { 516 /* stop: term */ 517 unget_token(tcx, token, &token_data); 518 return parse_term(tcx, pelem); 519 } 520 521 /* '!' term */ 522 token = parse_lnot(tcx, &pe0); 523 T_ENSURE_OK(token, err); 524 525 pelem->kind = '!'; 526 pelem->u.parser_op.operands[0] = dup_parser_element(&pe0); 527 return pelem->kind; 528 err: 529 uninit_parser_element(&pe0); 530 return token; 531 } 532 533 /* ext_op := ext_next ( op ext_next )* */ 534 static int 535 parse_op2(struct tokenizer_context *tcx, struct parser_element *pelem, 536 const struct parser_op2_transition *tr) 537 { 538 struct parser_element pe0, pe1, peop; 539 int token; 540 union token_data token_data; 541 char op; 542 543 /* special case: expect lnot */ 544 if (tr == NULL) 545 return parse_lnot(tcx, pelem); 546 547 init_parser_element(&pe0); 548 init_parser_element(&pe1); 549 token = parse_op2(tcx, &pe0, tr->next); 550 T_ENSURE_OK(token, err); 551 552 while (/*CONSTCOND*/1) { 553 /* expect op or empty */ 554 token = get_token(tcx, &token_data); 555 if (token != tr->kind) { 556 /* stop */ 557 unget_token(tcx, token, &token_data); 558 copy_parser_element(pelem, &pe0); 559 break; 560 } 561 op = token_data.op; 562 /* right hand */ 563 token = parse_op2(tcx, &pe1, tr->next); 564 T_ENSURE_OK(token, err); 565 566 init_parser_element(&peop); 567 peop.kind = tr->kind; 568 peop.u.parser_op.op = op; 569 peop.u.parser_op.operands[0] = dup_parser_element(&pe0); 570 init_parser_element(&pe0); 571 peop.u.parser_op.operands[1] = dup_parser_element(&pe1); 572 init_parser_element(&pe1); 573 copy_parser_element(&pe0, &peop); 574 } 575 return pelem->kind; 576 err: 577 uninit_parser_element(&pe1); 578 uninit_parser_element(&pe0); 579 
return token; 580 } 581 582 /* cond := lor | lor '?' cond ':' cond */ 583 static int 584 parse_cond(struct tokenizer_context *tcx, struct parser_element *pelem) 585 { 586 struct parser_element pe0, pe1, pe2; 587 int token; 588 union token_data token_data; 589 590 init_parser_element(&pe0); 591 init_parser_element(&pe1); 592 init_parser_element(&pe2); 593 594 /* expect lor or empty */ 595 token = parse_op2(tcx, &pe0, &exp_tr_lor); 596 T_ENSURE_OK(token, err); 597 598 /* '?' or not */ 599 token = get_token(tcx, &token_data); 600 if (token != '?') { 601 /* stop: lor */ 602 unget_token(tcx, token, &token_data); 603 copy_parser_element(pelem, &pe0); 604 return pe0.kind; 605 } 606 607 /* lor '?' cond ':' cond */ 608 /* expect cond */ 609 token = parse_cond(tcx, &pe1); 610 T_ENSURE_OK(token, err); 611 612 /* expect ':' */ 613 token = get_token(tcx, &token_data); 614 T_ENSURE_OK(token, err); 615 if (token != ':') { 616 unget_token(tcx, token, &token_data); 617 token = T_ILTOKEN; 618 goto err; 619 } 620 621 /* expect cond */ 622 token = parse_cond(tcx, &pe2); 623 T_ENSURE_OK(token, err); 624 625 pelem->kind = '?'; 626 pelem->u.parser_op.operands[0] = dup_parser_element(&pe0); 627 pelem->u.parser_op.operands[1] = dup_parser_element(&pe1); 628 pelem->u.parser_op.operands[2] = dup_parser_element(&pe2); 629 return pelem->kind; 630 err: 631 uninit_parser_element(&pe2); 632 uninit_parser_element(&pe1); 633 uninit_parser_element(&pe0); 634 return token; 635 } 636 637 static int 638 parse_exp(struct tokenizer_context *tcx, struct parser_element *pelem) 639 { 640 int token, token1; 641 union token_data token_data; 642 643 #ifdef ALLOW_EMPTY 644 /* empty check */ 645 token = get_token(tcx, &token_data); 646 if (token == T_EOF) 647 return token; 648 unget_token(tcx, token, &token_data); 649 #endif 650 651 token = parse_cond(tcx, pelem); 652 if (!T_IS_ERROR(token)) { 653 /* termination check */ 654 token1 = get_token(tcx, &token_data); 655 if (token1 == T_EOF) 656 return token; 657 
else if (!T_IS_ERROR(token)) 658 unget_token(tcx, token1, &token_data); 659 return T_ILTOKEN; 660 } 661 return token; 662 } 663 664 665 #if defined(TEST_PARSER) || defined(TEST_PARSE_PLURAL) 666 #include <stdio.h> 667 668 static void dump_elem(struct parser_element *); 669 670 static void 671 dump_op2(struct parser_element *pelem) 672 { 673 dump_elem(pelem->u.parser_op.operands[0]); 674 printf(" "); 675 dump_elem(pelem->u.parser_op.operands[1]); 676 printf(")"); 677 } 678 679 static void 680 dump_op3(struct parser_element *pelem) 681 { 682 dump_elem(pelem->u.parser_op.operands[0]); 683 printf(" "); 684 dump_elem(pelem->u.parser_op.operands[1]); 685 printf(" "); 686 dump_elem(pelem->u.parser_op.operands[2]); 687 printf(")"); 688 } 689 690 static void 691 dump_elem(struct parser_element *pelem) 692 { 693 switch (pelem->kind) { 694 case T_LAND: 695 printf("(&& "); 696 dump_op2(pelem); 697 break; 698 case T_LOR: 699 printf("(|| "); 700 dump_op2(pelem); 701 break; 702 case T_EQUALITY: 703 switch (pelem->u.parser_op.op) { 704 case OP_EQ: 705 printf("(== "); 706 break; 707 case OP_NEQ: 708 printf("(!= "); 709 break; 710 } 711 dump_op2(pelem); 712 break; 713 case T_RELATIONAL: 714 switch (pelem->u.parser_op.op) { 715 case '<': 716 case '>': 717 printf("(%c ", pelem->u.parser_op.op); 718 break; 719 case OP_LTEQ: 720 case OP_GTEQ: 721 printf("(%c= ", pelem->u.parser_op.op-'='); 722 break; 723 } 724 dump_op2(pelem); 725 break; 726 case T_ADDITIVE: 727 case T_MULTIPLICATIVE: 728 printf("(%c ", pelem->u.parser_op.op); 729 dump_op2(pelem); 730 break; 731 case '!': 732 printf("(! "); 733 dump_elem(pelem->u.parser_op.operands[0]); 734 printf(")"); 735 break; 736 case '?': 737 printf("(? 
"); 738 dump_op3(pelem); 739 break; 740 case T_CONSTANT: 741 printf("%d", pelem->u.token_data.constant); 742 break; 743 case T_IDENTIFIER: 744 #ifdef ALLOW_ARBITRARY_IDENTIFIER 745 printf("%s", pelem->u.token_data.identifier); 746 #else 747 printf(PLURAL_NUMBER_SYMBOL); 748 #endif 749 break; 750 } 751 } 752 #endif 753 #ifdef TEST_PARSER 754 int 755 main(int argc, char **argv) 756 { 757 struct tokenizer_context tcx; 758 struct parser_element pelem; 759 int token; 760 761 if (argc != 2) { 762 fprintf(stderr, "usage: %s <expression>\n", argv[0]); 763 return EXIT_FAILURE; 764 } 765 766 init_tokenizer_context(&tcx); 767 _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1])); 768 769 init_parser_element(&pelem); 770 token = parse_exp(&tcx, &pelem); 771 772 if (token == T_EOF) 773 printf("none"); 774 else if (T_IS_ERROR(token)) 775 printf("error: 0x%X", token); 776 else 777 dump_elem(&pelem); 778 printf("\n"); 779 780 uninit_parser_element(&pelem); 781 782 return EXIT_SUCCESS; 783 } 784 #endif /* TEST_PARSER */ 785 786 /* ---------------------------------------------------------------------- 787 * calcurate plural number 788 */ 789 static unsigned long 790 calculate_plural(const struct parser_element *pe, unsigned long n) 791 { 792 unsigned long val0, val1; 793 switch (pe->kind) { 794 case T_IDENTIFIER: 795 return n; 796 case T_CONSTANT: 797 return pe->u.token_data.constant; 798 case '?': 799 val0 = calculate_plural(pe->u.parser_op.operands[0], n); 800 if (val0) 801 val1=calculate_plural(pe->u.parser_op.operands[1], n); 802 else 803 val1=calculate_plural(pe->u.parser_op.operands[2], n); 804 return val1; 805 case '!': 806 return !calculate_plural(pe->u.parser_op.operands[0], n); 807 case T_MULTIPLICATIVE: 808 case T_ADDITIVE: 809 case T_RELATIONAL: 810 case T_EQUALITY: 811 case T_LOR: 812 case T_LAND: 813 val0 = calculate_plural(pe->u.parser_op.operands[0], n); 814 val1 = calculate_plural(pe->u.parser_op.operands[1], n); 815 switch (pe->u.parser_op.op) { 816 case 
'*': 817 return val0*val1; 818 case '/': 819 return val0/val1; 820 case '%': 821 return val0%val1; 822 case '+': 823 return val0+val1; 824 case '-': 825 return val0-val1; 826 case '<': 827 return val0<val1; 828 case '>': 829 return val0>val1; 830 case OP_LTEQ: 831 return val0<=val1; 832 case OP_GTEQ: 833 return val0>=val1; 834 case OP_EQ: 835 return val0==val1; 836 case OP_NEQ: 837 return val0!=val1; 838 case '|': 839 return val0||val1; 840 case '&': 841 return val0&&val1; 842 } 843 } 844 return 0; 845 } 846 847 #ifdef TEST_CALC_PLURAL 848 #include <stdio.h> 849 850 int 851 main(int argc, char **argv) 852 { 853 struct tokenizer_context tcx; 854 struct parser_element pelem; 855 int token; 856 857 if (argc != 3) { 858 fprintf(stderr, "usage: %s <expression> <n>\n", argv[0]); 859 return EXIT_FAILURE; 860 } 861 862 init_tokenizer_context(&tcx); 863 _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1])); 864 865 init_parser_element(&pelem); 866 token = parse_exp(&tcx, &pelem); 867 868 if (token == T_EOF) 869 printf("none"); 870 else if (T_IS_ERROR(token)) 871 printf("error: 0x%X", token); 872 else { 873 printf("plural = %lu", 874 calculate_plural(&pelem, atoi(argv[2]))); 875 } 876 printf("\n"); 877 878 uninit_parser_element(&pelem); 879 880 return EXIT_SUCCESS; 881 } 882 #endif /* TEST_CALC_PLURAL */ 883 884 885 /* ---------------------------------------------------------------------- 886 * parse plural forms 887 */ 888 889 static void 890 region_skip_ws(struct _region *r) 891 { 892 const char *str = _region_head(r); 893 size_t len = _region_size(r); 894 895 str = _bcs_skip_ws_len(str, &len); 896 _region_init(r, __UNCONST(str), len); 897 } 898 899 static void 900 region_trunc_rws(struct _region *r) 901 { 902 const char *str = _region_head(r); 903 size_t len = _region_size(r); 904 905 _bcs_trunc_rws_len(str, &len); 906 _region_init(r, __UNCONST(str), len); 907 } 908 909 static int 910 region_check_prefix(struct _region *r, const char *pre, size_t prelen, 911 int 
ignorecase) 912 { 913 if (_region_size(r) < prelen) 914 return -1; 915 916 if (ignorecase) { 917 if (_bcs_strncasecmp(_region_head(r), pre, prelen)) 918 return -1; 919 } else { 920 if (memcmp(_region_head(r), pre, prelen)) 921 return -1; 922 } 923 return 0; 924 } 925 926 static int 927 cut_trailing_semicolon(struct _region *r) 928 { 929 930 region_trunc_rws(r); 931 if (_region_size(r) == 0 || _region_peek8(r, _region_size(r)-1) != ';') 932 return -1; 933 _region_get_subregion(r, r, 0, _region_size(r)-1); 934 return 0; 935 } 936 937 static int 938 find_plural_forms(struct _region *r) 939 { 940 struct _memstream ms; 941 struct _region rr; 942 943 _memstream_bind(&ms, r); 944 945 while (!_memstream_getln_region(&ms, &rr)) { 946 if (!region_check_prefix(&rr, 947 PLURAL_FORMS, LEN_PLURAL_FORMS, 1)) { 948 _region_get_subregion( 949 r, &rr, LEN_PLURAL_FORMS, 950 _region_size(&rr)-LEN_PLURAL_FORMS); 951 region_skip_ws(r); 952 region_trunc_rws(r); 953 return 0; 954 } 955 } 956 return -1; 957 } 958 959 static int 960 skip_assignment(struct _region *r, const char *sym, size_t symlen) 961 { 962 region_skip_ws(r); 963 if (region_check_prefix(r, sym, symlen, 0)) 964 return -1; 965 _region_get_subregion(r, r, symlen, _region_size(r)-symlen); 966 region_skip_ws(r); 967 if (_region_size(r) == 0 || _region_peek8(r, 0) != '=') 968 return -1; 969 _region_get_subregion(r, r, 1, _region_size(r)-1); 970 region_skip_ws(r); 971 return 0; 972 } 973 974 static int 975 skip_nplurals(struct _region *r, unsigned long *rnp) 976 { 977 unsigned long np; 978 char buf[MAX_LEN_ATOM+2], *endptr; 979 const char *endptrconst; 980 size_t ofs; 981 982 if (skip_assignment(r, NPLURALS_SYMBOL, LEN_NPLURAL_SYMBOL)) 983 return -1; 984 if (_region_size(r) == 0 || !_bcs_isdigit(_region_peek8(r, 0))) 985 return -1; 986 strlcpy(buf, _region_head(r), sizeof (buf)); 987 np = strtoul(buf, &endptr, 0); 988 endptrconst = _bcs_skip_ws(endptr); 989 if (*endptrconst != ';') 990 return -1; 991 ofs = endptrconst+1-buf; 992 
if (_region_get_subregion(r, r, ofs, _region_size(r)-ofs)) 993 return -1; 994 if (rnp) 995 *rnp = np; 996 return 0; 997 } 998 999 static int 1000 parse_plural_body(struct _region *r, struct parser_element **rpe) 1001 { 1002 int token; 1003 struct tokenizer_context tcx; 1004 struct parser_element pelem, *ppe; 1005 1006 init_tokenizer_context(&tcx); 1007 _memstream_bind(&tcx.memstream, r); 1008 1009 init_parser_element(&pelem); 1010 token = parse_exp(&tcx, &pelem); 1011 if (T_IS_ERROR(token)) 1012 return token; 1013 1014 ppe = dup_parser_element(&pelem); 1015 if (ppe == NULL) { 1016 uninit_parser_element(&pelem); 1017 return T_NOMEM; 1018 } 1019 1020 *rpe = ppe; 1021 1022 return 0; 1023 } 1024 1025 static int 1026 parse_plural(struct parser_element **rpe, unsigned long *rnp, 1027 const char *str, size_t len) 1028 { 1029 struct _region r; 1030 1031 _region_init(&r, __UNCONST(str), len); 1032 1033 if (find_plural_forms(&r)) 1034 return T_NOTFOUND; 1035 if (skip_nplurals(&r, rnp)) 1036 return T_ILPLURAL; 1037 if (skip_assignment(&r, PLURAL_SYMBOL, LEN_PLURAL_SYMBOL)) 1038 return T_ILPLURAL; 1039 if (cut_trailing_semicolon(&r)) 1040 return T_ILPLURAL; 1041 return parse_plural_body(&r, rpe); 1042 } 1043 1044 #ifdef TEST_PARSE_PLURAL 1045 int 1046 main(int argc, char **argv) 1047 { 1048 int ret; 1049 struct parser_element *pelem; 1050 unsigned long np; 1051 1052 if (argc != 2 && argc != 3) { 1053 fprintf(stderr, "usage: %s <mime-header> [n]\n", argv[0]); 1054 return EXIT_FAILURE; 1055 } 1056 1057 ret = parse_plural(&pelem, &np, argv[1], strlen(argv[1])); 1058 1059 if (ret == T_EOF) 1060 printf("none"); 1061 else if (T_IS_ERROR(ret)) 1062 printf("error: 0x%X", ret); 1063 else { 1064 printf("syntax tree: "); 1065 dump_elem(pelem); 1066 printf("\nnplurals = %lu", np); 1067 if (argv[2]) 1068 printf(", plural = %lu", 1069 calculate_plural(pelem, atoi(argv[2]))); 1070 free_parser_element(pelem); 1071 } 1072 printf("\n"); 1073 1074 1075 return EXIT_SUCCESS; 1076 } 1077 #endif /* 
TEST_PARSE_PLURAL */

/*
 * external interface
 *
 * The opaque struct gettext_plural handed out to callers is the syntax
 * tree node type of this file; the wrappers below only convert the
 * pointer types.
 */

/*
 * Parse the Plural-Forms header found in the `len' bytes at `str'.
 * Forwards to parse_plural() and returns its result unchanged.
 */
int
_gettext_parse_plural(struct gettext_plural **rpe, unsigned long *rnp,
		      const char *str, size_t len)
{
	struct parser_element **tree = (struct parser_element **)rpe;

	return parse_plural(tree, rnp, str, len);
}

/* Evaluate the plural expression `pe' for the number `n'. */
unsigned long
_gettext_calculate_plural(const struct gettext_plural *pe, unsigned long n)
{
	const struct parser_element *tree = (const void *)pe;

	return calculate_plural(tree, n);
}

/* Release a tree obtained from _gettext_parse_plural(). */
void
_gettext_free_plural(struct gettext_plural *pe)
{
	struct parser_element *tree = (void *)pe;

	free_parser_element(tree);
}

#ifdef TEST_PLURAL
#include <libintl.h>
#include <locale.h>

#define PR(n)	printf("n=%d: \"%s\"\n", n, dngettext("test", "1", "2", n))

/* test driver: look up the plural message of domain "test" for n = 1..4 */
int
main(void)
{
	int i;

	bindtextdomain("test", "."); /* ./LANG/LC_MESSAGES/test.mo */
	for (i = 1; i <= 4; i++)
		PR(i);

	return 0;
}
#endif