xref: /netbsd/lib/libintl/plural_parser.c (revision 6550d01e)
1 /*	$NetBSD: plural_parser.c,v 1.2 2007/01/17 23:24:22 hubertf Exp $	*/
2 
3 /*-
4  * Copyright (c) 2005 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include <sys/cdefs.h>
31 __RCSID("$NetBSD: plural_parser.c,v 1.2 2007/01/17 23:24:22 hubertf Exp $");
32 
33 #include <assert.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <citrus/citrus_namespace.h>
38 #include <citrus/citrus_region.h>
39 #include <citrus/citrus_memstream.h>
40 #include <citrus/citrus_bcs.h>
41 #include "plural_parser.h"
42 
/*
 * The stand-alone tokenizer and parser test programs accept an empty
 * expression and identifiers other than the plural number symbol.
 */
#if defined(TEST_TOKENIZER) || defined(TEST_PARSER)
#define ALLOW_EMPTY
#define ALLOW_ARBITRARY_IDENTIFIER
#endif

/* limits */
#define MAX_LEN_ATOM		10	/* max chars in an identifier/constant */
#define MAX_NUM_OPERANDS	3	/* operand slots in an operator node */

/*
 * Token codes.  Single character tokens ('?', ':', '(', ')', '!') are
 * represented by their own character code.
 */
#define T_EOF			EOF
#define T_NONE			0x100	/* no token / unset element */
/* operators: everything below T_IDENTIFIER */
#define T_LAND			0x101	/* && */
#define T_LOR			0x102	/* || */
#define T_EQUALITY		0x103	/* == or != */
#define T_RELATIONAL		0x104	/* <, >, <= or >= */
#define T_ADDITIVE		0x105	/* + or - */
#define T_MULTIPLICATIVE	0x106	/* *, / or % */
/* atoms */
#define T_IDENTIFIER		0x200
#define T_CONSTANT		0x201
/* errors: everything from T_ILCHAR upwards */
#define T_ILCHAR		0x300	/* illegal character */
#define T_TOOLONG		0x301	/* atom longer than MAX_LEN_ATOM */
#define T_ILTOKEN		0x302	/* unexpected token */
#define T_ILEND			0x303	/* unexpected end of input */
#define T_NOMEM			0x304	/* memory allocation failure */
#define T_NOTFOUND		0x305	/* no Plural-Forms line found */
#define T_ILPLURAL		0x306	/* malformed Plural-Forms line */
#define T_IS_OPERATOR(t)	((t) < T_IDENTIFIER)
#define T_IS_ERROR(t)		((t) >= T_ILCHAR)

/* two character operators are encoded as the sum of both characters */
#define OP_EQ			('=' + '=')
#define OP_NEQ			('!' + '=')
#define OP_LTEQ			('<' + '=')
#define OP_GTEQ			('>' + '=')

/* symbols recognised in expressions and in the Plural-Forms header */
#define PLURAL_NUMBER_SYMBOL	"n"
#define NPLURALS_SYMBOL		"nplurals"
#define LEN_NPLURAL_SYMBOL	(sizeof(NPLURALS_SYMBOL) - 1)
#define PLURAL_SYMBOL		"plural"
#define LEN_PLURAL_SYMBOL	(sizeof(PLURAL_SYMBOL) - 1)
#define PLURAL_FORMS		"Plural-Forms:"
#define LEN_PLURAL_FORMS	(sizeof(PLURAL_FORMS) - 1)
83 
84 /* ----------------------------------------------------------------------
85  * tokenizer part
86  */
87 
/* value attached to a token; which member is valid depends on the token */
union token_data {
	/* numeric value of a T_CONSTANT token */
	unsigned long constant;
#ifdef ALLOW_ARBITRARY_IDENTIFIER
	/* spelling of a T_IDENTIFIER token (test builds only) */
	char identifier[MAX_LEN_ATOM+1];
#endif
	/* operator character, or the sum of both characters (see OP_*) */
	char op;
};
96 
97 struct tokenizer_context
98 {
99 	struct _memstream memstream;
100 	struct {
101 		int token;
102 		union token_data token_data;
103 	} token0;
104 };
105 
106 /* initialize a tokenizer context */
107 static void
108 init_tokenizer_context(struct tokenizer_context *tcx)
109 {
110 	tcx->token0.token = T_NONE;
111 }
112 
113 /* get an atom (identifier or constant) */
114 static int
115 tokenize_atom(struct tokenizer_context *tcx, union token_data *token_data)
116 {
117 	int ch, len;
118 	char buf[MAX_LEN_ATOM+1];
119 
120 	len = 0;
121 	while (/*CONSTCOND*/1) {
122 		ch = _memstream_getc(&tcx->memstream);
123 		if (!(_bcs_isalnum(ch) || ch == '_')) {
124 			_memstream_ungetc(&tcx->memstream, ch);
125 			break;
126 		}
127 		if (len == MAX_LEN_ATOM)
128 			return T_TOOLONG;
129 		buf[len++] = ch;
130 	}
131 	buf[len] = '\0';
132 	if (len == 0)
133 		return T_ILCHAR;
134 
135 	if (_bcs_isdigit((int)(unsigned char)buf[0])) {
136 		unsigned long ul;
137 		char *post;
138 		ul = strtoul(buf, &post, 0);
139 		if (buf+len != post)
140 			return T_ILCHAR;
141 		token_data->constant = ul;
142 		return T_CONSTANT;
143 	}
144 
145 #ifdef ALLOW_ARBITRARY_IDENTIFIER
146 	strcpy(token_data->identifier, buf);
147 	return T_IDENTIFIER;
148 #else
149 	if (!strcmp(buf, PLURAL_NUMBER_SYMBOL))
150 		return T_IDENTIFIER;
151 	return T_ILCHAR;
152 #endif
153 }
154 
155 /* tokenizer main routine */
156 static int
157 tokenize(struct tokenizer_context *tcx, union token_data *token_data)
158 {
159 	int ch, prevch;
160 
161 retry:
162 	ch = _memstream_getc(&tcx->memstream);
163 	if (_bcs_isspace(ch))
164 		goto retry;
165 
166 	switch (ch) {
167 	case T_EOF:
168 		return ch;
169 	case '+': case '-':
170 		token_data->op = ch;
171 		return T_ADDITIVE;
172 	case '*': case '/': case '%':
173 		token_data->op = ch;
174 		return T_MULTIPLICATIVE;
175 	case '?': case ':': case '(': case ')':
176 		token_data->op = ch;
177 		return ch;
178 	case '&': case '|':
179 		prevch = ch;
180 		ch = _memstream_getc(&tcx->memstream);
181 		if (ch != prevch) {
182 			_memstream_ungetc(&tcx->memstream, ch);
183 			return T_ILCHAR;
184 		}
185 		token_data->op = ch;
186 		switch (ch) {
187 		case '&':
188 			return T_LAND;
189 		case '|':
190 			return T_LOR;
191 		}
192 		/*NOTREACHED*/
193 	case '=': case '!': case '<': case '>':
194 		prevch = ch;
195 		ch = _memstream_getc(&tcx->memstream);
196 		if (ch != '=') {
197 			_memstream_ungetc(&tcx->memstream, ch);
198 			switch (prevch) {
199 			case '=':
200 				return T_ILCHAR;
201 			case '!':
202 				return '!';
203 			case '<':
204 			case '>':
205 				token_data->op = prevch; /* OP_LT or OP_GT */
206 				return T_RELATIONAL;
207 			}
208 		}
209 		/* '==', '!=', '<=' or '>=' */
210 		token_data->op = ch+prevch;
211 		switch (prevch) {
212 		case '=':
213 		case '!':
214 			return T_EQUALITY;
215 		case '<':
216 		case '>':
217 			return T_RELATIONAL;
218 		}
219 		/*NOTREACHED*/
220 	}
221 
222 	_memstream_ungetc(&tcx->memstream, ch);
223 	return tokenize_atom(tcx, token_data);
224 }
225 
226 /* get the next token */
227 static int
228 get_token(struct tokenizer_context *tcx, union token_data *token_data)
229 {
230 	if (tcx->token0.token != T_NONE) {
231 		int token = tcx->token0.token;
232 		tcx->token0.token = T_NONE;
233 		*token_data = tcx->token0.token_data;
234 		return token;
235 	}
236 	return tokenize(tcx, token_data);
237 }
238 
239 /* push back the last token */
240 static void
241 unget_token(struct tokenizer_context *tcx,
242 	    int token, union token_data *token_data)
243 {
244 	tcx->token0.token = token;
245 	tcx->token0.token_data = *token_data;
246 }
247 
#ifdef TEST_TOKENIZER

/*
 * Tokenizer test driver: tokenizes argv[1] and prints one line per token
 * until end of input or the first tokenizer error.
 */
int
main(int argc, char **argv)
{
	struct tokenizer_context tcx;
	union token_data token_data;
	int token;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <expression>\n", argv[0]);
		return EXIT_FAILURE;
	}

	init_tokenizer_context(&tcx);
	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));

	while (1) {
		token = get_token(&tcx, &token_data);
		switch (token) {
		case T_EOF:
			goto quit;
		case T_ILCHAR:
			printf("illegal character.\n");
			goto quit;
		case T_TOOLONG:
			printf("too long atom.\n");
			goto quit;
		case T_CONSTANT:
			printf("constant: %lu\n", token_data.constant);
			break;
		case T_IDENTIFIER:
			printf("symbol: %s\n", token_data.identifier);
			break;
		default:
			printf("operator: ");
			switch (token) {
			case T_LAND:
				printf("&&\n");
				break;
			case T_LOR:
				printf("||\n");
				break;
			case T_EQUALITY:
				/* op holds first char + '='; recover the first */
				printf("%c=\n", token_data.op-'=');
				break;
			case T_RELATIONAL:
				switch(token_data.op) {
				case OP_LTEQ:
				case OP_GTEQ:
					printf("%c=\n", token_data.op-'=');
					break;
				default:
					printf("%c\n", token_data.op);
					break;
				}
				break;
			case T_ADDITIVE:
			case T_MULTIPLICATIVE:
				printf("%c\n", token_data.op);
				break;
			default:
				/*
				 * Single character token ('?', ':', '(', ')',
				 * '!').  The "operator: " prefix has already
				 * been printed above, so print only the
				 * character itself.
				 */
				printf("%c\n", token);
				break;
			}
		}
	}
quit:
	return 0;
}
#endif /* TEST_TOKENIZER */
318 
319 
320 /* ----------------------------------------------------------------------
321  * parser part
322  *
323  * exp := cond
324  *
325  * cond := lor | lor '?' cond ':' cond
326  *
327  * lor := land ( '||' land )*
328  *
329  * land := equality ( '&&' equality )*
330  *
331  * equality := relational ( equalityops relational )*
332  * equalityops := '==' | '!='
333  *
334  * relational := additive ( relationalops additive )*
335  * relationalops := '<' | '>' | '<=' | '>='
336  *
337  * additive := multiplicative ( additiveops multiplicative )*
338  * additiveops := '+' | '-'
339  *
340  * multiplicative := lnot ( multiplicativeops lnot )*
341  * multiplicativeops := '*' | '/' | '%'
342  *
343  * lnot := '!' lnot | term
344  *
345  * term := literal | identifier | '(' exp ')'
346  *
347  */
348 
/* jump to label when token is an error code */
#define T_ENSURE_OK(token, label)					      \
do {									      \
	if (T_IS_ERROR(token))						      \
		goto label;						      \
} while (/*CONSTCOND*/0)

/*
 * Like T_ENSURE_OK, but end of input is an error too: token is replaced
 * by T_ILEND before jumping.  token must be an lvalue.
 */
#define T_ENSURE_SOMETHING(token, label)				      \
do {									      \
	if ((token) == T_EOF)						      \
		token = T_ILEND;					      \
	if (T_IS_ERROR(token))						      \
		goto label;						      \
} while (/*CONSTCOND*/0)
362 
363 #define parser_element	plural_element
364 
365 struct parser_element;
366 struct parser_op
367 {
368 	char op;
369 	struct parser_element *operands[MAX_NUM_OPERANDS];
370 };
371 struct parser_element
372 {
373 	int kind;
374 	union
375 	{
376 		struct parser_op parser_op;
377 		union token_data token_data;
378 	} u;
379 };
380 
/*
 * One precedence level of the 2-operand operators: the token code that
 * belongs to the level and the next level to descend into (NULL when the
 * operands are parsed by parse_lnot()).
 */
struct parser_op2_transition {
	int kind;
	const struct parser_op2_transition *next;
};
386 
387 /* prototypes */
388 static int parse_cond(struct tokenizer_context *, struct parser_element *);
389 
390 
391 /* transition table for the 2-operand operators */
392 #define DEF_TR(t, k, n)							      \
393 static struct parser_op2_transition exp_tr_##t = {			      \
394 	k, &exp_tr_##n							      \
395 }
396 #define DEF_TR0(t, k)							      \
397 static struct parser_op2_transition exp_tr_##t = {			      \
398 	k, NULL /* expect lnot */					      \
399 }
400 
401 DEF_TR0(multiplicative, T_MULTIPLICATIVE);
402 DEF_TR(additive, T_ADDITIVE, multiplicative);
403 DEF_TR(relational, T_RELATIONAL, additive);
404 DEF_TR(equality, T_EQUALITY, relational);
405 DEF_TR(land, T_LAND, equality);
406 DEF_TR(lor, T_LOR, land);
407 
408 /* init a parser element structure */
409 static void
410 init_parser_element(struct parser_element *pe)
411 {
412 	int i;
413 
414 	pe->kind = T_NONE;
415 	for (i=0; i<MAX_NUM_OPERANDS; i++)
416 		pe->u.parser_op.operands[i] = NULL;
417 }
418 
419 /* uninitialize a parser element structure with freeing children */
420 static void free_parser_element(struct parser_element *);
421 static void
422 uninit_parser_element(struct parser_element *pe)
423 {
424 	int i;
425 
426 	if (T_IS_OPERATOR(pe->kind))
427 		for (i=0; i<MAX_NUM_OPERANDS; i++)
428 			if (pe->u.parser_op.operands[i])
429 				free_parser_element(
430 					pe->u.parser_op.operands[i]);
431 }
432 
/*
 * Free a heap allocated parser element together with all of its
 * children.  A NULL pointer is ignored.
 */
static void
free_parser_element(struct parser_element *pe)
{

	if (pe == NULL)
		return;
	uninit_parser_element(pe);
	free(pe);
}
442 
443 
444 /* copy a parser element structure shallowly */
445 static void
446 copy_parser_element(struct parser_element *dpe,
447 		    const struct parser_element *spe)
448 {
449 	memcpy(dpe, spe, sizeof *dpe);
450 }
451 
452 /* duplicate a parser element structure shallowly */
453 static struct parser_element *
454 dup_parser_element(const struct parser_element *pe)
455 {
456 	struct parser_element *dpe = malloc(sizeof *dpe);
457 	if (dpe)
458 		copy_parser_element(dpe, pe);
459 	return dpe;
460 }
461 
462 /* term := identifier | constant | '(' exp ')' */
463 static int
464 parse_term(struct tokenizer_context *tcx, struct parser_element *pelem)
465 {
466 	struct parser_element pe0;
467 	int token;
468 	union token_data token_data;
469 
470 	token = get_token(tcx, &token_data);
471 	switch (token) {
472 	case '(':
473 		/* '(' exp ')' */
474 		init_parser_element(&pe0);
475 		/* expect exp */
476 		token = parse_cond(tcx, &pe0);
477 		T_ENSURE_OK(token, err);
478 		/* expect ')' */
479 		token = get_token(tcx, &token_data);
480 		T_ENSURE_SOMETHING(token, err);
481 		if (token != ')') {
482 			unget_token(tcx, token, &token_data);
483 			token = T_ILTOKEN;
484 			goto err;
485 		}
486 		copy_parser_element(pelem, &pe0);
487 		return token;
488 err:
489 		uninit_parser_element(&pe0);
490 		return token;
491 	case T_IDENTIFIER:
492 	case T_CONSTANT:
493 		pelem->kind = token;
494 		pelem->u.token_data = token_data;
495 		return token;
496 	case T_EOF:
497 		return T_ILEND;
498 	default:
499 		return T_ILTOKEN;
500 	}
501 }
502 
503 /* lnot := '!' lnot | term */
504 static int
505 parse_lnot(struct tokenizer_context *tcx, struct parser_element *pelem)
506 {
507 	struct parser_element pe0;
508 	int token;
509 	union token_data token_data;
510 
511 	init_parser_element(&pe0);
512 
513 	/* '!' or not */
514 	token = get_token(tcx, &token_data);
515 	if (token != '!') {
516 		/* stop: term */
517 		unget_token(tcx, token, &token_data);
518 		return parse_term(tcx, pelem);
519 	}
520 
521 	/* '!' term */
522 	token = parse_lnot(tcx, &pe0);
523 	T_ENSURE_OK(token, err);
524 
525 	pelem->kind = '!';
526 	pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
527 	return pelem->kind;
528 err:
529 	uninit_parser_element(&pe0);
530 	return token;
531 }
532 
533 /* ext_op := ext_next ( op ext_next )* */
534 static int
535 parse_op2(struct tokenizer_context *tcx, struct parser_element *pelem,
536 	  const struct parser_op2_transition *tr)
537 {
538 	struct parser_element pe0, pe1, peop;
539 	int token;
540 	union token_data token_data;
541 	char op;
542 
543 	/* special case: expect lnot */
544 	if (tr == NULL)
545 		return parse_lnot(tcx, pelem);
546 
547 	init_parser_element(&pe0);
548 	init_parser_element(&pe1);
549 	token = parse_op2(tcx, &pe0, tr->next);
550 	T_ENSURE_OK(token, err);
551 
552 	while (/*CONSTCOND*/1) {
553 		/* expect op or empty */
554 		token = get_token(tcx, &token_data);
555 		if (token != tr->kind) {
556 			/* stop */
557 			unget_token(tcx, token, &token_data);
558 			copy_parser_element(pelem, &pe0);
559 			break;
560 		}
561 		op = token_data.op;
562 		/* right hand */
563 		token = parse_op2(tcx, &pe1, tr->next);
564 		T_ENSURE_OK(token, err);
565 
566 		init_parser_element(&peop);
567 		peop.kind = tr->kind;
568 		peop.u.parser_op.op = op;
569 		peop.u.parser_op.operands[0] = dup_parser_element(&pe0);
570 		init_parser_element(&pe0);
571 		peop.u.parser_op.operands[1] = dup_parser_element(&pe1);
572 		init_parser_element(&pe1);
573 		copy_parser_element(&pe0, &peop);
574 	}
575 	return pelem->kind;
576 err:
577 	uninit_parser_element(&pe1);
578 	uninit_parser_element(&pe0);
579 	return token;
580 }
581 
582 /* cond := lor | lor '?' cond ':' cond */
583 static int
584 parse_cond(struct tokenizer_context *tcx, struct parser_element *pelem)
585 {
586 	struct parser_element pe0, pe1, pe2;
587 	int token;
588 	union token_data token_data;
589 
590 	init_parser_element(&pe0);
591 	init_parser_element(&pe1);
592 	init_parser_element(&pe2);
593 
594 	/* expect lor or empty */
595 	token = parse_op2(tcx, &pe0, &exp_tr_lor);
596 	T_ENSURE_OK(token, err);
597 
598 	/* '?' or not */
599 	token = get_token(tcx, &token_data);
600 	if (token != '?') {
601 		/* stop: lor */
602 		unget_token(tcx, token, &token_data);
603 		copy_parser_element(pelem, &pe0);
604 		return pe0.kind;
605 	}
606 
607 	/* lor '?' cond ':' cond */
608 	/* expect cond */
609 	token = parse_cond(tcx, &pe1);
610 	T_ENSURE_OK(token, err);
611 
612 	/* expect ':' */
613 	token = get_token(tcx, &token_data);
614 	T_ENSURE_OK(token, err);
615 	if (token != ':') {
616 		unget_token(tcx, token, &token_data);
617 		token = T_ILTOKEN;
618 		goto err;
619 	}
620 
621 	/* expect cond */
622 	token = parse_cond(tcx, &pe2);
623 	T_ENSURE_OK(token, err);
624 
625 	pelem->kind = '?';
626 	pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
627 	pelem->u.parser_op.operands[1] = dup_parser_element(&pe1);
628 	pelem->u.parser_op.operands[2] = dup_parser_element(&pe2);
629 	return pelem->kind;
630 err:
631 	uninit_parser_element(&pe2);
632 	uninit_parser_element(&pe1);
633 	uninit_parser_element(&pe0);
634 	return token;
635 }
636 
637 static int
638 parse_exp(struct tokenizer_context *tcx, struct parser_element *pelem)
639 {
640 	int token, token1;
641 	union token_data token_data;
642 
643 #ifdef ALLOW_EMPTY
644 	/* empty check */
645 	token = get_token(tcx, &token_data);
646 	if (token == T_EOF)
647 		return token;
648 	unget_token(tcx, token, &token_data);
649 #endif
650 
651 	token = parse_cond(tcx, pelem);
652 	if (!T_IS_ERROR(token)) {
653 		/* termination check */
654 		token1 = get_token(tcx, &token_data);
655 		if (token1 == T_EOF)
656 			return token;
657 		else if (!T_IS_ERROR(token))
658 			 unget_token(tcx, token1, &token_data);
659 		return T_ILTOKEN;
660 	}
661 	return token;
662 }
663 
664 
665 #if defined(TEST_PARSER) || defined(TEST_PARSE_PLURAL)
666 #include <stdio.h>
667 
668 static void dump_elem(struct parser_element *);
669 
670 static void
671 dump_op2(struct parser_element *pelem)
672 {
673 	dump_elem(pelem->u.parser_op.operands[0]);
674 	printf(" ");
675 	dump_elem(pelem->u.parser_op.operands[1]);
676 	printf(")");
677 }
678 
679 static void
680 dump_op3(struct parser_element *pelem)
681 {
682 	dump_elem(pelem->u.parser_op.operands[0]);
683 	printf(" ");
684 	dump_elem(pelem->u.parser_op.operands[1]);
685 	printf(" ");
686 	dump_elem(pelem->u.parser_op.operands[2]);
687 	printf(")");
688 }
689 
690 static void
691 dump_elem(struct parser_element *pelem)
692 {
693 	switch (pelem->kind) {
694 	case T_LAND:
695 		printf("(&& ");
696 		dump_op2(pelem);
697 		break;
698 	case T_LOR:
699 		printf("(|| ");
700 		dump_op2(pelem);
701 		break;
702 	case T_EQUALITY:
703 		switch (pelem->u.parser_op.op) {
704 		case OP_EQ:
705 			printf("(== ");
706 			break;
707 		case OP_NEQ:
708 			printf("(!= ");
709 			break;
710 		}
711 		dump_op2(pelem);
712 		break;
713 	case T_RELATIONAL:
714 		switch (pelem->u.parser_op.op) {
715 		case '<':
716 		case '>':
717 			printf("(%c ", pelem->u.parser_op.op);
718 			break;
719 		case OP_LTEQ:
720 		case OP_GTEQ:
721 			printf("(%c= ", pelem->u.parser_op.op-'=');
722 			break;
723 		}
724 		dump_op2(pelem);
725 		break;
726 	case T_ADDITIVE:
727 	case T_MULTIPLICATIVE:
728 		printf("(%c ", pelem->u.parser_op.op);
729 		dump_op2(pelem);
730 		break;
731 	case '!':
732 		printf("(! ");
733 		dump_elem(pelem->u.parser_op.operands[0]);
734 		printf(")");
735 		break;
736 	case '?':
737 		printf("(? ");
738 		dump_op3(pelem);
739 		break;
740 	case T_CONSTANT:
741 		printf("%d", pelem->u.token_data.constant);
742 		break;
743 	case T_IDENTIFIER:
744 #ifdef ALLOW_ARBITRARY_IDENTIFIER
745 		printf("%s", pelem->u.token_data.identifier);
746 #else
747 		printf(PLURAL_NUMBER_SYMBOL);
748 #endif
749 		break;
750 	}
751 }
752 #endif
#ifdef TEST_PARSER
/*
 * Parser test driver: parses argv[1] and prints the syntax tree,
 * "none" for an empty expression, or the error code.
 */
int
main(int argc, char **argv)
{
	struct tokenizer_context tcx;
	struct parser_element root;
	int result;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <expression>\n", argv[0]);
		return EXIT_FAILURE;
	}

	init_tokenizer_context(&tcx);
	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));

	init_parser_element(&root);
	result = parse_exp(&tcx, &root);

	if (T_IS_ERROR(result))
		printf("error: 0x%X", result);
	else if (result == T_EOF)
		printf("none");
	else
		dump_elem(&root);
	printf("\n");

	uninit_parser_element(&root);

	return EXIT_SUCCESS;
}
#endif /* TEST_PARSER */
785 
786 /* ----------------------------------------------------------------------
787  * calculate plural number
788  */
789 static unsigned long
790 calculate_plural(const struct parser_element *pe, unsigned long n)
791 {
792 	unsigned long val0, val1;
793 	switch (pe->kind) {
794 	case T_IDENTIFIER:
795 		return n;
796 	case T_CONSTANT:
797 		return pe->u.token_data.constant;
798 	case '?':
799 		val0 = calculate_plural(pe->u.parser_op.operands[0], n);
800 		if (val0)
801 			val1=calculate_plural(pe->u.parser_op.operands[1], n);
802 		else
803 			val1=calculate_plural(pe->u.parser_op.operands[2], n);
804 		return val1;
805 	case '!':
806 		return !calculate_plural(pe->u.parser_op.operands[0], n);
807 	case T_MULTIPLICATIVE:
808 	case T_ADDITIVE:
809 	case T_RELATIONAL:
810 	case T_EQUALITY:
811 	case T_LOR:
812 	case T_LAND:
813 		val0 = calculate_plural(pe->u.parser_op.operands[0], n);
814 		val1 = calculate_plural(pe->u.parser_op.operands[1], n);
815 		switch (pe->u.parser_op.op) {
816 		case '*':
817 			return val0*val1;
818 		case '/':
819 			return val0/val1;
820 		case '%':
821 			return val0%val1;
822 		case '+':
823 			return val0+val1;
824 		case '-':
825 			return val0-val1;
826 		case '<':
827 			return val0<val1;
828 		case '>':
829 			return val0>val1;
830 		case OP_LTEQ:
831 			return val0<=val1;
832 		case OP_GTEQ:
833 			return val0>=val1;
834 		case OP_EQ:
835 			return val0==val1;
836 		case OP_NEQ:
837 			return val0!=val1;
838 		case '|':
839 			return val0||val1;
840 		case '&':
841 			return val0&&val1;
842 		}
843 	}
844 	return 0;
845 }
846 
#ifdef TEST_CALC_PLURAL
#include <stdio.h>

/*
 * Evaluation test driver: parses the expression argv[1] and prints its
 * value for n = argv[2], or the error code.
 */
int
main(int argc, char **argv)
{
	struct tokenizer_context tcx;
	struct parser_element pelem;
	int token;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <expression> <n>\n", argv[0]);
		return EXIT_FAILURE;
	}

	init_tokenizer_context(&tcx);
	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));

	init_parser_element(&pelem);
	token = parse_exp(&tcx, &pelem);

	if (token == T_EOF)
		printf("none");
	else if (T_IS_ERROR(token))
		printf("error: 0x%X", token);
	else {
		/*
		 * n is an unsigned long: convert with strtoul() rather
		 * than atoi(), which is limited to the range of int.
		 */
		printf("plural = %lu",
		       calculate_plural(&pelem, strtoul(argv[2], NULL, 10)));
	}
	printf("\n");

	uninit_parser_element(&pelem);

	return EXIT_SUCCESS;
}
#endif /* TEST_CALC_PLURAL */
883 
884 
885 /* ----------------------------------------------------------------------
886  * parse plural forms
887  */
888 
/*
 * Advance the start of the region past leading white space, as
 * determined by _bcs_skip_ws_len(), and shrink its size accordingly.
 */
static void
region_skip_ws(struct _region *r)
{
	size_t remaining = _region_size(r);
	const char *head = _region_head(r);

	head = _bcs_skip_ws_len(head, &remaining);
	_region_init(r, __UNCONST(head), remaining);
}
898 
/*
 * Shrink the region so that trailing white space, as determined by
 * _bcs_trunc_rws_len(), is no longer part of it.  The start is kept.
 */
static void
region_trunc_rws(struct _region *r)
{
	size_t remaining = _region_size(r);
	const char *head = _region_head(r);

	_bcs_trunc_rws_len(head, &remaining);
	_region_init(r, __UNCONST(head), remaining);
}
908 
/*
 * Check whether the region starts with the prelen bytes at pre.
 * The comparison ignores case when ignorecase is non-zero.
 * Returns 0 when the prefix matches, -1 otherwise (including when the
 * region is shorter than the prefix).
 */
static int
region_check_prefix(struct _region *r, const char *pre, size_t prelen,
		    int ignorecase)
{
	int differs;

	if (prelen > _region_size(r))
		return -1;

	if (ignorecase)
		differs = _bcs_strncasecmp(_region_head(r), pre, prelen);
	else
		differs = memcmp(_region_head(r), pre, prelen);

	return differs ? -1 : 0;
}
925 
/*
 * Strip trailing white space and then the final ';' from the region.
 * Returns 0 on success, -1 when the region does not end with ';'
 * (the white space has been stripped in that case as well).
 */
static int
cut_trailing_semicolon(struct _region *r)
{
	size_t len;

	region_trunc_rws(r);
	len = _region_size(r);
	if (len == 0)
		return -1;
	if (_region_peek8(r, len-1) != ';')
		return -1;
	_region_get_subregion(r, r, 0, len-1);
	return 0;
}
936 
937 static int
938 find_plural_forms(struct _region *r)
939 {
940 	struct _memstream ms;
941 	struct _region rr;
942 
943 	_memstream_bind(&ms, r);
944 
945 	while (!_memstream_getln_region(&ms, &rr)) {
946 		if (!region_check_prefix(&rr,
947 					 PLURAL_FORMS, LEN_PLURAL_FORMS, 1)) {
948 			_region_get_subregion(
949 				r, &rr, LEN_PLURAL_FORMS,
950 				_region_size(&rr)-LEN_PLURAL_FORMS);
951 			region_skip_ws(r);
952 			region_trunc_rws(r);
953 			return 0;
954 		}
955 	}
956 	return -1;
957 }
958 
/*
 * Consume "sym =" (with optional white space around each part) from the
 * start of the region; sym is matched case sensitively.
 * Returns 0 with the region positioned at the assigned value, or -1 when
 * the region does not start with such an assignment (the region may have
 * been partially advanced in that case).
 */
static int
skip_assignment(struct _region *r, const char *sym, size_t symlen)
{

	/* the symbol */
	region_skip_ws(r);
	if (region_check_prefix(r, sym, symlen, 0) != 0)
		return -1;
	_region_get_subregion(r, r, symlen, _region_size(r)-symlen);

	/* the '=' */
	region_skip_ws(r);
	if (_region_size(r) == 0)
		return -1;
	if (_region_peek8(r, 0) != '=')
		return -1;
	_region_get_subregion(r, r, 1, _region_size(r)-1);

	/* white space before the value */
	region_skip_ws(r);
	return 0;
}
973 
974 static int
975 skip_nplurals(struct _region *r, unsigned long *rnp)
976 {
977 	unsigned long np;
978 	char buf[MAX_LEN_ATOM+2], *endptr;
979 	const char *endptrconst;
980 	size_t ofs;
981 
982 	if (skip_assignment(r, NPLURALS_SYMBOL, LEN_NPLURAL_SYMBOL))
983 		return -1;
984 	if (_region_size(r) == 0 || !_bcs_isdigit(_region_peek8(r, 0)))
985 		return -1;
986 	strlcpy(buf, _region_head(r), sizeof (buf));
987 	np = strtoul(buf, &endptr, 0);
988 	endptrconst = _bcs_skip_ws(endptr);
989 	if (*endptrconst != ';')
990 		return -1;
991 	ofs = endptrconst+1-buf;
992 	if (_region_get_subregion(r, r, ofs, _region_size(r)-ofs))
993 		return -1;
994 	if (rnp)
995 		*rnp = np;
996 	return 0;
997 }
998 
999 static int
1000 parse_plural_body(struct _region *r, struct parser_element **rpe)
1001 {
1002 	int token;
1003 	struct tokenizer_context tcx;
1004 	struct parser_element pelem, *ppe;
1005 
1006 	init_tokenizer_context(&tcx);
1007 	_memstream_bind(&tcx.memstream, r);
1008 
1009 	init_parser_element(&pelem);
1010 	token = parse_exp(&tcx, &pelem);
1011 	if (T_IS_ERROR(token))
1012 		return token;
1013 
1014 	ppe = dup_parser_element(&pelem);
1015 	if (ppe == NULL) {
1016 		uninit_parser_element(&pelem);
1017 		return T_NOMEM;
1018 	}
1019 
1020 	*rpe = ppe;
1021 
1022 	return 0;
1023 }
1024 
1025 static int
1026 parse_plural(struct parser_element **rpe, unsigned long *rnp,
1027 	     const char *str, size_t len)
1028 {
1029 	struct _region r;
1030 
1031 	_region_init(&r, __UNCONST(str), len);
1032 
1033 	if (find_plural_forms(&r))
1034 		return T_NOTFOUND;
1035 	if (skip_nplurals(&r, rnp))
1036 		return T_ILPLURAL;
1037 	if (skip_assignment(&r, PLURAL_SYMBOL, LEN_PLURAL_SYMBOL))
1038 		return T_ILPLURAL;
1039 	if (cut_trailing_semicolon(&r))
1040 		return T_ILPLURAL;
1041 	return parse_plural_body(&r, rpe);
1042 }
1043 
#ifdef TEST_PARSE_PLURAL
/*
 * Header test driver: parses the Plural-Forms information in argv[1]
 * and prints the syntax tree and nplurals; when argv[2] is given, the
 * plural value for that n is printed as well.
 */
int
main(int argc, char **argv)
{
	int ret;
	struct parser_element *pelem;
	unsigned long np;

	if (argc != 2 && argc != 3) {
		fprintf(stderr, "usage: %s <mime-header> [n]\n", argv[0]);
		return EXIT_FAILURE;
	}

	ret = parse_plural(&pelem, &np, argv[1], strlen(argv[1]));

	if (ret == T_EOF)
		printf("none");
	else if (T_IS_ERROR(ret))
		printf("error: 0x%X", ret);
	else {
		printf("syntax tree: ");
		dump_elem(pelem);
		printf("\nnplurals = %lu", np);
		/* argv[argc] is NULL, so this is safe when n is omitted */
		if (argv[2])
			/*
			 * n is an unsigned long: convert with strtoul()
			 * rather than atoi(), which is limited to int.
			 */
			printf(", plural = %lu",
			       calculate_plural(pelem,
						strtoul(argv[2], NULL, 10)));
		free_parser_element(pelem);
	}
	printf("\n");


	return EXIT_SUCCESS;
}
#endif /* TEST_PARSE_PLURAL */
1078 
1079 /*
1080  * external interface
1081  */
1082 
/*
 * Public entry point of the parser; forwards to parse_plural().
 * struct gettext_plural is the opaque public name of the syntax tree.
 */
int
_gettext_parse_plural(struct gettext_plural **rpe, unsigned long *rnp,
		      const char *str, size_t len)
{
	struct parser_element **tree = (struct parser_element **)rpe;

	return parse_plural(tree, rnp, str, len);
}
1089 
/*
 * Public entry point of the evaluator; forwards to calculate_plural().
 */
unsigned long
_gettext_calculate_plural(const struct gettext_plural *pe, unsigned long n)
{
	const struct parser_element *tree = (const void *)pe;

	return calculate_plural(tree, n);
}
1095 
/*
 * Public entry point for releasing a syntax tree; forwards to
 * free_parser_element().
 */
void
_gettext_free_plural(struct gettext_plural *pe)
{
	struct parser_element *tree = (void *)pe;

	free_parser_element(tree);
}
1101 
#ifdef TEST_PLURAL
#include <libintl.h>
#include <locale.h>

/* print the string dngettext() selects in domain "test" for the count n */
#define PR(n)	printf("n=%d: \"%s\"\n", n, dngettext("test", "1", "2", n))

/*
 * End-to-end test driver: prints the selected form for the counts 1 to 4.
 */
int
main(void)
{
	int count;

	bindtextdomain("test", "."); /* ./LANG/LC_MESSAGES/test.mo */
	for (count = 1; count <= 4; count++)
		PR(count);

	return 0;
}
#endif
1119 #endif
1120