/* GNU gettext - internationalization aids
   Copyright (C) 1995-1998, 2000-2010, 2012, 2014-2015, 2018-2019 Free Software
   Foundation, Inc.

   This file was written by Peter Miller <millerp@canb.auug.org.au>

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23 #include <alloca.h>
24
25 /* Specification. */
26 #include "write-po.h"
27
28 #include <errno.h>
29 #include <limits.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33
34 #if HAVE_ICONV
35 # include <iconv.h>
36 #endif
37
38 #include <textstyle.h>
39
40 #include "c-ctype.h"
41 #include "po-charset.h"
42 #include "format.h"
43 #include "unilbrk.h"
44 #include "msgl-ascii.h"
45 #include "write-catalog.h"
46 #include "xalloc.h"
47 #include "xmalloca.h"
48 #include "c-strstr.h"
49 #include "xvasprintf.h"
50 #include "po-xerror.h"
51 #include "gettext.h"
52
53 /* Our regular abbreviation. */
54 #define _(str) gettext (str)
55
56 #if HAVE_DECL_PUTC_UNLOCKED
57 # undef putc
58 # define putc putc_unlocked
59 #endif
60
61
62 /* =================== Putting together a #, flags line. =================== */
63
64
65 /* Convert IS_FORMAT in the context of programming language LANG to a flag
66 string for use in #, flags. */
67
68 const char *
make_format_description_string(enum is_format is_format,const char * lang,bool debug)69 make_format_description_string (enum is_format is_format, const char *lang,
70 bool debug)
71 {
72 static char result[100];
73
74 switch (is_format)
75 {
76 case possible:
77 if (debug)
78 {
79 sprintf (result, "possible-%s-format", lang);
80 break;
81 }
82 /* FALLTHROUGH */
83 case yes_according_to_context:
84 case yes:
85 sprintf (result, "%s-format", lang);
86 break;
87 case no:
88 sprintf (result, "no-%s-format", lang);
89 break;
90 default:
91 /* The others have already been filtered out by significant_format_p. */
92 abort ();
93 }
94
95 return result;
96 }
97
98
99 /* Return true if IS_FORMAT is worth mentioning in a #, flags list. */
100
101 bool
significant_format_p(enum is_format is_format)102 significant_format_p (enum is_format is_format)
103 {
104 return is_format != undecided && is_format != impossible;
105 }
106
107
108 /* Return true if one of IS_FORMAT is worth mentioning in a #, flags list. */
109
110 static bool
has_significant_format_p(const enum is_format is_format[NFORMATS])111 has_significant_format_p (const enum is_format is_format[NFORMATS])
112 {
113 size_t i;
114
115 for (i = 0; i < NFORMATS; i++)
116 if (significant_format_p (is_format[i]))
117 return true;
118 return false;
119 }
120
121
122 /* Convert a RANGE to a freshly allocated string for use in #, flags. */
123
124 char *
make_range_description_string(struct argument_range range)125 make_range_description_string (struct argument_range range)
126 {
127 return xasprintf ("range: %d..%d", range.min, range.max);
128 }
129
130
131 /* Convert a wrapping flag DO_WRAP to a string for use in #, flags. */
132
133 static const char *
make_c_width_description_string(enum is_wrap do_wrap)134 make_c_width_description_string (enum is_wrap do_wrap)
135 {
136 const char *result = NULL;
137
138 switch (do_wrap)
139 {
140 case yes:
141 result = "wrap";
142 break;
143 case no:
144 result = "no-wrap";
145 break;
146 default:
147 abort ();
148 }
149
150 return result;
151 }
152
153
154 /* ========================== Styling primitives. ========================== */
155
156
157 /* When compiled in src, enable styling support.
158 When compiled in libgettextpo, don't enable styling support. */
159 #ifdef GETTEXTDATADIR
160
161 /* All ostream_t instances are in fact styled_ostream_t instances. */
162
163 /* Start a run of text belonging to a given CSS class. */
164 static inline void
begin_css_class(ostream_t stream,const char * classname)165 begin_css_class (ostream_t stream, const char *classname)
166 {
167 styled_ostream_begin_use_class ((styled_ostream_t) stream, classname);
168 }
169
170 /* End a run of text belonging to a given CSS class. */
171 static inline void
end_css_class(ostream_t stream,const char * classname)172 end_css_class (ostream_t stream, const char *classname)
173 {
174 styled_ostream_end_use_class ((styled_ostream_t) stream, classname);
175 }
176
177 #else
178
/* Styling is compiled out: no stream is stylable, and the begin/end hooks
   only evaluate the class name (cast to void) without touching the
   stream.  */
#define is_stylable(s) false
#define begin_css_class(s,cls) (void)(cls)
#define end_css_class(s,cls) (void)(cls)
182
183 #endif
184
/* Names of the CSS classes used when styling the output.  */

/* Classes that apply to a message as a whole.  */
static const char class_header[]       = "header";
static const char class_translated[]   = "translated";
static const char class_untranslated[] = "untranslated";
static const char class_fuzzy[]        = "fuzzy";
static const char class_obsolete[]     = "obsolete";

/* Classes that apply to the individual parts of a message.  */
static const char class_comment[]            = "comment";
static const char class_translator_comment[] = "translator-comment";
static const char class_extracted_comment[]  = "extracted-comment";
static const char class_reference_comment[]  = "reference-comment";
static const char class_reference[]          = "reference";
static const char class_flag_comment[]       = "flag-comment";
static const char class_flag[]               = "flag";
static const char class_fuzzy_flag[]         = "fuzzy-flag";
static const char class_previous_comment[]   = "previous-comment";
static const char class_previous[]           = "previous";
static const char class_msgid[]              = "msgid";
static const char class_msgstr[]             = "msgstr";
static const char class_keyword[]            = "keyword";
static const char class_string[]             = "string";

/* Classes that apply to pieces of a string's contents.  */
static const char class_text[]                     = "text";
static const char class_escape_sequence[]          = "escape-sequence";
static const char class_format_directive[]         = "format-directive";
static const char class_invalid_format_directive[] = "invalid-format-directive";
#if 0
static const char class_added[]   = "added";
static const char class_changed[] = "changed";
static const char class_removed[] = "removed";
#endif
218
/* Per-character attribute bits; they may be OR-ed together.  */
enum
{
  /* The character is part of an escape sequence.  */
  ATTR_ESCAPE_SEQUENCE = 0x01,
  /* The following two are exclusive.  */
  ATTR_FORMAT_DIRECTIVE = 0x02,
  ATTR_INVALID_FORMAT_DIRECTIVE = 0x04
};
227
228
229 /* ================ Output parts of a message, as comments. ================ */
230
231
232 /* Output mp->comment as a set of comment lines. */
233
/* Whether message_print_comment produces any output.  On by default.  */
static bool print_comment = true;

/* Switch the output of message_print_comment on (FLAG true) or off
   (FLAG false).  */
void
message_print_style_comment (bool flag)
{
  print_comment = flag;
}
241
242 void
message_print_comment(const message_ty * mp,ostream_t stream)243 message_print_comment (const message_ty *mp, ostream_t stream)
244 {
245 if (print_comment && mp->comment != NULL)
246 {
247 size_t j;
248
249 begin_css_class (stream, class_translator_comment);
250
251 for (j = 0; j < mp->comment->nitems; ++j)
252 {
253 const char *s = mp->comment->item[j];
254 do
255 {
256 const char *e;
257 ostream_write_str (stream, "#");
258 if (*s != '\0')
259 ostream_write_str (stream, " ");
260 e = strchr (s, '\n');
261 if (e == NULL)
262 {
263 ostream_write_str (stream, s);
264 s = NULL;
265 }
266 else
267 {
268 ostream_write_mem (stream, s, e - s);
269 s = e + 1;
270 }
271 ostream_write_str (stream, "\n");
272 }
273 while (s != NULL);
274 }
275
276 end_css_class (stream, class_translator_comment);
277 }
278 }
279
280
281 /* Output mp->comment_dot as a set of comment lines. */
282
283 void
message_print_comment_dot(const message_ty * mp,ostream_t stream)284 message_print_comment_dot (const message_ty *mp, ostream_t stream)
285 {
286 if (mp->comment_dot != NULL)
287 {
288 size_t j;
289
290 begin_css_class (stream, class_extracted_comment);
291
292 for (j = 0; j < mp->comment_dot->nitems; ++j)
293 {
294 const char *s = mp->comment_dot->item[j];
295 ostream_write_str (stream, "#.");
296 if (*s != '\0')
297 ostream_write_str (stream, " ");
298 ostream_write_str (stream, s);
299 ostream_write_str (stream, "\n");
300 }
301
302 end_css_class (stream, class_extracted_comment);
303 }
304 }
305
306
307 /* Output mp->filepos as a set of comment lines. */
308
309 static enum filepos_comment_type filepos_comment_type = filepos_comment_full;
310
311 void
message_print_comment_filepos(const message_ty * mp,ostream_t stream,bool uniforum,size_t page_width)312 message_print_comment_filepos (const message_ty *mp, ostream_t stream,
313 bool uniforum, size_t page_width)
314 {
315 if (filepos_comment_type != filepos_comment_none
316 && mp->filepos_count != 0)
317 {
318 size_t filepos_count;
319 lex_pos_ty *filepos;
320
321 begin_css_class (stream, class_reference_comment);
322
323 if (filepos_comment_type == filepos_comment_file)
324 {
325 size_t i;
326
327 filepos_count = 0;
328 filepos = XNMALLOC (mp->filepos_count, lex_pos_ty);
329
330 for (i = 0; i < mp->filepos_count; ++i)
331 {
332 lex_pos_ty *pp = &mp->filepos[i];
333 size_t j;
334
335 for (j = 0; j < filepos_count; j++)
336 if (strcmp (filepos[j].file_name, pp->file_name) == 0)
337 break;
338
339 if (j == filepos_count)
340 {
341 filepos[filepos_count].file_name = pp->file_name;
342 filepos[filepos_count].line_number = (size_t)-1;
343 filepos_count++;
344 }
345 }
346 }
347 else
348 {
349 filepos = mp->filepos;
350 filepos_count = mp->filepos_count;
351 }
352
353 if (uniforum)
354 {
355 size_t j;
356
357 for (j = 0; j < filepos_count; ++j)
358 {
359 lex_pos_ty *pp = &filepos[j];
360 const char *cp = pp->file_name;
361 char *str;
362
363 while (cp[0] == '.' && cp[1] == '/')
364 cp += 2;
365 ostream_write_str (stream, "# ");
366 begin_css_class (stream, class_reference);
367 /* There are two Sun formats to choose from: SunOS and
368 Solaris. Use the Solaris form here. */
369 str = xasprintf ("File: %s, line: %ld",
370 cp, (long) pp->line_number);
371 ostream_write_str (stream, str);
372 end_css_class (stream, class_reference);
373 ostream_write_str (stream, "\n");
374 free (str);
375 }
376 }
377 else
378 {
379 size_t column;
380 size_t j;
381
382 ostream_write_str (stream, "#:");
383 column = 2;
384 for (j = 0; j < filepos_count; ++j)
385 {
386 lex_pos_ty *pp;
387 char buffer[21];
388 const char *cp;
389 size_t len;
390
391 pp = &filepos[j];
392 cp = pp->file_name;
393 while (cp[0] == '.' && cp[1] == '/')
394 cp += 2;
395 if (filepos_comment_type == filepos_comment_file
396 /* Some xgettext input formats, like RST, lack line
397 numbers. */
398 || pp->line_number == (size_t)(-1))
399 buffer[0] = '\0';
400 else
401 sprintf (buffer, ":%ld", (long) pp->line_number);
402 len = strlen (cp) + strlen (buffer) + 1;
403 if (column > 2 && column + len > page_width)
404 {
405 ostream_write_str (stream, "\n#:");
406 column = 2;
407 }
408 ostream_write_str (stream, " ");
409 begin_css_class (stream, class_reference);
410 ostream_write_str (stream, cp);
411 ostream_write_str (stream, buffer);
412 end_css_class (stream, class_reference);
413 column += len;
414 }
415 ostream_write_str (stream, "\n");
416 }
417
418 if (filepos != mp->filepos)
419 free (filepos);
420
421 end_css_class (stream, class_reference_comment);
422 }
423 }
424
425
426 /* Output mp->is_fuzzy, mp->is_format, mp->range, mp->do_wrap as a comment
427 line. */
428
429 void
message_print_comment_flags(const message_ty * mp,ostream_t stream,bool debug)430 message_print_comment_flags (const message_ty *mp, ostream_t stream, bool debug)
431 {
432 if ((mp->is_fuzzy && mp->msgstr[0] != '\0')
433 || has_significant_format_p (mp->is_format)
434 || has_range_p (mp->range)
435 || mp->do_wrap == no)
436 {
437 bool first_flag = true;
438 size_t i;
439
440 begin_css_class (stream, class_flag_comment);
441
442 ostream_write_str (stream, "#,");
443
444 /* We don't print the fuzzy flag if the msgstr is empty. This
445 might be introduced by the user but we want to normalize the
446 output. */
447 if (mp->is_fuzzy && mp->msgstr[0] != '\0')
448 {
449 ostream_write_str (stream, " ");
450 begin_css_class (stream, class_flag);
451 begin_css_class (stream, class_fuzzy_flag);
452 ostream_write_str (stream, "fuzzy");
453 end_css_class (stream, class_fuzzy_flag);
454 end_css_class (stream, class_flag);
455 first_flag = false;
456 }
457
458 for (i = 0; i < NFORMATS; i++)
459 if (significant_format_p (mp->is_format[i]))
460 {
461 if (!first_flag)
462 ostream_write_str (stream, ",");
463
464 ostream_write_str (stream, " ");
465 begin_css_class (stream, class_flag);
466 ostream_write_str (stream,
467 make_format_description_string (mp->is_format[i],
468 format_language[i],
469 debug));
470 end_css_class (stream, class_flag);
471 first_flag = false;
472 }
473
474 if (has_range_p (mp->range))
475 {
476 char *string;
477
478 if (!first_flag)
479 ostream_write_str (stream, ",");
480
481 ostream_write_str (stream, " ");
482 begin_css_class (stream, class_flag);
483 string = make_range_description_string (mp->range);
484 ostream_write_str (stream, string);
485 free (string);
486 end_css_class (stream, class_flag);
487 first_flag = false;
488 }
489
490 if (mp->do_wrap == no)
491 {
492 if (!first_flag)
493 ostream_write_str (stream, ",");
494
495 ostream_write_str (stream, " ");
496 begin_css_class (stream, class_flag);
497 ostream_write_str (stream,
498 make_c_width_description_string (mp->do_wrap));
499 end_css_class (stream, class_flag);
500 first_flag = false;
501 }
502
503 ostream_write_str (stream, "\n");
504
505 end_css_class (stream, class_flag_comment);
506 }
507 }
508
509
510 /* ========= Some parameters for use by 'msgdomain_list_print_po'. ========= */
511
512
/* This variable controls the extent to which the page width applies.
   True means it applies to message strings and file reference lines.
   False means it applies to file reference lines only.  */
static bool wrap_strings = true;

/* Stop applying the page width to message strings.  There is no
   counterpart in this file that switches it back on.  */
void
message_page_width_ignore (void)
{
  wrap_strings = false;
}
523
524
/* These three variables control the output style of the message_print
   function.  Interface functions for them are to be used.  */
static bool indent = false;
static bool uniforum = false;
static bool escape = false;

/* Select the indented output style.  */
void
message_print_style_indent (void)
{
  indent = true;
}

/* Select the Uniforum output style.  */
void
message_print_style_uniforum (void)
{
  uniforum = true;
}

/* Set the 'escape' output style flag to FLAG.  */
void
message_print_style_escape (bool flag)
{
  escape = flag;
}
548
549 void
message_print_style_filepos(enum filepos_comment_type type)550 message_print_style_filepos (enum filepos_comment_type type)
551 {
552 filepos_comment_type = type;
553 }
554
555
556 /* --add-location argument handling. Return an error indicator. */
557 bool
handle_filepos_comment_option(const char * option)558 handle_filepos_comment_option (const char *option)
559 {
560 if (option != NULL)
561 {
562 if (strcmp (option, "never") == 0 || strcmp (option, "no") == 0)
563 message_print_style_filepos (filepos_comment_none);
564 else if (strcmp (option, "full") == 0 || strcmp (option, "yes") == 0)
565 message_print_style_filepos (filepos_comment_full);
566 else if (strcmp (option, "file") == 0)
567 message_print_style_filepos (filepos_comment_file);
568 else
569 {
570 fprintf (stderr, "invalid --add-location argument: %s\n", option);
571 return true;
572 }
573 }
574 else
575 /* --add-location is equivalent to --add-location=full. */
576 message_print_style_filepos (filepos_comment_full);
577 return false;
578 }
579
580
581 /* =============== msgdomain_list_print_po() and subroutines. =============== */
582
583
/* Copy N bytes from SRC to DST, like memcpy, with a fast path for the
   common case N <= 1.  Does nothing when N is 0.  */
static inline void
memcpy_small (void *dst, const void *src, size_t n)
{
  char *out = (char *) dst;
  const char *in = (const char *) src;

  if (n == 0)
    return;

  /* The first byte is handled on its own; the loop only runs for N > 1.  */
  *out = *in;
  for (n--; n > 0; n--)
    *++out = *++in;
}
598
599
/* Fill the N bytes at DST with C, like memset, with a fast path for the
   common case N <= 1.  Does nothing when N is 0.  */
static inline void
memset_small (void *dst, char c, size_t n)
{
  char *out = (char *) dst;

  if (n == 0)
    return;

  /* The first byte is handled on its own; the loop only runs for N > 1.  */
  *out = c;
  for (n--; n > 0; n--)
    *++out = c;
}
613
614
615 static void
wrap(const message_ty * mp,ostream_t stream,const char * line_prefix,int extra_indent,const char * css_class,const char * name,const char * value,enum is_wrap do_wrap,size_t page_width,const char * charset)616 wrap (const message_ty *mp, ostream_t stream,
617 const char *line_prefix, int extra_indent, const char *css_class,
618 const char *name, const char *value,
619 enum is_wrap do_wrap, size_t page_width,
620 const char *charset)
621 {
622 const char *canon_charset;
623 char *fmtdir;
624 char *fmtdirattr;
625 const char *s;
626 bool first_line;
627 #if HAVE_ICONV
628 const char *envval;
629 iconv_t conv;
630 #endif
631 bool weird_cjk;
632
633 canon_charset = po_charset_canonicalize (charset);
634
635 #if HAVE_ICONV
636 /* The old Solaris/openwin msgfmt and GNU msgfmt <= 0.10.35 don't know
637 about multibyte encodings, and require a spurious backslash after
638 every multibyte character whose last byte is 0x5C. Some programs,
639 like vim, distribute PO files in this broken format. It is important
640 for such programs that GNU msgmerge continues to support this old
641 PO file format when the Makefile requests it. */
642 envval = getenv ("OLD_PO_FILE_OUTPUT");
643 if (envval != NULL && *envval != '\0')
644 /* Write a PO file in old format, with extraneous backslashes. */
645 conv = (iconv_t)(-1);
646 else
647 if (canon_charset == NULL)
648 /* Invalid PO file encoding. */
649 conv = (iconv_t)(-1);
650 else
651 /* Avoid glibc-2.1 bug with EUC-KR. */
652 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
653 && !defined _LIBICONV_VERSION
654 if (strcmp (canon_charset, "EUC-KR") == 0)
655 conv = (iconv_t)(-1);
656 else
657 # endif
658 /* Avoid Solaris 2.9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS, GBK,
659 GB18030. */
660 # if defined __sun && !defined _LIBICONV_VERSION
661 if ( strcmp (canon_charset, "GB2312") == 0
662 || strcmp (canon_charset, "EUC-TW") == 0
663 || strcmp (canon_charset, "BIG5") == 0
664 || strcmp (canon_charset, "BIG5-HKSCS") == 0
665 || strcmp (canon_charset, "GBK") == 0
666 || strcmp (canon_charset, "GB18030") == 0)
667 conv = (iconv_t)(-1);
668 else
669 # endif
670 /* Use iconv() to parse multibyte characters. */
671 conv = iconv_open ("UTF-8", canon_charset);
672
673 if (conv != (iconv_t)(-1))
674 weird_cjk = false;
675 else
676 #endif
677 if (canon_charset == NULL)
678 weird_cjk = false;
679 else
680 weird_cjk = po_is_charset_weird_cjk (canon_charset);
681
682 if (canon_charset == NULL)
683 canon_charset = po_charset_ascii;
684
685 /* Determine the extent of format string directives. */
686 fmtdir = NULL;
687 fmtdirattr = NULL;
688 if (value[0] != '\0')
689 {
690 bool is_msgstr =
691 (strlen (name) >= 6 && memcmp (name, "msgstr", 6) == 0);
692 /* or equivalent: = (css_class == class_msgstr) */
693 size_t i;
694
695 for (i = 0; i < NFORMATS; i++)
696 if (possible_format_p (mp->is_format[i]))
697 {
698 size_t len = strlen (value);
699 struct formatstring_parser *parser = formatstring_parsers[i];
700 char *invalid_reason = NULL;
701 void *descr;
702 const char *fdp;
703 const char *fd_end;
704 char *fdap;
705
706 fmtdir = XCALLOC (len, char);
707 descr = parser->parse (value, is_msgstr, fmtdir, &invalid_reason);
708 if (descr != NULL)
709 parser->free (descr);
710
711 /* Locate the FMTDIR_* bits and transform the array to an array
712 of attributes. */
713 fmtdirattr = XCALLOC (len, char);
714 fd_end = fmtdir + len;
715 for (fdp = fmtdir, fdap = fmtdirattr; fdp < fd_end; fdp++, fdap++)
716 if (*fdp & FMTDIR_START)
717 {
718 const char *fdq;
719 for (fdq = fdp; fdq < fd_end; fdq++)
720 if (*fdq & (FMTDIR_END | FMTDIR_ERROR))
721 break;
722 if (!(fdq < fd_end))
723 /* The ->parse method has determined the start of a
724 formatstring directive but not stored a bit indicating
725 its end. It is a bug in the ->parse method. */
726 abort ();
727 if (*fdq & FMTDIR_ERROR)
728 memset (fdap, ATTR_INVALID_FORMAT_DIRECTIVE, fdq - fdp + 1);
729 else
730 memset (fdap, ATTR_FORMAT_DIRECTIVE, fdq - fdp + 1);
731 fdap += fdq - fdp;
732 fdp = fdq;
733 }
734 else
735 *fdap = 0;
736
737 break;
738 }
739 }
740
741 /* Loop over the '\n' delimited portions of value. */
742 s = value;
743 first_line = true;
744 do
745 {
746 /* The usual escapes, as defined by the ANSI C Standard. */
747 # define is_escape(c) \
748 ((c) == '\a' || (c) == '\b' || (c) == '\f' || (c) == '\n' \
749 || (c) == '\r' || (c) == '\t' || (c) == '\v')
750
751 const char *es;
752 const char *ep;
753 size_t portion_len;
754 char *portion;
755 char *overrides;
756 char *attributes;
757 char *linebreaks;
758 char *pp;
759 char *op;
760 char *ap;
761 int startcol, startcol_after_break, width;
762 size_t i;
763
764 for (es = s; *es != '\0'; )
765 if (*es++ == '\n')
766 break;
767
768 /* Expand escape sequences in each portion. */
769 for (ep = s, portion_len = 0; ep < es; ep++)
770 {
771 char c = *ep;
772 if (is_escape (c))
773 portion_len += 2;
774 else if (escape && !c_isprint ((unsigned char) c))
775 portion_len += 4;
776 else if (c == '\\' || c == '"')
777 portion_len += 2;
778 else
779 {
780 #if HAVE_ICONV
781 if (conv != (iconv_t)(-1))
782 {
783 /* Skip over a complete multi-byte character. Don't
784 interpret the second byte of a multi-byte character as
785 ASCII. This is needed for the BIG5, BIG5-HKSCS, GBK,
786 GB18030, SHIFT_JIS, JOHAB encodings. */
787 char scratchbuf[64];
788 const char *inptr = ep;
789 size_t insize;
790 char *outptr = &scratchbuf[0];
791 size_t outsize = sizeof (scratchbuf);
792 size_t res;
793
794 res = (size_t)(-1);
795 for (insize = 1; inptr + insize <= es; insize++)
796 {
797 res = iconv (conv,
798 (ICONV_CONST char **) &inptr, &insize,
799 &outptr, &outsize);
800 if (!(res == (size_t)(-1) && errno == EINVAL))
801 break;
802 /* We expect that no input bytes have been consumed
803 so far. */
804 if (inptr != ep)
805 abort ();
806 }
807 if (res == (size_t)(-1))
808 {
809 if (errno == EILSEQ)
810 {
811 po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
812 _("invalid multibyte sequence"));
813 continue;
814 }
815 else if (errno == EINVAL)
816 {
817 /* This could happen if an incomplete
818 multibyte sequence at the end of input
819 bytes. */
820 po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
821 _("incomplete multibyte sequence"));
822 continue;
823 }
824 else
825 abort ();
826 }
827 insize = inptr - ep;
828 portion_len += insize;
829 ep += insize - 1;
830 }
831 else
832 #endif
833 {
834 if (weird_cjk
835 /* Special handling of encodings with CJK structure. */
836 && ep + 2 <= es
837 && (unsigned char) ep[0] >= 0x80
838 && (unsigned char) ep[1] >= 0x30)
839 {
840 portion_len += 2;
841 ep += 1;
842 }
843 else
844 portion_len += 1;
845 }
846 }
847 }
848 portion = XNMALLOC (portion_len, char);
849 overrides = XNMALLOC (portion_len, char);
850 attributes = XNMALLOC (portion_len, char);
851 for (ep = s, pp = portion, op = overrides, ap = attributes; ep < es; ep++)
852 {
853 char c = *ep;
854 char attr = (fmtdirattr != NULL ? fmtdirattr[ep - value] : 0);
855 char brk = UC_BREAK_UNDEFINED;
856 /* Don't break inside format directives. */
857 if (attr == ATTR_FORMAT_DIRECTIVE
858 && (fmtdir[ep - value] & FMTDIR_START) == 0)
859 brk = UC_BREAK_PROHIBITED;
860 if (is_escape (c))
861 {
862 switch (c)
863 {
864 case '\a': c = 'a'; break;
865 case '\b': c = 'b'; break;
866 case '\f': c = 'f'; break;
867 case '\n': c = 'n'; break;
868 case '\r': c = 'r'; break;
869 case '\t': c = 't'; break;
870 case '\v': c = 'v'; break;
871 default: abort ();
872 }
873 *pp++ = '\\';
874 *pp++ = c;
875 *op++ = brk;
876 *op++ = UC_BREAK_PROHIBITED;
877 *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
878 *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
879 /* We warn about any use of escape sequences beside
880 '\n' and '\t'. */
881 if (c != 'n' && c != 't')
882 {
883 char *error_message =
884 xasprintf (_("internationalized messages should not contain the '\\%c' escape sequence"),
885 c);
886 po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, false,
887 error_message);
888 free (error_message);
889 }
890 }
891 else if (escape && !c_isprint ((unsigned char) c))
892 {
893 *pp++ = '\\';
894 *pp++ = '0' + (((unsigned char) c >> 6) & 7);
895 *pp++ = '0' + (((unsigned char) c >> 3) & 7);
896 *pp++ = '0' + ((unsigned char) c & 7);
897 *op++ = brk;
898 *op++ = UC_BREAK_PROHIBITED;
899 *op++ = UC_BREAK_PROHIBITED;
900 *op++ = UC_BREAK_PROHIBITED;
901 *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
902 *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
903 *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
904 *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
905 }
906 else if (c == '\\' || c == '"')
907 {
908 *pp++ = '\\';
909 *pp++ = c;
910 *op++ = brk;
911 *op++ = UC_BREAK_PROHIBITED;
912 *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
913 *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
914 }
915 else
916 {
917 #if HAVE_ICONV
918 if (conv != (iconv_t)(-1))
919 {
920 /* Copy a complete multi-byte character. Don't
921 interpret the second byte of a multi-byte character as
922 ASCII. This is needed for the BIG5, BIG5-HKSCS, GBK,
923 GB18030, SHIFT_JIS, JOHAB encodings. */
924 char scratchbuf[64];
925 const char *inptr = ep;
926 size_t insize;
927 char *outptr = &scratchbuf[0];
928 size_t outsize = sizeof (scratchbuf);
929 size_t res;
930
931 res = (size_t)(-1);
932 for (insize = 1; inptr + insize <= es; insize++)
933 {
934 res = iconv (conv,
935 (ICONV_CONST char **) &inptr, &insize,
936 &outptr, &outsize);
937 if (!(res == (size_t)(-1) && errno == EINVAL))
938 break;
939 /* We expect that no input bytes have been consumed
940 so far. */
941 if (inptr != ep)
942 abort ();
943 }
944 if (res == (size_t)(-1))
945 {
946 if (errno == EILSEQ)
947 {
948 po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0,
949 false, _("invalid multibyte sequence"));
950 continue;
951 }
952 else
953 abort ();
954 }
955 insize = inptr - ep;
956 memcpy_small (pp, ep, insize);
957 pp += insize;
958 *op = brk;
959 memset_small (op + 1, UC_BREAK_PROHIBITED, insize - 1);
960 op += insize;
961 memset_small (ap, attr, insize);
962 ap += insize;
963 ep += insize - 1;
964 }
965 else
966 #endif
967 {
968 if (weird_cjk
969 /* Special handling of encodings with CJK structure. */
970 && ep + 2 <= es
971 && (unsigned char) c >= 0x80
972 && (unsigned char) ep[1] >= 0x30)
973 {
974 *pp++ = c;
975 ep += 1;
976 *pp++ = *ep;
977 *op++ = brk;
978 *op++ = UC_BREAK_PROHIBITED;
979 *ap++ = attr;
980 *ap++ = attr;
981 }
982 else
983 {
984 *pp++ = c;
985 *op++ = brk;
986 *ap++ = attr;
987 }
988 }
989 }
990 }
991
992 /* Don't break immediately before the "\n" at the end. */
993 if (es > s && es[-1] == '\n')
994 overrides[portion_len - 2] = UC_BREAK_PROHIBITED;
995
996 linebreaks = XNMALLOC (portion_len, char);
997
998 /* Subsequent lines after a break are all indented.
999 See INDENT-S. */
1000 startcol_after_break = (line_prefix ? strlen (line_prefix) : 0);
1001 if (indent)
1002 startcol_after_break = (startcol_after_break + extra_indent + 8) & ~7;
1003 startcol_after_break++;
1004
1005 /* The line width. Allow room for the closing quote character. */
1006 width = (wrap_strings && do_wrap != no ? page_width : INT_MAX) - 1;
1007 /* Adjust for indentation of subsequent lines. */
1008 width -= startcol_after_break;
1009
1010 recompute:
1011 /* The line starts with different things depending on whether it
1012 is the first line, and if we are using the indented style.
1013 See INDENT-F. */
1014 startcol = (line_prefix ? strlen (line_prefix) : 0);
1015 if (first_line)
1016 {
1017 startcol += strlen (name);
1018 if (indent)
1019 startcol = (startcol + extra_indent + 8) & ~7;
1020 else
1021 startcol++;
1022 }
1023 else
1024 {
1025 if (indent)
1026 startcol = (startcol + extra_indent + 8) & ~7;
1027 }
1028 /* Allow room for the opening quote character. */
1029 startcol++;
1030 /* Adjust for indentation of subsequent lines. */
1031 startcol -= startcol_after_break;
1032
1033 /* Do line breaking on the portion. */
1034 ulc_width_linebreaks (portion, portion_len, width, startcol, 0,
1035 overrides, canon_charset, linebreaks);
1036
1037 /* If this is the first line, and we are not using the indented
1038 style, and the line would wrap, then use an empty first line
1039 and restart. */
1040 if (first_line && !indent
1041 && portion_len > 0
1042 && (*es != '\0'
1043 || startcol > width
1044 || memchr (linebreaks, UC_BREAK_POSSIBLE, portion_len) != NULL))
1045 {
1046 if (line_prefix != NULL)
1047 ostream_write_str (stream, line_prefix);
1048 begin_css_class (stream, css_class);
1049 begin_css_class (stream, class_keyword);
1050 ostream_write_str (stream, name);
1051 end_css_class (stream, class_keyword);
1052 ostream_write_str (stream, " ");
1053 begin_css_class (stream, class_string);
1054 ostream_write_str (stream, "\"\"");
1055 end_css_class (stream, class_string);
1056 end_css_class (stream, css_class);
1057 ostream_write_str (stream, "\n");
1058 first_line = false;
1059 /* Recompute startcol and linebreaks. */
1060 goto recompute;
1061 }
1062
1063 /* Print the beginning of the line. This will depend on whether
1064 this is the first line, and if the indented style is being
1065 used. INDENT-F. */
1066 {
1067 int currcol = 0;
1068
1069 if (line_prefix != NULL)
1070 {
1071 ostream_write_str (stream, line_prefix);
1072 currcol = strlen (line_prefix);
1073 }
1074 begin_css_class (stream, css_class);
1075 if (first_line)
1076 {
1077 begin_css_class (stream, class_keyword);
1078 ostream_write_str (stream, name);
1079 currcol += strlen (name);
1080 end_css_class (stream, class_keyword);
1081 if (indent)
1082 {
1083 if (extra_indent > 0)
1084 ostream_write_mem (stream, " ", extra_indent);
1085 currcol += extra_indent;
1086 ostream_write_mem (stream, " ", 8 - (currcol & 7));
1087 currcol = (currcol + 8) & ~7;
1088 }
1089 else
1090 {
1091 ostream_write_str (stream, " ");
1092 currcol++;
1093 }
1094 first_line = false;
1095 }
1096 else
1097 {
1098 if (indent)
1099 {
1100 if (extra_indent > 0)
1101 ostream_write_mem (stream, " ", extra_indent);
1102 currcol += extra_indent;
1103 ostream_write_mem (stream, " ", 8 - (currcol & 7));
1104 currcol = (currcol + 8) & ~7;
1105 }
1106 }
1107 }
1108
1109 /* Print the portion itself, with linebreaks where necessary. */
1110 {
1111 char currattr = 0;
1112
1113 begin_css_class (stream, class_string);
1114 ostream_write_str (stream, "\"");
1115 begin_css_class (stream, class_text);
1116
1117 for (i = 0; i < portion_len; i++)
1118 {
1119 if (linebreaks[i] == UC_BREAK_POSSIBLE)
1120 {
1121 int currcol;
1122
1123 /* Change currattr so that it becomes 0. */
1124 if (currattr & ATTR_ESCAPE_SEQUENCE)
1125 {
1126 end_css_class (stream, class_escape_sequence);
1127 currattr &= ~ATTR_ESCAPE_SEQUENCE;
1128 }
1129 if (currattr & ATTR_FORMAT_DIRECTIVE)
1130 {
1131 end_css_class (stream, class_format_directive);
1132 currattr &= ~ATTR_FORMAT_DIRECTIVE;
1133 }
1134 else if (currattr & ATTR_INVALID_FORMAT_DIRECTIVE)
1135 {
1136 end_css_class (stream, class_invalid_format_directive);
1137 currattr &= ~ATTR_INVALID_FORMAT_DIRECTIVE;
1138 }
1139 if (!(currattr == 0))
1140 abort ();
1141
1142 end_css_class (stream, class_text);
1143 ostream_write_str (stream, "\"");
1144 end_css_class (stream, class_string);
1145 end_css_class (stream, css_class);
1146 ostream_write_str (stream, "\n");
1147 currcol = 0;
1148 /* INDENT-S. */
1149 if (line_prefix != NULL)
1150 {
1151 ostream_write_str (stream, line_prefix);
1152 currcol = strlen (line_prefix);
1153 }
1154 begin_css_class (stream, css_class);
1155 if (indent)
1156 {
1157 ostream_write_mem (stream, " ", 8 - (currcol & 7));
1158 currcol = (currcol + 8) & ~7;
1159 }
1160 begin_css_class (stream, class_string);
1161 ostream_write_str (stream, "\"");
1162 begin_css_class (stream, class_text);
1163 }
1164 /* Change currattr so that it matches attributes[i]. */
1165 if (attributes[i] != currattr)
1166 {
1167 /* class_escape_sequence occurs inside class_format_directive
1168 and class_invalid_format_directive, so clear it first. */
1169 if (currattr & ATTR_ESCAPE_SEQUENCE)
1170 {
1171 end_css_class (stream, class_escape_sequence);
1172 currattr &= ~ATTR_ESCAPE_SEQUENCE;
1173 }
1174 if (~attributes[i] & currattr & ATTR_FORMAT_DIRECTIVE)
1175 {
1176 end_css_class (stream, class_format_directive);
1177 currattr &= ~ATTR_FORMAT_DIRECTIVE;
1178 }
1179 else if (~attributes[i] & currattr & ATTR_INVALID_FORMAT_DIRECTIVE)
1180 {
1181 end_css_class (stream, class_invalid_format_directive);
1182 currattr &= ~ATTR_INVALID_FORMAT_DIRECTIVE;
1183 }
1184 if (attributes[i] & ~currattr & ATTR_FORMAT_DIRECTIVE)
1185 {
1186 begin_css_class (stream, class_format_directive);
1187 currattr |= ATTR_FORMAT_DIRECTIVE;
1188 }
1189 else if (attributes[i] & ~currattr & ATTR_INVALID_FORMAT_DIRECTIVE)
1190 {
1191 begin_css_class (stream, class_invalid_format_directive);
1192 currattr |= ATTR_INVALID_FORMAT_DIRECTIVE;
1193 }
1194 /* class_escape_sequence occurs inside class_format_directive
1195 and class_invalid_format_directive, so set it last. */
1196 if (attributes[i] & ~currattr & ATTR_ESCAPE_SEQUENCE)
1197 {
1198 begin_css_class (stream, class_escape_sequence);
1199 currattr |= ATTR_ESCAPE_SEQUENCE;
1200 }
1201 }
1202 ostream_write_mem (stream, &portion[i], 1);
1203 }
1204
1205 /* Change currattr so that it becomes 0. */
1206 if (currattr & ATTR_ESCAPE_SEQUENCE)
1207 {
1208 end_css_class (stream, class_escape_sequence);
1209 currattr &= ~ATTR_ESCAPE_SEQUENCE;
1210 }
1211 if (currattr & ATTR_FORMAT_DIRECTIVE)
1212 {
1213 end_css_class (stream, class_format_directive);
1214 currattr &= ~ATTR_FORMAT_DIRECTIVE;
1215 }
1216 else if (currattr & ATTR_INVALID_FORMAT_DIRECTIVE)
1217 {
1218 end_css_class (stream, class_invalid_format_directive);
1219 currattr &= ~ATTR_INVALID_FORMAT_DIRECTIVE;
1220 }
1221 if (!(currattr == 0))
1222 abort ();
1223
1224 end_css_class (stream, class_text);
1225 ostream_write_str (stream, "\"");
1226 end_css_class (stream, class_string);
1227 end_css_class (stream, css_class);
1228 ostream_write_str (stream, "\n");
1229 }
1230
1231 free (linebreaks);
1232 free (attributes);
1233 free (overrides);
1234 free (portion);
1235
1236 s = es;
1237 # undef is_escape
1238 }
1239 while (*s);
1240
1241 if (fmtdirattr != NULL)
1242 free (fmtdirattr);
1243 if (fmtdir != NULL)
1244 free (fmtdir);
1245
1246 #if HAVE_ICONV
1247 if (conv != (iconv_t)(-1))
1248 iconv_close (conv);
1249 #endif
1250 }
1251
1252
1253 static void
print_blank_line(ostream_t stream)1254 print_blank_line (ostream_t stream)
1255 {
1256 if (uniforum)
1257 {
1258 begin_css_class (stream, class_comment);
1259 ostream_write_str (stream, "#\n");
1260 end_css_class (stream, class_comment);
1261 }
1262 else
1263 ostream_write_str (stream, "\n");
1264 }
1265
1266
1267 static void
message_print(const message_ty * mp,ostream_t stream,const char * charset,size_t page_width,bool blank_line,bool debug)1268 message_print (const message_ty *mp, ostream_t stream,
1269 const char *charset, size_t page_width, bool blank_line,
1270 bool debug)
1271 {
1272 int extra_indent;
1273
1274 /* Separate messages with a blank line. Uniforum doesn't like blank
1275 lines, so use an empty comment (unless there already is one). */
1276 if (blank_line && (!uniforum
1277 || mp->comment == NULL
1278 || mp->comment->nitems == 0
1279 || mp->comment->item[0][0] != '\0'))
1280 print_blank_line (stream);
1281
1282 if (is_header (mp))
1283 begin_css_class (stream, class_header);
1284 else if (mp->msgstr[0] == '\0')
1285 begin_css_class (stream, class_untranslated);
1286 else if (mp->is_fuzzy)
1287 begin_css_class (stream, class_fuzzy);
1288 else
1289 begin_css_class (stream, class_translated);
1290
1291 begin_css_class (stream, class_comment);
1292
1293 /* Print translator comment if available. */
1294 message_print_comment (mp, stream);
1295
1296 /* Print xgettext extracted comments. */
1297 message_print_comment_dot (mp, stream);
1298
1299 /* Print the file position comments. This will help a human who is
1300 trying to navigate the sources. There is no problem of getting
1301 repeated positions, because duplicates are checked for. */
1302 message_print_comment_filepos (mp, stream, uniforum, page_width);
1303
1304 /* Print flag information in special comment. */
1305 message_print_comment_flags (mp, stream, debug);
1306
1307 /* Print the previous msgid. This helps the translator when the msgid has
1308 only slightly changed. */
1309 begin_css_class (stream, class_previous_comment);
1310 if (mp->prev_msgctxt != NULL)
1311 wrap (mp, stream, "#| ", 0, class_previous, "msgctxt", mp->prev_msgctxt,
1312 mp->do_wrap, page_width, charset);
1313 if (mp->prev_msgid != NULL)
1314 wrap (mp, stream, "#| ", 0, class_previous, "msgid", mp->prev_msgid,
1315 mp->do_wrap, page_width, charset);
1316 if (mp->prev_msgid_plural != NULL)
1317 wrap (mp, stream, "#| ", 0, class_previous, "msgid_plural",
1318 mp->prev_msgid_plural, mp->do_wrap, page_width, charset);
1319 end_css_class (stream, class_previous_comment);
1320 extra_indent = (mp->prev_msgctxt != NULL || mp->prev_msgid != NULL
1321 || mp->prev_msgid_plural != NULL
1322 ? 3
1323 : 0);
1324
1325 end_css_class (stream, class_comment);
1326
1327 /* Print each of the message components. Wrap them nicely so they
1328 are as readable as possible. If there is no recorded msgstr for
1329 this domain, emit an empty string. */
1330 if (mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt)
1331 && po_charset_canonicalize (charset) != po_charset_utf8)
1332 {
1333 char *warning_message =
1334 xasprintf (_("\
1335 The following msgctxt contains non-ASCII characters.\n\
1336 This will cause problems to translators who use a character encoding\n\
1337 different from yours. Consider using a pure ASCII msgctxt instead.\n\
1338 %s\n"), mp->msgctxt);
1339 po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
1340 free (warning_message);
1341 }
1342 if (!is_ascii_string (mp->msgid)
1343 && po_charset_canonicalize (charset) != po_charset_utf8)
1344 {
1345 char *warning_message =
1346 xasprintf (_("\
1347 The following msgid contains non-ASCII characters.\n\
1348 This will cause problems to translators who use a character encoding\n\
1349 different from yours. Consider using a pure ASCII msgid instead.\n\
1350 %s\n"), mp->msgid);
1351 po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
1352 free (warning_message);
1353 }
1354 if (mp->msgctxt != NULL)
1355 wrap (mp, stream, NULL, extra_indent, class_msgid, "msgctxt", mp->msgctxt,
1356 mp->do_wrap, page_width, charset);
1357 wrap (mp, stream, NULL, extra_indent, class_msgid, "msgid", mp->msgid,
1358 mp->do_wrap, page_width, charset);
1359 if (mp->msgid_plural != NULL)
1360 wrap (mp, stream, NULL, extra_indent, class_msgid, "msgid_plural",
1361 mp->msgid_plural, mp->do_wrap, page_width, charset);
1362
1363 if (mp->msgid_plural == NULL)
1364 wrap (mp, stream, NULL, extra_indent, class_msgstr, "msgstr", mp->msgstr,
1365 mp->do_wrap, page_width, charset);
1366 else
1367 {
1368 char prefix_buf[20];
1369 unsigned int i;
1370 const char *p;
1371
1372 for (p = mp->msgstr, i = 0;
1373 p < mp->msgstr + mp->msgstr_len;
1374 p += strlen (p) + 1, i++)
1375 {
1376 sprintf (prefix_buf, "msgstr[%u]", i);
1377 wrap (mp, stream, NULL, extra_indent, class_msgstr, prefix_buf, p,
1378 mp->do_wrap, page_width, charset);
1379 }
1380 }
1381
1382 if (is_header (mp))
1383 end_css_class (stream, class_header);
1384 else if (mp->msgstr[0] == '\0')
1385 end_css_class (stream, class_untranslated);
1386 else if (mp->is_fuzzy)
1387 end_css_class (stream, class_fuzzy);
1388 else
1389 end_css_class (stream, class_translated);
1390 }
1391
1392
1393 static void
message_print_obsolete(const message_ty * mp,ostream_t stream,const char * charset,size_t page_width,bool blank_line,bool debug)1394 message_print_obsolete (const message_ty *mp, ostream_t stream,
1395 const char *charset, size_t page_width, bool blank_line,
1396 bool debug)
1397 {
1398 int extra_indent;
1399
1400 /* If msgstr is the empty string we print nothing. */
1401 if (mp->msgstr[0] == '\0')
1402 return;
1403
1404 /* Separate messages with a blank line. Uniforum doesn't like blank
1405 lines, so use an empty comment (unless there already is one). */
1406 if (blank_line)
1407 print_blank_line (stream);
1408
1409 begin_css_class (stream, class_obsolete);
1410
1411 begin_css_class (stream, class_comment);
1412
1413 /* Print translator comment if available. */
1414 message_print_comment (mp, stream);
1415
1416 /* Print xgettext extracted comments (normally empty). */
1417 message_print_comment_dot (mp, stream);
1418
1419 /* Print the file position comments (normally empty). */
1420 message_print_comment_filepos (mp, stream, uniforum, page_width);
1421
1422 /* Print flag information in special comment.
1423 Preserve only
1424 - the fuzzy flag, because it is important for the translator when the
1425 message becomes active again,
1426 - the no-wrap flag, because we use mp->do_wrap below for the wrapping,
1427 therefore further processing through 'msgcat' needs to use the same
1428 value of do_wrap,
1429 - the *-format flags, because the wrapping depends on these flags (see
1430 'Don't break inside format directives' comment), therefore further
1431 processing through 'msgcat' needs to use the same values of is_format.
1432 This is a trimmed-down variant of message_print_comment_flags. */
1433 if (mp->is_fuzzy
1434 || has_significant_format_p (mp->is_format)
1435 || mp->do_wrap == no)
1436 {
1437 bool first_flag = true;
1438 size_t i;
1439
1440 ostream_write_str (stream, "#,");
1441
1442 if (mp->is_fuzzy)
1443 {
1444 ostream_write_str (stream, " fuzzy");
1445 first_flag = false;
1446 }
1447
1448 for (i = 0; i < NFORMATS; i++)
1449 if (significant_format_p (mp->is_format[i]))
1450 {
1451 if (!first_flag)
1452 ostream_write_str (stream, ",");
1453
1454 ostream_write_str (stream, " ");
1455 ostream_write_str (stream,
1456 make_format_description_string (mp->is_format[i],
1457 format_language[i],
1458 debug));
1459 first_flag = false;
1460 }
1461
1462 if (mp->do_wrap == no)
1463 {
1464 if (!first_flag)
1465 ostream_write_str (stream, ",");
1466
1467 ostream_write_str (stream, " ");
1468 ostream_write_str (stream,
1469 make_c_width_description_string (mp->do_wrap));
1470 first_flag = false;
1471 }
1472
1473 ostream_write_str (stream, "\n");
1474 }
1475
1476 /* Print the previous msgid. This helps the translator when the msgid has
1477 only slightly changed. */
1478 begin_css_class (stream, class_previous_comment);
1479 if (mp->prev_msgctxt != NULL)
1480 wrap (mp, stream, "#~| ", 0, class_previous, "msgctxt", mp->prev_msgctxt,
1481 mp->do_wrap, page_width, charset);
1482 if (mp->prev_msgid != NULL)
1483 wrap (mp, stream, "#~| ", 0, class_previous, "msgid", mp->prev_msgid,
1484 mp->do_wrap, page_width, charset);
1485 if (mp->prev_msgid_plural != NULL)
1486 wrap (mp, stream, "#~| ", 0, class_previous, "msgid_plural",
1487 mp->prev_msgid_plural, mp->do_wrap, page_width, charset);
1488 end_css_class (stream, class_previous_comment);
1489 extra_indent = (mp->prev_msgctxt != NULL || mp->prev_msgid != NULL
1490 || mp->prev_msgid_plural != NULL
1491 ? 1
1492 : 0);
1493
1494 end_css_class (stream, class_comment);
1495
1496 /* Print each of the message components. Wrap them nicely so they
1497 are as readable as possible. */
1498 if (mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt)
1499 && po_charset_canonicalize (charset) != po_charset_utf8)
1500 {
1501 char *warning_message =
1502 xasprintf (_("\
1503 The following msgctxt contains non-ASCII characters.\n\
1504 This will cause problems to translators who use a character encoding\n\
1505 different from yours. Consider using a pure ASCII msgctxt instead.\n\
1506 %s\n"), mp->msgctxt);
1507 po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
1508 free (warning_message);
1509 }
1510 if (!is_ascii_string (mp->msgid)
1511 && po_charset_canonicalize (charset) != po_charset_utf8)
1512 {
1513 char *warning_message =
1514 xasprintf (_("\
1515 The following msgid contains non-ASCII characters.\n\
1516 This will cause problems to translators who use a character encoding\n\
1517 different from yours. Consider using a pure ASCII msgid instead.\n\
1518 %s\n"), mp->msgid);
1519 po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
1520 free (warning_message);
1521 }
1522 if (mp->msgctxt != NULL)
1523 wrap (mp, stream, "#~ ", extra_indent, class_msgid, "msgctxt", mp->msgctxt,
1524 mp->do_wrap, page_width, charset);
1525 wrap (mp, stream, "#~ ", extra_indent, class_msgid, "msgid", mp->msgid,
1526 mp->do_wrap, page_width, charset);
1527 if (mp->msgid_plural != NULL)
1528 wrap (mp, stream, "#~ ", extra_indent, class_msgid, "msgid_plural",
1529 mp->msgid_plural, mp->do_wrap, page_width, charset);
1530
1531 if (mp->msgid_plural == NULL)
1532 wrap (mp, stream, "#~ ", extra_indent, class_msgstr, "msgstr", mp->msgstr,
1533 mp->do_wrap, page_width, charset);
1534 else
1535 {
1536 char prefix_buf[20];
1537 unsigned int i;
1538 const char *p;
1539
1540 for (p = mp->msgstr, i = 0;
1541 p < mp->msgstr + mp->msgstr_len;
1542 p += strlen (p) + 1, i++)
1543 {
1544 sprintf (prefix_buf, "msgstr[%u]", i);
1545 wrap (mp, stream, "#~ ", extra_indent, class_msgstr, prefix_buf, p,
1546 mp->do_wrap, page_width, charset);
1547 }
1548 }
1549
1550 end_css_class (stream, class_obsolete);
1551 }
1552
1553
1554 static void
msgdomain_list_print_po(msgdomain_list_ty * mdlp,ostream_t stream,size_t page_width,bool debug)1555 msgdomain_list_print_po (msgdomain_list_ty *mdlp, ostream_t stream,
1556 size_t page_width, bool debug)
1557 {
1558 size_t j, k;
1559 bool blank_line;
1560
1561 /* Write out the messages for each domain. */
1562 blank_line = false;
1563 for (k = 0; k < mdlp->nitems; k++)
1564 {
1565 message_list_ty *mlp;
1566 const char *header;
1567 const char *charset;
1568 char *allocated_charset;
1569
1570 /* If the first domain is the default, don't bother emitting
1571 the domain name, because it is the default. */
1572 if (!(k == 0
1573 && strcmp (mdlp->item[k]->domain, MESSAGE_DOMAIN_DEFAULT) == 0))
1574 {
1575 if (blank_line)
1576 print_blank_line (stream);
1577 begin_css_class (stream, class_keyword);
1578 ostream_write_str (stream, "domain");
1579 end_css_class (stream, class_keyword);
1580 ostream_write_str (stream, " ");
1581 begin_css_class (stream, class_string);
1582 ostream_write_str (stream, "\"");
1583 begin_css_class (stream, class_text);
1584 ostream_write_str (stream, mdlp->item[k]->domain);
1585 end_css_class (stream, class_text);
1586 ostream_write_str (stream, "\"");
1587 end_css_class (stream, class_string);
1588 ostream_write_str (stream, "\n");
1589 blank_line = true;
1590 }
1591
1592 mlp = mdlp->item[k]->messages;
1593
1594 /* Search the header entry. */
1595 header = NULL;
1596 for (j = 0; j < mlp->nitems; ++j)
1597 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1598 {
1599 header = mlp->item[j]->msgstr;
1600 break;
1601 }
1602
1603 /* Extract the charset name. */
1604 charset = "ASCII";
1605 allocated_charset = NULL;
1606 if (header != NULL)
1607 {
1608 const char *charsetstr = c_strstr (header, "charset=");
1609
1610 if (charsetstr != NULL)
1611 {
1612 size_t len;
1613
1614 charsetstr += strlen ("charset=");
1615 len = strcspn (charsetstr, " \t\n");
1616 allocated_charset = (char *) xmalloca (len + 1);
1617 memcpy (allocated_charset, charsetstr, len);
1618 allocated_charset[len] = '\0';
1619 charset = allocated_charset;
1620
1621 /* Treat the dummy default value as if it were absent. */
1622 if (strcmp (charset, "CHARSET") == 0)
1623 charset = "ASCII";
1624 }
1625 }
1626
1627 /* Write out each of the messages for this domain. */
1628 for (j = 0; j < mlp->nitems; ++j)
1629 if (!mlp->item[j]->obsolete)
1630 {
1631 message_print (mlp->item[j], stream, charset, page_width,
1632 blank_line, debug);
1633 blank_line = true;
1634 }
1635
1636 /* Write out each of the obsolete messages for this domain. */
1637 for (j = 0; j < mlp->nitems; ++j)
1638 if (mlp->item[j]->obsolete)
1639 {
1640 message_print_obsolete (mlp->item[j], stream, charset, page_width,
1641 blank_line, debug);
1642 blank_line = true;
1643 }
1644
1645 if (allocated_charset != NULL)
1646 freea (allocated_charset);
1647 }
1648 }
1649
1650
/* Describes a PO file in .po syntax.
   The initializer is positional: each value is assigned to the members of
   struct catalog_output_format in declaration order.  The trailing comments
   give the member each value is meant for -- NOTE(review): confirm the
   order against the declaration of struct catalog_output_format, which is
   not part of this file.  */
const struct catalog_output_format output_format_po =
{
  msgdomain_list_print_po,              /* print */
  false,                                /* requires_utf8 */
  true,                                 /* supports_color */
  true,                                 /* supports_multiple_domains */
  true,                                 /* supports_contexts */
  true,                                 /* supports_plurals */
  true,                                 /* sorts_obsoletes_to_end */
  false,                                /* alternative_is_po */
  false                                 /* alternative_is_java_class */
};
1664