1 /* GNU gettext - internationalization aids
2    Copyright (C) 1995-1998, 2000-2010, 2012, 2014-2015, 2018-2019 Free Software
3    Foundation, Inc.
4 
5    This file was written by Peter Miller <millerp@canb.auug.org.au>
6 
7    This program is free software: you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
19 
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23 #include <alloca.h>
24 
25 /* Specification.  */
26 #include "write-po.h"
27 
28 #include <errno.h>
29 #include <limits.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 
34 #if HAVE_ICONV
35 # include <iconv.h>
36 #endif
37 
38 #include <textstyle.h>
39 
40 #include "c-ctype.h"
41 #include "po-charset.h"
42 #include "format.h"
43 #include "unilbrk.h"
44 #include "msgl-ascii.h"
45 #include "write-catalog.h"
46 #include "xalloc.h"
47 #include "xmalloca.h"
48 #include "c-strstr.h"
49 #include "xvasprintf.h"
50 #include "po-xerror.h"
51 #include "gettext.h"
52 
53 /* Our regular abbreviation.  */
54 #define _(str) gettext (str)
55 
56 #if HAVE_DECL_PUTC_UNLOCKED
57 # undef putc
58 # define putc putc_unlocked
59 #endif
60 
61 
62 /* =================== Putting together a #, flags line. =================== */
63 
64 
65 /* Convert IS_FORMAT in the context of programming language LANG to a flag
66    string for use in #, flags.  */
67 
68 const char *
make_format_description_string(enum is_format is_format,const char * lang,bool debug)69 make_format_description_string (enum is_format is_format, const char *lang,
70                                 bool debug)
71 {
72   static char result[100];
73 
74   switch (is_format)
75     {
76     case possible:
77       if (debug)
78         {
79           sprintf (result, "possible-%s-format", lang);
80           break;
81         }
82       /* FALLTHROUGH */
83     case yes_according_to_context:
84     case yes:
85       sprintf (result, "%s-format", lang);
86       break;
87     case no:
88       sprintf (result, "no-%s-format", lang);
89       break;
90     default:
91       /* The others have already been filtered out by significant_format_p.  */
92       abort ();
93     }
94 
95   return result;
96 }
97 
98 
99 /* Return true if IS_FORMAT is worth mentioning in a #, flags list.  */
100 
101 bool
significant_format_p(enum is_format is_format)102 significant_format_p (enum is_format is_format)
103 {
104   return is_format != undecided && is_format != impossible;
105 }
106 
107 
108 /* Return true if one of IS_FORMAT is worth mentioning in a #, flags list.  */
109 
110 static bool
has_significant_format_p(const enum is_format is_format[NFORMATS])111 has_significant_format_p (const enum is_format is_format[NFORMATS])
112 {
113   size_t i;
114 
115   for (i = 0; i < NFORMATS; i++)
116     if (significant_format_p (is_format[i]))
117       return true;
118   return false;
119 }
120 
121 
122 /* Convert a RANGE to a freshly allocated string for use in #, flags.  */
123 
124 char *
make_range_description_string(struct argument_range range)125 make_range_description_string (struct argument_range range)
126 {
127   return xasprintf ("range: %d..%d", range.min, range.max);
128 }
129 
130 
131 /* Convert a wrapping flag DO_WRAP to a string for use in #, flags.  */
132 
133 static const char *
make_c_width_description_string(enum is_wrap do_wrap)134 make_c_width_description_string (enum is_wrap do_wrap)
135 {
136   const char *result = NULL;
137 
138   switch (do_wrap)
139     {
140     case yes:
141       result = "wrap";
142       break;
143     case no:
144       result = "no-wrap";
145       break;
146     default:
147       abort ();
148     }
149 
150   return result;
151 }
152 
153 
154 /* ========================== Styling primitives. ========================== */
155 
156 
157 /* When compiled in src, enable styling support.
158    When compiled in libgettextpo, don't enable styling support.  */
159 #ifdef GETTEXTDATADIR
160 
161 /* All ostream_t instances are in fact styled_ostream_t instances.  */
162 
163 /* Start a run of text belonging to a given CSS class.  */
164 static inline void
begin_css_class(ostream_t stream,const char * classname)165 begin_css_class (ostream_t stream, const char *classname)
166 {
167   styled_ostream_begin_use_class ((styled_ostream_t) stream, classname);
168 }
169 
170 /* End a run of text belonging to a given CSS class.  */
171 static inline void
end_css_class(ostream_t stream,const char * classname)172 end_css_class (ostream_t stream, const char *classname)
173 {
174   styled_ostream_end_use_class ((styled_ostream_t) stream, classname);
175 }
176 
177 #else
178 
/* Fallbacks for builds without styling support.  The class markers write
   nothing to the stream; they only evaluate CLASSNAME and discard the
   value (presumably so that the class name constants still count as
   used -- TODO confirm).  */
#define is_stylable(stream) false
#define begin_css_class(stream,classname) (void)(classname)
#define end_css_class(stream,classname) (void)(classname)
182 
183 #endif
184 
/* Names of the CSS classes that are handed to begin_css_class and
   end_css_class while a message is being output.  */

/* CSS classes at message level.  */
static const char class_header[] = "header";
static const char class_translated[] = "translated";
static const char class_untranslated[] = "untranslated";
static const char class_fuzzy[] = "fuzzy";
static const char class_obsolete[] = "obsolete";

/* CSS classes describing the parts of a message.  */
static const char class_comment[] = "comment";
static const char class_translator_comment[] = "translator-comment";
static const char class_extracted_comment[] = "extracted-comment";
static const char class_reference_comment[] = "reference-comment";
static const char class_reference[] = "reference";
static const char class_flag_comment[] = "flag-comment";
static const char class_flag[] = "flag";
static const char class_fuzzy_flag[] = "fuzzy-flag";
static const char class_previous_comment[] = "previous-comment";
static const char class_previous[] = "previous";
static const char class_msgid[] = "msgid";
static const char class_msgstr[] = "msgstr";
static const char class_keyword[] = "keyword";
static const char class_string[] = "string";

/* CSS classes for the contents of strings.  */
static const char class_text[] = "text";
static const char class_escape_sequence[] = "escape-sequence";
static const char class_format_directive[] = "format-directive";
static const char class_invalid_format_directive[] = "invalid-format-directive";
/* The following classes are not compiled in at present.  */
#if 0
static const char class_added[] = "added";
static const char class_changed[] = "changed";
static const char class_removed[] = "removed";
#endif
218 
/* Per-character attributes.
   These are bit flags; wrap() keeps one attribute byte for every byte of
   the escaped output string.  */
enum
{
  /* The byte is part of an escape sequence written by wrap().  */
  ATTR_ESCAPE_SEQUENCE          = 1 << 0,
  /* The following two are exclusive.  */
  /* The byte lies inside a format directive that ended without error.  */
  ATTR_FORMAT_DIRECTIVE         = 1 << 1,
  /* The byte lies inside a format directive marked with FMTDIR_ERROR.  */
  ATTR_INVALID_FORMAT_DIRECTIVE = 1 << 2
};
227 
228 
229 /* ================ Output parts of a message, as comments. ================ */
230 
231 
232 /* Output mp->comment as a set of comment lines.  */
233 
/* Whether message_print_comment outputs the translator comments.  Enabled
   by default; changed through message_print_style_comment.  */
static bool print_comment = true;

/* Enable (FLAG true) or disable (FLAG false) the output of translator
   comments by message_print_comment.  */
void
message_print_style_comment (bool flag)
{
  print_comment = flag;
}
241 
242 void
message_print_comment(const message_ty * mp,ostream_t stream)243 message_print_comment (const message_ty *mp, ostream_t stream)
244 {
245   if (print_comment && mp->comment != NULL)
246     {
247       size_t j;
248 
249       begin_css_class (stream, class_translator_comment);
250 
251       for (j = 0; j < mp->comment->nitems; ++j)
252         {
253           const char *s = mp->comment->item[j];
254           do
255             {
256               const char *e;
257               ostream_write_str (stream, "#");
258               if (*s != '\0')
259                 ostream_write_str (stream, " ");
260               e = strchr (s, '\n');
261               if (e == NULL)
262                 {
263                   ostream_write_str (stream, s);
264                   s = NULL;
265                 }
266               else
267                 {
268                   ostream_write_mem (stream, s, e - s);
269                   s = e + 1;
270                 }
271               ostream_write_str (stream, "\n");
272             }
273           while (s != NULL);
274         }
275 
276       end_css_class (stream, class_translator_comment);
277     }
278 }
279 
280 
281 /* Output mp->comment_dot as a set of comment lines.  */
282 
283 void
message_print_comment_dot(const message_ty * mp,ostream_t stream)284 message_print_comment_dot (const message_ty *mp, ostream_t stream)
285 {
286   if (mp->comment_dot != NULL)
287     {
288       size_t j;
289 
290       begin_css_class (stream, class_extracted_comment);
291 
292       for (j = 0; j < mp->comment_dot->nitems; ++j)
293         {
294           const char *s = mp->comment_dot->item[j];
295           ostream_write_str (stream, "#.");
296           if (*s != '\0')
297             ostream_write_str (stream, " ");
298           ostream_write_str (stream, s);
299           ostream_write_str (stream, "\n");
300         }
301 
302       end_css_class (stream, class_extracted_comment);
303     }
304 }
305 
306 
307 /* Output mp->filepos as a set of comment lines.  */
308 
309 static enum filepos_comment_type filepos_comment_type = filepos_comment_full;
310 
311 void
message_print_comment_filepos(const message_ty * mp,ostream_t stream,bool uniforum,size_t page_width)312 message_print_comment_filepos (const message_ty *mp, ostream_t stream,
313                                bool uniforum, size_t page_width)
314 {
315   if (filepos_comment_type != filepos_comment_none
316       && mp->filepos_count != 0)
317     {
318       size_t filepos_count;
319       lex_pos_ty *filepos;
320 
321       begin_css_class (stream, class_reference_comment);
322 
323       if (filepos_comment_type == filepos_comment_file)
324         {
325           size_t i;
326 
327           filepos_count = 0;
328           filepos = XNMALLOC (mp->filepos_count, lex_pos_ty);
329 
330           for (i = 0; i < mp->filepos_count; ++i)
331             {
332               lex_pos_ty *pp = &mp->filepos[i];
333               size_t j;
334 
335               for (j = 0; j < filepos_count; j++)
336                 if (strcmp (filepos[j].file_name, pp->file_name) == 0)
337                   break;
338 
339               if (j == filepos_count)
340                 {
341                   filepos[filepos_count].file_name = pp->file_name;
342                   filepos[filepos_count].line_number = (size_t)-1;
343                   filepos_count++;
344                 }
345             }
346         }
347       else
348         {
349           filepos = mp->filepos;
350           filepos_count = mp->filepos_count;
351         }
352 
353       if (uniforum)
354         {
355           size_t j;
356 
357           for (j = 0; j < filepos_count; ++j)
358             {
359               lex_pos_ty *pp = &filepos[j];
360               const char *cp = pp->file_name;
361               char *str;
362 
363               while (cp[0] == '.' && cp[1] == '/')
364                 cp += 2;
365               ostream_write_str (stream, "# ");
366               begin_css_class (stream, class_reference);
367               /* There are two Sun formats to choose from: SunOS and
368                  Solaris.  Use the Solaris form here.  */
369               str = xasprintf ("File: %s, line: %ld",
370                                cp, (long) pp->line_number);
371               ostream_write_str (stream, str);
372               end_css_class (stream, class_reference);
373               ostream_write_str (stream, "\n");
374               free (str);
375             }
376         }
377       else
378         {
379           size_t column;
380           size_t j;
381 
382           ostream_write_str (stream, "#:");
383           column = 2;
384           for (j = 0; j < filepos_count; ++j)
385             {
386               lex_pos_ty *pp;
387               char buffer[21];
388               const char *cp;
389               size_t len;
390 
391               pp = &filepos[j];
392               cp = pp->file_name;
393               while (cp[0] == '.' && cp[1] == '/')
394                 cp += 2;
395               if (filepos_comment_type == filepos_comment_file
396                   /* Some xgettext input formats, like RST, lack line
397                      numbers.  */
398                   || pp->line_number == (size_t)(-1))
399                 buffer[0] = '\0';
400               else
401                 sprintf (buffer, ":%ld", (long) pp->line_number);
402               len = strlen (cp) + strlen (buffer) + 1;
403               if (column > 2 && column + len > page_width)
404                 {
405                   ostream_write_str (stream, "\n#:");
406                   column = 2;
407                 }
408               ostream_write_str (stream, " ");
409               begin_css_class (stream, class_reference);
410               ostream_write_str (stream, cp);
411               ostream_write_str (stream, buffer);
412               end_css_class (stream, class_reference);
413               column += len;
414             }
415           ostream_write_str (stream, "\n");
416         }
417 
418       if (filepos != mp->filepos)
419         free (filepos);
420 
421       end_css_class (stream, class_reference_comment);
422     }
423 }
424 
425 
426 /* Output mp->is_fuzzy, mp->is_format, mp->range, mp->do_wrap as a comment
427    line.  */
428 
429 void
message_print_comment_flags(const message_ty * mp,ostream_t stream,bool debug)430 message_print_comment_flags (const message_ty *mp, ostream_t stream, bool debug)
431 {
432   if ((mp->is_fuzzy && mp->msgstr[0] != '\0')
433       || has_significant_format_p (mp->is_format)
434       || has_range_p (mp->range)
435       || mp->do_wrap == no)
436     {
437       bool first_flag = true;
438       size_t i;
439 
440       begin_css_class (stream, class_flag_comment);
441 
442       ostream_write_str (stream, "#,");
443 
444       /* We don't print the fuzzy flag if the msgstr is empty.  This
445          might be introduced by the user but we want to normalize the
446          output.  */
447       if (mp->is_fuzzy && mp->msgstr[0] != '\0')
448         {
449           ostream_write_str (stream, " ");
450           begin_css_class (stream, class_flag);
451           begin_css_class (stream, class_fuzzy_flag);
452           ostream_write_str (stream, "fuzzy");
453           end_css_class (stream, class_fuzzy_flag);
454           end_css_class (stream, class_flag);
455           first_flag = false;
456         }
457 
458       for (i = 0; i < NFORMATS; i++)
459         if (significant_format_p (mp->is_format[i]))
460           {
461             if (!first_flag)
462               ostream_write_str (stream, ",");
463 
464             ostream_write_str (stream, " ");
465             begin_css_class (stream, class_flag);
466             ostream_write_str (stream,
467                                make_format_description_string (mp->is_format[i],
468                                                                format_language[i],
469                                                                debug));
470             end_css_class (stream, class_flag);
471             first_flag = false;
472           }
473 
474       if (has_range_p (mp->range))
475         {
476           char *string;
477 
478           if (!first_flag)
479             ostream_write_str (stream, ",");
480 
481           ostream_write_str (stream, " ");
482           begin_css_class (stream, class_flag);
483           string = make_range_description_string (mp->range);
484           ostream_write_str (stream, string);
485           free (string);
486           end_css_class (stream, class_flag);
487           first_flag = false;
488         }
489 
490       if (mp->do_wrap == no)
491         {
492           if (!first_flag)
493             ostream_write_str (stream, ",");
494 
495           ostream_write_str (stream, " ");
496           begin_css_class (stream, class_flag);
497           ostream_write_str (stream,
498                              make_c_width_description_string (mp->do_wrap));
499           end_css_class (stream, class_flag);
500           first_flag = false;
501         }
502 
503       ostream_write_str (stream, "\n");
504 
505       end_css_class (stream, class_flag_comment);
506     }
507 }
508 
509 
510 /* ========= Some parameters for use by 'msgdomain_list_print_po'. ========= */
511 
512 
/* This variable controls the extent to which the page width applies.
   True means it applies to message strings and file reference lines.
   False means it applies to file reference lines only.  */
static bool wrap_strings = true;

/* Make the page width apply to file reference lines only, by clearing
   wrap_strings.  There is no function here that sets it again.  */
void
message_page_width_ignore (void)
{
  wrap_strings = false;
}
523 
524 
/* These three variables control the output style of the message_print
   function.  Interface functions for them are to be used.  */
static bool indent = false;
static bool uniforum = false;
static bool escape = false;

/* Turn on the 'indent' output style.  */
void
message_print_style_indent (void)
{
  indent = true;
}

/* Turn on the 'uniforum' output style.  */
void
message_print_style_uniforum (void)
{
  uniforum = true;
}

/* Turn the 'escape' output style on (FLAG true) or off (FLAG false).  */
void
message_print_style_escape (bool flag)
{
  escape = flag;
}
548 
/* Select which source references message_print_comment_filepos outputs:
   TYPE becomes the new value of filepos_comment_type.  */
void
message_print_style_filepos (enum filepos_comment_type type)
{
  filepos_comment_type = type;
}
554 
555 
556 /* --add-location argument handling.  Return an error indicator.  */
557 bool
handle_filepos_comment_option(const char * option)558 handle_filepos_comment_option (const char *option)
559 {
560   if (option != NULL)
561     {
562       if (strcmp (option, "never") == 0 || strcmp (option, "no") == 0)
563         message_print_style_filepos (filepos_comment_none);
564       else if (strcmp (option, "full") == 0 || strcmp (option, "yes") == 0)
565         message_print_style_filepos (filepos_comment_full);
566       else if (strcmp (option, "file") == 0)
567         message_print_style_filepos (filepos_comment_file);
568       else
569         {
570           fprintf (stderr, "invalid --add-location argument: %s\n", option);
571           return true;
572         }
573     }
574   else
575     /* --add-location is equivalent to --add-location=full.  */
576     message_print_style_filepos (filepos_comment_full);
577   return false;
578 }
579 
580 
581 /* =============== msgdomain_list_print_po() and subroutines. =============== */
582 
583 
/* Copy N bytes from SRC to DST, like memcpy, one byte at a time.
   Intended for very short copies (N <= 1), where it gets by without a
   library call.  */
static inline void
memcpy_small (void *dst, const void *src, size_t n)
{
  size_t remaining;
  char *out;
  const char *in;

  if (n == 0)
    return;

  out = (char *) dst;
  in = (const char *) src;

  /* The first byte is handled outside of the loop.  */
  *out = *in;
  for (remaining = n - 1; remaining > 0; remaining--)
    *++out = *++in;
}
598 
599 
/* Store the byte C into the first N bytes at DST, like memset, one byte at
   a time.  Intended for very short fills (N <= 1), where it gets by
   without a library call.  */
static inline void
memset_small (void *dst, char c, size_t n)
{
  size_t remaining;
  char *out;

  if (n == 0)
    return;

  out = (char *) dst;

  /* The first byte is handled outside of the loop.  */
  *out = c;
  for (remaining = n - 1; remaining > 0; remaining--)
    *++out = c;
}
613 
614 
615 static void
wrap(const message_ty * mp,ostream_t stream,const char * line_prefix,int extra_indent,const char * css_class,const char * name,const char * value,enum is_wrap do_wrap,size_t page_width,const char * charset)616 wrap (const message_ty *mp, ostream_t stream,
617       const char *line_prefix, int extra_indent, const char *css_class,
618       const char *name, const char *value,
619       enum is_wrap do_wrap, size_t page_width,
620       const char *charset)
621 {
622   const char *canon_charset;
623   char *fmtdir;
624   char *fmtdirattr;
625   const char *s;
626   bool first_line;
627 #if HAVE_ICONV
628   const char *envval;
629   iconv_t conv;
630 #endif
631   bool weird_cjk;
632 
633   canon_charset = po_charset_canonicalize (charset);
634 
635 #if HAVE_ICONV
636   /* The old Solaris/openwin msgfmt and GNU msgfmt <= 0.10.35 don't know
637      about multibyte encodings, and require a spurious backslash after
638      every multibyte character whose last byte is 0x5C.  Some programs,
639      like vim, distribute PO files in this broken format.  It is important
640      for such programs that GNU msgmerge continues to support this old
641      PO file format when the Makefile requests it.  */
642   envval = getenv ("OLD_PO_FILE_OUTPUT");
643   if (envval != NULL && *envval != '\0')
644     /* Write a PO file in old format, with extraneous backslashes.  */
645     conv = (iconv_t)(-1);
646   else
647     if (canon_charset == NULL)
648       /* Invalid PO file encoding.  */
649       conv = (iconv_t)(-1);
650     else
651       /* Avoid glibc-2.1 bug with EUC-KR.  */
652 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
653      && !defined _LIBICONV_VERSION
654       if (strcmp (canon_charset, "EUC-KR") == 0)
655         conv = (iconv_t)(-1);
656       else
657 # endif
658       /* Avoid Solaris 2.9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS, GBK,
659          GB18030.  */
660 # if defined __sun && !defined _LIBICONV_VERSION
661       if (   strcmp (canon_charset, "GB2312") == 0
662           || strcmp (canon_charset, "EUC-TW") == 0
663           || strcmp (canon_charset, "BIG5") == 0
664           || strcmp (canon_charset, "BIG5-HKSCS") == 0
665           || strcmp (canon_charset, "GBK") == 0
666           || strcmp (canon_charset, "GB18030") == 0)
667         conv = (iconv_t)(-1);
668       else
669 # endif
670       /* Use iconv() to parse multibyte characters.  */
671       conv = iconv_open ("UTF-8", canon_charset);
672 
673   if (conv != (iconv_t)(-1))
674     weird_cjk = false;
675   else
676 #endif
677     if (canon_charset == NULL)
678       weird_cjk = false;
679     else
680       weird_cjk = po_is_charset_weird_cjk (canon_charset);
681 
682   if (canon_charset == NULL)
683     canon_charset = po_charset_ascii;
684 
685   /* Determine the extent of format string directives.  */
686   fmtdir = NULL;
687   fmtdirattr = NULL;
688   if (value[0] != '\0')
689     {
690       bool is_msgstr =
691         (strlen (name) >= 6 && memcmp (name, "msgstr", 6) == 0);
692         /* or equivalent: = (css_class == class_msgstr) */
693       size_t i;
694 
695       for (i = 0; i < NFORMATS; i++)
696         if (possible_format_p (mp->is_format[i]))
697           {
698             size_t len = strlen (value);
699             struct formatstring_parser *parser = formatstring_parsers[i];
700             char *invalid_reason = NULL;
701             void *descr;
702             const char *fdp;
703             const char *fd_end;
704             char *fdap;
705 
706             fmtdir = XCALLOC (len, char);
707             descr = parser->parse (value, is_msgstr, fmtdir, &invalid_reason);
708             if (descr != NULL)
709               parser->free (descr);
710 
711             /* Locate the FMTDIR_* bits and transform the array to an array
712                of attributes.  */
713             fmtdirattr = XCALLOC (len, char);
714             fd_end = fmtdir + len;
715             for (fdp = fmtdir, fdap = fmtdirattr; fdp < fd_end; fdp++, fdap++)
716               if (*fdp & FMTDIR_START)
717                 {
718                   const char *fdq;
719                   for (fdq = fdp; fdq < fd_end; fdq++)
720                     if (*fdq & (FMTDIR_END | FMTDIR_ERROR))
721                       break;
722                   if (!(fdq < fd_end))
723                     /* The ->parse method has determined the start of a
724                        formatstring directive but not stored a bit indicating
725                        its end. It is a bug in the ->parse method.  */
726                     abort ();
727                   if (*fdq & FMTDIR_ERROR)
728                     memset (fdap, ATTR_INVALID_FORMAT_DIRECTIVE, fdq - fdp + 1);
729                   else
730                     memset (fdap, ATTR_FORMAT_DIRECTIVE, fdq - fdp + 1);
731                   fdap += fdq - fdp;
732                   fdp = fdq;
733                 }
734               else
735                 *fdap = 0;
736 
737             break;
738           }
739     }
740 
741   /* Loop over the '\n' delimited portions of value.  */
742   s = value;
743   first_line = true;
744   do
745     {
746       /* The usual escapes, as defined by the ANSI C Standard.  */
747 #     define is_escape(c) \
748         ((c) == '\a' || (c) == '\b' || (c) == '\f' || (c) == '\n' \
749          || (c) == '\r' || (c) == '\t' || (c) == '\v')
750 
751       const char *es;
752       const char *ep;
753       size_t portion_len;
754       char *portion;
755       char *overrides;
756       char *attributes;
757       char *linebreaks;
758       char *pp;
759       char *op;
760       char *ap;
761       int startcol, startcol_after_break, width;
762       size_t i;
763 
764       for (es = s; *es != '\0'; )
765         if (*es++ == '\n')
766           break;
767 
768       /* Expand escape sequences in each portion.  */
769       for (ep = s, portion_len = 0; ep < es; ep++)
770         {
771           char c = *ep;
772           if (is_escape (c))
773             portion_len += 2;
774           else if (escape && !c_isprint ((unsigned char) c))
775             portion_len += 4;
776           else if (c == '\\' || c == '"')
777             portion_len += 2;
778           else
779             {
780 #if HAVE_ICONV
781               if (conv != (iconv_t)(-1))
782                 {
783                   /* Skip over a complete multi-byte character.  Don't
784                      interpret the second byte of a multi-byte character as
785                      ASCII.  This is needed for the BIG5, BIG5-HKSCS, GBK,
786                      GB18030, SHIFT_JIS, JOHAB encodings.  */
787                   char scratchbuf[64];
788                   const char *inptr = ep;
789                   size_t insize;
790                   char *outptr = &scratchbuf[0];
791                   size_t outsize = sizeof (scratchbuf);
792                   size_t res;
793 
794                   res = (size_t)(-1);
795                   for (insize = 1; inptr + insize <= es; insize++)
796                     {
797                       res = iconv (conv,
798                                    (ICONV_CONST char **) &inptr, &insize,
799                                    &outptr, &outsize);
800                       if (!(res == (size_t)(-1) && errno == EINVAL))
801                         break;
802                       /* We expect that no input bytes have been consumed
803                          so far.  */
804                       if (inptr != ep)
805                         abort ();
806                     }
807                   if (res == (size_t)(-1))
808                     {
809                       if (errno == EILSEQ)
810                         {
811                           po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
812                                      _("invalid multibyte sequence"));
813                           continue;
814                         }
815                       else if (errno == EINVAL)
816                         {
817                           /* This could happen if an incomplete
818                              multibyte sequence at the end of input
819                              bytes.  */
820                           po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
821                                      _("incomplete multibyte sequence"));
822                           continue;
823                         }
824                       else
825                         abort ();
826                     }
827                   insize = inptr - ep;
828                   portion_len += insize;
829                   ep += insize - 1;
830                 }
831               else
832 #endif
833                 {
834                   if (weird_cjk
835                       /* Special handling of encodings with CJK structure.  */
836                       && ep + 2 <= es
837                       && (unsigned char) ep[0] >= 0x80
838                       && (unsigned char) ep[1] >= 0x30)
839                     {
840                       portion_len += 2;
841                       ep += 1;
842                     }
843                   else
844                     portion_len += 1;
845                 }
846             }
847         }
848       portion = XNMALLOC (portion_len, char);
849       overrides = XNMALLOC (portion_len, char);
850       attributes = XNMALLOC (portion_len, char);
851       for (ep = s, pp = portion, op = overrides, ap = attributes; ep < es; ep++)
852         {
853           char c = *ep;
854           char attr = (fmtdirattr != NULL ? fmtdirattr[ep - value] : 0);
855           char brk = UC_BREAK_UNDEFINED;
856           /* Don't break inside format directives.  */
857           if (attr == ATTR_FORMAT_DIRECTIVE
858               && (fmtdir[ep - value] & FMTDIR_START) == 0)
859             brk = UC_BREAK_PROHIBITED;
860           if (is_escape (c))
861             {
862               switch (c)
863                 {
864                 case '\a': c = 'a'; break;
865                 case '\b': c = 'b'; break;
866                 case '\f': c = 'f'; break;
867                 case '\n': c = 'n'; break;
868                 case '\r': c = 'r'; break;
869                 case '\t': c = 't'; break;
870                 case '\v': c = 'v'; break;
871                 default: abort ();
872                 }
873               *pp++ = '\\';
874               *pp++ = c;
875               *op++ = brk;
876               *op++ = UC_BREAK_PROHIBITED;
877               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
878               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
879               /* We warn about any use of escape sequences beside
880                  '\n' and '\t'.  */
881               if (c != 'n' && c != 't')
882                 {
883                   char *error_message =
884                     xasprintf (_("internationalized messages should not contain the '\\%c' escape sequence"),
885                                c);
886                   po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, false,
887                              error_message);
888                   free (error_message);
889                 }
890             }
891           else if (escape && !c_isprint ((unsigned char) c))
892             {
893               *pp++ = '\\';
894               *pp++ = '0' + (((unsigned char) c >> 6) & 7);
895               *pp++ = '0' + (((unsigned char) c >> 3) & 7);
896               *pp++ = '0' + ((unsigned char) c & 7);
897               *op++ = brk;
898               *op++ = UC_BREAK_PROHIBITED;
899               *op++ = UC_BREAK_PROHIBITED;
900               *op++ = UC_BREAK_PROHIBITED;
901               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
902               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
903               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
904               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
905             }
906           else if (c == '\\' || c == '"')
907             {
908               *pp++ = '\\';
909               *pp++ = c;
910               *op++ = brk;
911               *op++ = UC_BREAK_PROHIBITED;
912               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
913               *ap++ = attr | ATTR_ESCAPE_SEQUENCE;
914             }
915           else
916             {
917 #if HAVE_ICONV
918               if (conv != (iconv_t)(-1))
919                 {
920                   /* Copy a complete multi-byte character.  Don't
921                      interpret the second byte of a multi-byte character as
922                      ASCII.  This is needed for the BIG5, BIG5-HKSCS, GBK,
923                      GB18030, SHIFT_JIS, JOHAB encodings.  */
924                   char scratchbuf[64];
925                   const char *inptr = ep;
926                   size_t insize;
927                   char *outptr = &scratchbuf[0];
928                   size_t outsize = sizeof (scratchbuf);
929                   size_t res;
930 
931                   res = (size_t)(-1);
932                   for (insize = 1; inptr + insize <= es; insize++)
933                     {
934                       res = iconv (conv,
935                                    (ICONV_CONST char **) &inptr, &insize,
936                                    &outptr, &outsize);
937                       if (!(res == (size_t)(-1) && errno == EINVAL))
938                         break;
939                       /* We expect that no input bytes have been consumed
940                          so far.  */
941                       if (inptr != ep)
942                         abort ();
943                     }
944                   if (res == (size_t)(-1))
945                     {
946                       if (errno == EILSEQ)
947                         {
948                           po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0,
949                                      false, _("invalid multibyte sequence"));
950                           continue;
951                         }
952                       else
953                         abort ();
954                     }
955                   insize = inptr - ep;
956                   memcpy_small (pp, ep, insize);
957                   pp += insize;
958                   *op = brk;
959                   memset_small (op + 1, UC_BREAK_PROHIBITED, insize - 1);
960                   op += insize;
961                   memset_small (ap, attr, insize);
962                   ap += insize;
963                   ep += insize - 1;
964                 }
965               else
966 #endif
967                 {
968                   if (weird_cjk
969                       /* Special handling of encodings with CJK structure.  */
970                       && ep + 2 <= es
971                       && (unsigned char) c >= 0x80
972                       && (unsigned char) ep[1] >= 0x30)
973                     {
974                       *pp++ = c;
975                       ep += 1;
976                       *pp++ = *ep;
977                       *op++ = brk;
978                       *op++ = UC_BREAK_PROHIBITED;
979                       *ap++ = attr;
980                       *ap++ = attr;
981                     }
982                   else
983                     {
984                       *pp++ = c;
985                       *op++ = brk;
986                       *ap++ = attr;
987                     }
988                 }
989             }
990         }
991 
992       /* Don't break immediately before the "\n" at the end.  */
993       if (es > s && es[-1] == '\n')
994         overrides[portion_len - 2] = UC_BREAK_PROHIBITED;
995 
996       linebreaks = XNMALLOC (portion_len, char);
997 
998       /* Subsequent lines after a break are all indented.
999          See INDENT-S.  */
1000       startcol_after_break = (line_prefix ? strlen (line_prefix) : 0);
1001       if (indent)
1002         startcol_after_break = (startcol_after_break + extra_indent + 8) & ~7;
1003       startcol_after_break++;
1004 
1005       /* The line width.  Allow room for the closing quote character.  */
1006       width = (wrap_strings && do_wrap != no ? page_width : INT_MAX) - 1;
1007       /* Adjust for indentation of subsequent lines.  */
1008       width -= startcol_after_break;
1009 
1010     recompute:
1011       /* The line starts with different things depending on whether it
1012          is the first line, and if we are using the indented style.
1013          See INDENT-F.  */
1014       startcol = (line_prefix ? strlen (line_prefix) : 0);
1015       if (first_line)
1016         {
1017           startcol += strlen (name);
1018           if (indent)
1019             startcol = (startcol + extra_indent + 8) & ~7;
1020           else
1021             startcol++;
1022         }
1023       else
1024         {
1025           if (indent)
1026             startcol = (startcol + extra_indent + 8) & ~7;
1027         }
1028       /* Allow room for the opening quote character.  */
1029       startcol++;
1030       /* Adjust for indentation of subsequent lines.  */
1031       startcol -= startcol_after_break;
1032 
1033       /* Do line breaking on the portion.  */
1034       ulc_width_linebreaks (portion, portion_len, width, startcol, 0,
1035                             overrides, canon_charset, linebreaks);
1036 
1037       /* If this is the first line, and we are not using the indented
1038          style, and the line would wrap, then use an empty first line
1039          and restart.  */
1040       if (first_line && !indent
1041           && portion_len > 0
1042           && (*es != '\0'
1043               || startcol > width
1044               || memchr (linebreaks, UC_BREAK_POSSIBLE, portion_len) != NULL))
1045         {
1046           if (line_prefix != NULL)
1047             ostream_write_str (stream, line_prefix);
1048           begin_css_class (stream, css_class);
1049           begin_css_class (stream, class_keyword);
1050           ostream_write_str (stream, name);
1051           end_css_class (stream, class_keyword);
1052           ostream_write_str (stream, " ");
1053           begin_css_class (stream, class_string);
1054           ostream_write_str (stream, "\"\"");
1055           end_css_class (stream, class_string);
1056           end_css_class (stream, css_class);
1057           ostream_write_str (stream, "\n");
1058           first_line = false;
1059           /* Recompute startcol and linebreaks.  */
1060           goto recompute;
1061         }
1062 
1063       /* Print the beginning of the line.  This will depend on whether
1064          this is the first line, and if the indented style is being
1065          used.  INDENT-F.  */
1066       {
1067         int currcol = 0;
1068 
1069         if (line_prefix != NULL)
1070           {
1071             ostream_write_str (stream, line_prefix);
1072             currcol = strlen (line_prefix);
1073           }
1074         begin_css_class (stream, css_class);
1075         if (first_line)
1076           {
1077             begin_css_class (stream, class_keyword);
1078             ostream_write_str (stream, name);
1079             currcol += strlen (name);
1080             end_css_class (stream, class_keyword);
1081             if (indent)
1082               {
1083                 if (extra_indent > 0)
1084                   ostream_write_mem (stream, "        ", extra_indent);
1085                 currcol += extra_indent;
1086                 ostream_write_mem (stream, "        ", 8 - (currcol & 7));
1087                 currcol = (currcol + 8) & ~7;
1088               }
1089             else
1090               {
1091                 ostream_write_str (stream, " ");
1092                 currcol++;
1093               }
1094             first_line = false;
1095           }
1096         else
1097           {
1098             if (indent)
1099               {
1100                 if (extra_indent > 0)
1101                   ostream_write_mem (stream, "        ", extra_indent);
1102                 currcol += extra_indent;
1103                 ostream_write_mem (stream, "        ", 8 - (currcol & 7));
1104                 currcol = (currcol + 8) & ~7;
1105               }
1106           }
1107       }
1108 
1109       /* Print the portion itself, with linebreaks where necessary.  */
1110       {
1111         char currattr = 0;
1112 
1113         begin_css_class (stream, class_string);
1114         ostream_write_str (stream, "\"");
1115         begin_css_class (stream, class_text);
1116 
1117         for (i = 0; i < portion_len; i++)
1118           {
1119             if (linebreaks[i] == UC_BREAK_POSSIBLE)
1120               {
1121                 int currcol;
1122 
1123                 /* Change currattr so that it becomes 0.  */
1124                 if (currattr & ATTR_ESCAPE_SEQUENCE)
1125                   {
1126                     end_css_class (stream, class_escape_sequence);
1127                     currattr &= ~ATTR_ESCAPE_SEQUENCE;
1128                   }
1129                 if (currattr & ATTR_FORMAT_DIRECTIVE)
1130                   {
1131                     end_css_class (stream, class_format_directive);
1132                     currattr &= ~ATTR_FORMAT_DIRECTIVE;
1133                   }
1134                 else if (currattr & ATTR_INVALID_FORMAT_DIRECTIVE)
1135                   {
1136                     end_css_class (stream, class_invalid_format_directive);
1137                     currattr &= ~ATTR_INVALID_FORMAT_DIRECTIVE;
1138                   }
1139                 if (!(currattr == 0))
1140                   abort ();
1141 
1142                 end_css_class (stream, class_text);
1143                 ostream_write_str (stream, "\"");
1144                 end_css_class (stream, class_string);
1145                 end_css_class (stream, css_class);
1146                 ostream_write_str (stream, "\n");
1147                 currcol = 0;
1148                 /* INDENT-S.  */
1149                 if (line_prefix != NULL)
1150                   {
1151                     ostream_write_str (stream, line_prefix);
1152                     currcol = strlen (line_prefix);
1153                   }
1154                 begin_css_class (stream, css_class);
1155                 if (indent)
1156                   {
1157                     ostream_write_mem (stream, "        ", 8 - (currcol & 7));
1158                     currcol = (currcol + 8) & ~7;
1159                   }
1160                 begin_css_class (stream, class_string);
1161                 ostream_write_str (stream, "\"");
1162                 begin_css_class (stream, class_text);
1163               }
1164             /* Change currattr so that it matches attributes[i].  */
1165             if (attributes[i] != currattr)
1166               {
1167                 /* class_escape_sequence occurs inside class_format_directive
1168                    and class_invalid_format_directive, so clear it first.  */
1169                 if (currattr & ATTR_ESCAPE_SEQUENCE)
1170                   {
1171                     end_css_class (stream, class_escape_sequence);
1172                     currattr &= ~ATTR_ESCAPE_SEQUENCE;
1173                   }
1174                 if (~attributes[i] & currattr & ATTR_FORMAT_DIRECTIVE)
1175                   {
1176                     end_css_class (stream, class_format_directive);
1177                     currattr &= ~ATTR_FORMAT_DIRECTIVE;
1178                   }
1179                 else if (~attributes[i] & currattr & ATTR_INVALID_FORMAT_DIRECTIVE)
1180                   {
1181                     end_css_class (stream, class_invalid_format_directive);
1182                     currattr &= ~ATTR_INVALID_FORMAT_DIRECTIVE;
1183                   }
1184                 if (attributes[i] & ~currattr & ATTR_FORMAT_DIRECTIVE)
1185                   {
1186                     begin_css_class (stream, class_format_directive);
1187                     currattr |= ATTR_FORMAT_DIRECTIVE;
1188                   }
1189                 else if (attributes[i] & ~currattr & ATTR_INVALID_FORMAT_DIRECTIVE)
1190                   {
1191                     begin_css_class (stream, class_invalid_format_directive);
1192                     currattr |= ATTR_INVALID_FORMAT_DIRECTIVE;
1193                   }
1194                 /* class_escape_sequence occurs inside class_format_directive
1195                    and class_invalid_format_directive, so set it last.  */
1196                 if (attributes[i] & ~currattr & ATTR_ESCAPE_SEQUENCE)
1197                   {
1198                     begin_css_class (stream, class_escape_sequence);
1199                     currattr |= ATTR_ESCAPE_SEQUENCE;
1200                   }
1201               }
1202             ostream_write_mem (stream, &portion[i], 1);
1203           }
1204 
1205         /* Change currattr so that it becomes 0.  */
1206         if (currattr & ATTR_ESCAPE_SEQUENCE)
1207           {
1208             end_css_class (stream, class_escape_sequence);
1209             currattr &= ~ATTR_ESCAPE_SEQUENCE;
1210           }
1211         if (currattr & ATTR_FORMAT_DIRECTIVE)
1212           {
1213             end_css_class (stream, class_format_directive);
1214             currattr &= ~ATTR_FORMAT_DIRECTIVE;
1215           }
1216         else if (currattr & ATTR_INVALID_FORMAT_DIRECTIVE)
1217           {
1218             end_css_class (stream, class_invalid_format_directive);
1219             currattr &= ~ATTR_INVALID_FORMAT_DIRECTIVE;
1220           }
1221         if (!(currattr == 0))
1222           abort ();
1223 
1224         end_css_class (stream, class_text);
1225         ostream_write_str (stream, "\"");
1226         end_css_class (stream, class_string);
1227         end_css_class (stream, css_class);
1228         ostream_write_str (stream, "\n");
1229       }
1230 
1231       free (linebreaks);
1232       free (attributes);
1233       free (overrides);
1234       free (portion);
1235 
1236       s = es;
1237 #     undef is_escape
1238     }
1239   while (*s);
1240 
1241   if (fmtdirattr != NULL)
1242     free (fmtdirattr);
1243   if (fmtdir != NULL)
1244     free (fmtdir);
1245 
1246 #if HAVE_ICONV
1247   if (conv != (iconv_t)(-1))
1248     iconv_close (conv);
1249 #endif
1250 }
1251 
1252 
1253 static void
print_blank_line(ostream_t stream)1254 print_blank_line (ostream_t stream)
1255 {
1256   if (uniforum)
1257     {
1258       begin_css_class (stream, class_comment);
1259       ostream_write_str (stream, "#\n");
1260       end_css_class (stream, class_comment);
1261     }
1262   else
1263     ostream_write_str (stream, "\n");
1264 }
1265 
1266 
1267 static void
message_print(const message_ty * mp,ostream_t stream,const char * charset,size_t page_width,bool blank_line,bool debug)1268 message_print (const message_ty *mp, ostream_t stream,
1269                const char *charset, size_t page_width, bool blank_line,
1270                bool debug)
1271 {
1272   int extra_indent;
1273 
1274   /* Separate messages with a blank line.  Uniforum doesn't like blank
1275      lines, so use an empty comment (unless there already is one).  */
1276   if (blank_line && (!uniforum
1277                      || mp->comment == NULL
1278                      || mp->comment->nitems == 0
1279                      || mp->comment->item[0][0] != '\0'))
1280     print_blank_line (stream);
1281 
1282   if (is_header (mp))
1283     begin_css_class (stream, class_header);
1284   else if (mp->msgstr[0] == '\0')
1285     begin_css_class (stream, class_untranslated);
1286   else if (mp->is_fuzzy)
1287     begin_css_class (stream, class_fuzzy);
1288   else
1289     begin_css_class (stream, class_translated);
1290 
1291   begin_css_class (stream, class_comment);
1292 
1293   /* Print translator comment if available.  */
1294   message_print_comment (mp, stream);
1295 
1296   /* Print xgettext extracted comments.  */
1297   message_print_comment_dot (mp, stream);
1298 
1299   /* Print the file position comments.  This will help a human who is
1300      trying to navigate the sources.  There is no problem of getting
1301      repeated positions, because duplicates are checked for.  */
1302   message_print_comment_filepos (mp, stream, uniforum, page_width);
1303 
1304   /* Print flag information in special comment.  */
1305   message_print_comment_flags (mp, stream, debug);
1306 
1307   /* Print the previous msgid.  This helps the translator when the msgid has
1308      only slightly changed.  */
1309   begin_css_class (stream, class_previous_comment);
1310   if (mp->prev_msgctxt != NULL)
1311     wrap (mp, stream, "#| ", 0, class_previous, "msgctxt", mp->prev_msgctxt,
1312           mp->do_wrap, page_width, charset);
1313   if (mp->prev_msgid != NULL)
1314     wrap (mp, stream, "#| ", 0, class_previous, "msgid", mp->prev_msgid,
1315           mp->do_wrap, page_width, charset);
1316   if (mp->prev_msgid_plural != NULL)
1317     wrap (mp, stream, "#| ", 0, class_previous, "msgid_plural",
1318           mp->prev_msgid_plural, mp->do_wrap, page_width, charset);
1319   end_css_class (stream, class_previous_comment);
1320   extra_indent = (mp->prev_msgctxt != NULL || mp->prev_msgid != NULL
1321                   || mp->prev_msgid_plural != NULL
1322                   ? 3
1323                   : 0);
1324 
1325   end_css_class (stream, class_comment);
1326 
1327   /* Print each of the message components.  Wrap them nicely so they
1328      are as readable as possible.  If there is no recorded msgstr for
1329      this domain, emit an empty string.  */
1330   if (mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt)
1331       && po_charset_canonicalize (charset) != po_charset_utf8)
1332     {
1333       char *warning_message =
1334         xasprintf (_("\
1335 The following msgctxt contains non-ASCII characters.\n\
1336 This will cause problems to translators who use a character encoding\n\
1337 different from yours. Consider using a pure ASCII msgctxt instead.\n\
1338 %s\n"), mp->msgctxt);
1339       po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
1340       free (warning_message);
1341     }
1342   if (!is_ascii_string (mp->msgid)
1343       && po_charset_canonicalize (charset) != po_charset_utf8)
1344     {
1345       char *warning_message =
1346         xasprintf (_("\
1347 The following msgid contains non-ASCII characters.\n\
1348 This will cause problems to translators who use a character encoding\n\
1349 different from yours. Consider using a pure ASCII msgid instead.\n\
1350 %s\n"), mp->msgid);
1351       po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
1352       free (warning_message);
1353     }
1354   if (mp->msgctxt != NULL)
1355     wrap (mp, stream, NULL, extra_indent, class_msgid, "msgctxt", mp->msgctxt,
1356           mp->do_wrap, page_width, charset);
1357   wrap (mp, stream, NULL, extra_indent, class_msgid, "msgid", mp->msgid,
1358         mp->do_wrap, page_width, charset);
1359   if (mp->msgid_plural != NULL)
1360     wrap (mp, stream, NULL, extra_indent, class_msgid, "msgid_plural",
1361           mp->msgid_plural, mp->do_wrap, page_width, charset);
1362 
1363   if (mp->msgid_plural == NULL)
1364     wrap (mp, stream, NULL, extra_indent, class_msgstr, "msgstr", mp->msgstr,
1365           mp->do_wrap, page_width, charset);
1366   else
1367     {
1368       char prefix_buf[20];
1369       unsigned int i;
1370       const char *p;
1371 
1372       for (p = mp->msgstr, i = 0;
1373            p < mp->msgstr + mp->msgstr_len;
1374            p += strlen (p) + 1, i++)
1375         {
1376           sprintf (prefix_buf, "msgstr[%u]", i);
1377           wrap (mp, stream, NULL, extra_indent, class_msgstr, prefix_buf, p,
1378                 mp->do_wrap, page_width, charset);
1379         }
1380     }
1381 
1382   if (is_header (mp))
1383     end_css_class (stream, class_header);
1384   else if (mp->msgstr[0] == '\0')
1385     end_css_class (stream, class_untranslated);
1386   else if (mp->is_fuzzy)
1387     end_css_class (stream, class_fuzzy);
1388   else
1389     end_css_class (stream, class_translated);
1390 }
1391 
1392 
1393 static void
message_print_obsolete(const message_ty * mp,ostream_t stream,const char * charset,size_t page_width,bool blank_line,bool debug)1394 message_print_obsolete (const message_ty *mp, ostream_t stream,
1395                         const char *charset, size_t page_width, bool blank_line,
1396                         bool debug)
1397 {
1398   int extra_indent;
1399 
1400   /* If msgstr is the empty string we print nothing.  */
1401   if (mp->msgstr[0] == '\0')
1402     return;
1403 
1404   /* Separate messages with a blank line.  Uniforum doesn't like blank
1405      lines, so use an empty comment (unless there already is one).  */
1406   if (blank_line)
1407     print_blank_line (stream);
1408 
1409   begin_css_class (stream, class_obsolete);
1410 
1411   begin_css_class (stream, class_comment);
1412 
1413   /* Print translator comment if available.  */
1414   message_print_comment (mp, stream);
1415 
1416   /* Print xgettext extracted comments (normally empty).  */
1417   message_print_comment_dot (mp, stream);
1418 
1419   /* Print the file position comments (normally empty).  */
1420   message_print_comment_filepos (mp, stream, uniforum, page_width);
1421 
1422   /* Print flag information in special comment.
1423      Preserve only
1424        - the fuzzy flag, because it is important for the translator when the
1425          message becomes active again,
1426        - the no-wrap flag, because we use mp->do_wrap below for the wrapping,
1427          therefore further processing through 'msgcat' needs to use the same
1428          value of do_wrap,
1429        - the *-format flags, because the wrapping depends on these flags (see
1430          'Don't break inside format directives' comment), therefore further
1431          processing through 'msgcat' needs to use the same values of is_format.
1432      This is a trimmed-down variant of message_print_comment_flags.  */
1433   if (mp->is_fuzzy
1434       || has_significant_format_p (mp->is_format)
1435       || mp->do_wrap == no)
1436     {
1437       bool first_flag = true;
1438       size_t i;
1439 
1440       ostream_write_str (stream, "#,");
1441 
1442       if (mp->is_fuzzy)
1443         {
1444           ostream_write_str (stream, " fuzzy");
1445           first_flag = false;
1446         }
1447 
1448       for (i = 0; i < NFORMATS; i++)
1449         if (significant_format_p (mp->is_format[i]))
1450           {
1451             if (!first_flag)
1452               ostream_write_str (stream, ",");
1453 
1454             ostream_write_str (stream, " ");
1455             ostream_write_str (stream,
1456                                make_format_description_string (mp->is_format[i],
1457                                                                format_language[i],
1458                                                                debug));
1459             first_flag = false;
1460           }
1461 
1462       if (mp->do_wrap == no)
1463         {
1464           if (!first_flag)
1465             ostream_write_str (stream, ",");
1466 
1467           ostream_write_str (stream, " ");
1468           ostream_write_str (stream,
1469                              make_c_width_description_string (mp->do_wrap));
1470           first_flag = false;
1471         }
1472 
1473       ostream_write_str (stream, "\n");
1474     }
1475 
1476   /* Print the previous msgid.  This helps the translator when the msgid has
1477      only slightly changed.  */
1478   begin_css_class (stream, class_previous_comment);
1479   if (mp->prev_msgctxt != NULL)
1480     wrap (mp, stream, "#~| ", 0, class_previous, "msgctxt", mp->prev_msgctxt,
1481           mp->do_wrap, page_width, charset);
1482   if (mp->prev_msgid != NULL)
1483     wrap (mp, stream, "#~| ", 0, class_previous, "msgid", mp->prev_msgid,
1484           mp->do_wrap, page_width, charset);
1485   if (mp->prev_msgid_plural != NULL)
1486     wrap (mp, stream, "#~| ", 0, class_previous, "msgid_plural",
1487           mp->prev_msgid_plural, mp->do_wrap, page_width, charset);
1488   end_css_class (stream, class_previous_comment);
1489   extra_indent = (mp->prev_msgctxt != NULL || mp->prev_msgid != NULL
1490                   || mp->prev_msgid_plural != NULL
1491                   ? 1
1492                   : 0);
1493 
1494   end_css_class (stream, class_comment);
1495 
1496   /* Print each of the message components.  Wrap them nicely so they
1497      are as readable as possible.  */
1498   if (mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt)
1499       && po_charset_canonicalize (charset) != po_charset_utf8)
1500     {
1501       char *warning_message =
1502         xasprintf (_("\
1503 The following msgctxt contains non-ASCII characters.\n\
1504 This will cause problems to translators who use a character encoding\n\
1505 different from yours. Consider using a pure ASCII msgctxt instead.\n\
1506 %s\n"), mp->msgctxt);
1507       po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
1508       free (warning_message);
1509     }
1510   if (!is_ascii_string (mp->msgid)
1511       && po_charset_canonicalize (charset) != po_charset_utf8)
1512     {
1513       char *warning_message =
1514         xasprintf (_("\
1515 The following msgid contains non-ASCII characters.\n\
1516 This will cause problems to translators who use a character encoding\n\
1517 different from yours. Consider using a pure ASCII msgid instead.\n\
1518 %s\n"), mp->msgid);
1519       po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
1520       free (warning_message);
1521     }
1522   if (mp->msgctxt != NULL)
1523     wrap (mp, stream, "#~ ", extra_indent, class_msgid, "msgctxt", mp->msgctxt,
1524           mp->do_wrap, page_width, charset);
1525   wrap (mp, stream, "#~ ", extra_indent, class_msgid, "msgid", mp->msgid,
1526         mp->do_wrap, page_width, charset);
1527   if (mp->msgid_plural != NULL)
1528     wrap (mp, stream, "#~ ", extra_indent, class_msgid, "msgid_plural",
1529           mp->msgid_plural, mp->do_wrap, page_width, charset);
1530 
1531   if (mp->msgid_plural == NULL)
1532     wrap (mp, stream, "#~ ", extra_indent, class_msgstr, "msgstr", mp->msgstr,
1533           mp->do_wrap, page_width, charset);
1534   else
1535     {
1536       char prefix_buf[20];
1537       unsigned int i;
1538       const char *p;
1539 
1540       for (p = mp->msgstr, i = 0;
1541            p < mp->msgstr + mp->msgstr_len;
1542            p += strlen (p) + 1, i++)
1543         {
1544           sprintf (prefix_buf, "msgstr[%u]", i);
1545           wrap (mp, stream, "#~ ", extra_indent, class_msgstr, prefix_buf, p,
1546                 mp->do_wrap, page_width, charset);
1547         }
1548     }
1549 
1550   end_css_class (stream, class_obsolete);
1551 }
1552 
1553 
1554 static void
msgdomain_list_print_po(msgdomain_list_ty * mdlp,ostream_t stream,size_t page_width,bool debug)1555 msgdomain_list_print_po (msgdomain_list_ty *mdlp, ostream_t stream,
1556                          size_t page_width, bool debug)
1557 {
1558   size_t j, k;
1559   bool blank_line;
1560 
1561   /* Write out the messages for each domain.  */
1562   blank_line = false;
1563   for (k = 0; k < mdlp->nitems; k++)
1564     {
1565       message_list_ty *mlp;
1566       const char *header;
1567       const char *charset;
1568       char *allocated_charset;
1569 
1570       /* If the first domain is the default, don't bother emitting
1571          the domain name, because it is the default.  */
1572       if (!(k == 0
1573             && strcmp (mdlp->item[k]->domain, MESSAGE_DOMAIN_DEFAULT) == 0))
1574         {
1575           if (blank_line)
1576             print_blank_line (stream);
1577           begin_css_class (stream, class_keyword);
1578           ostream_write_str (stream, "domain");
1579           end_css_class (stream, class_keyword);
1580           ostream_write_str (stream, " ");
1581           begin_css_class (stream, class_string);
1582           ostream_write_str (stream, "\"");
1583           begin_css_class (stream, class_text);
1584           ostream_write_str (stream, mdlp->item[k]->domain);
1585           end_css_class (stream, class_text);
1586           ostream_write_str (stream, "\"");
1587           end_css_class (stream, class_string);
1588           ostream_write_str (stream, "\n");
1589           blank_line = true;
1590         }
1591 
1592       mlp = mdlp->item[k]->messages;
1593 
1594       /* Search the header entry.  */
1595       header = NULL;
1596       for (j = 0; j < mlp->nitems; ++j)
1597         if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1598           {
1599             header = mlp->item[j]->msgstr;
1600             break;
1601           }
1602 
1603       /* Extract the charset name.  */
1604       charset = "ASCII";
1605       allocated_charset = NULL;
1606       if (header != NULL)
1607         {
1608           const char *charsetstr = c_strstr (header, "charset=");
1609 
1610           if (charsetstr != NULL)
1611             {
1612               size_t len;
1613 
1614               charsetstr += strlen ("charset=");
1615               len = strcspn (charsetstr, " \t\n");
1616               allocated_charset = (char *) xmalloca (len + 1);
1617               memcpy (allocated_charset, charsetstr, len);
1618               allocated_charset[len] = '\0';
1619               charset = allocated_charset;
1620 
1621               /* Treat the dummy default value as if it were absent.  */
1622               if (strcmp (charset, "CHARSET") == 0)
1623                 charset = "ASCII";
1624             }
1625         }
1626 
1627       /* Write out each of the messages for this domain.  */
1628       for (j = 0; j < mlp->nitems; ++j)
1629         if (!mlp->item[j]->obsolete)
1630           {
1631             message_print (mlp->item[j], stream, charset, page_width,
1632                            blank_line, debug);
1633             blank_line = true;
1634           }
1635 
1636       /* Write out each of the obsolete messages for this domain.  */
1637       for (j = 0; j < mlp->nitems; ++j)
1638         if (mlp->item[j]->obsolete)
1639           {
1640             message_print_obsolete (mlp->item[j], stream, charset, page_width,
1641                                     blank_line, debug);
1642             blank_line = true;
1643           }
1644 
1645       if (allocated_charset != NULL)
1646         freea (allocated_charset);
1647     }
1648 }
1649 
1650 
/* Describes a PO file in .po syntax.
   This is the catalog output format descriptor whose print function is
   msgdomain_list_print_po, defined above.  The initializer is positional;
   the comment after each value names the member it is meant to fill, so
   the order must stay in sync with the declaration of
   struct catalog_output_format (declared outside this file).  */
const struct catalog_output_format output_format_po =
{
  msgdomain_list_print_po,              /* print */
  false,                                /* requires_utf8 */
  true,                                 /* supports_color */
  true,                                 /* supports_multiple_domains */
  true,                                 /* supports_contexts */
  true,                                 /* supports_plurals */
  true,                                 /* sorts_obsoletes_to_end */
  false,                                /* alternative_is_po */
  false                                 /* alternative_is_java_class */
};
1664