/* GNU gettext - internationalization aids
   Copyright (C) 1995-1998, 2000-2006 Free Software Foundation, Inc.

   This file was written by Peter Miller <millerp@canb.auug.org.au>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19 
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23 #include <alloca.h>
24 
25 /* Specification.  */
26 #include "write-po.h"
27 
28 #include <errno.h>
29 #include <limits.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 
34 #if HAVE_ICONV
35 # include <iconv.h>
36 #endif
37 
38 #include "c-ctype.h"
39 #include "po-charset.h"
40 #include "linebreak.h"
41 #include "msgl-ascii.h"
42 #include "write-properties.h"
43 #include "write-stringtable.h"
44 #include "xalloc.h"
45 #include "xallocsa.h"
46 #include "c-strstr.h"
47 #include "xvasprintf.h"
48 #include "po-xerror.h"
49 #include "gettext.h"
50 
/* Our regular abbreviation: _("text") expands to gettext ("text").  */
#define _(str) gettext (str)

/* When the configuration reports that putc_unlocked is declared, every
   putc call below this point in the file expands to putc_unlocked.  */
#if HAVE_DECL_PUTC_UNLOCKED
# undef putc
# define putc putc_unlocked
#endif
58 
59 
/* =================== Putting together a #, flags line. =================== */
61 
62 
63 /* Convert IS_FORMAT in the context of programming language LANG to a flag
64    string for use in #, flags.  */
65 
66 const char *
make_format_description_string(enum is_format is_format,const char * lang,bool debug)67 make_format_description_string (enum is_format is_format, const char *lang,
68 				bool debug)
69 {
70   static char result[100];
71 
72   switch (is_format)
73     {
74     case possible:
75       if (debug)
76 	{
77 	  sprintf (result, " possible-%s-format", lang);
78 	  break;
79 	}
80       /* FALLTHROUGH */
81     case yes_according_to_context:
82     case yes:
83       sprintf (result, " %s-format", lang);
84       break;
85     case no:
86       sprintf (result, " no-%s-format", lang);
87       break;
88     default:
89       /* The others have already been filtered out by significant_format_p.  */
90       abort ();
91     }
92 
93   return result;
94 }
95 
96 
97 /* Return true if IS_FORMAT is worth mentioning in a #, flags list.  */
98 
99 bool
significant_format_p(enum is_format is_format)100 significant_format_p (enum is_format is_format)
101 {
102   return is_format != undecided && is_format != impossible;
103 }
104 
105 
106 /* Return true if one of IS_FORMAT is worth mentioning in a #, flags list.  */
107 
108 static bool
has_significant_format_p(const enum is_format is_format[NFORMATS])109 has_significant_format_p (const enum is_format is_format[NFORMATS])
110 {
111   size_t i;
112 
113   for (i = 0; i < NFORMATS; i++)
114     if (significant_format_p (is_format[i]))
115       return true;
116   return false;
117 }
118 
119 
120 /* Convert a wrapping flag DO_WRAP to a string for use in #, flags.  */
121 
122 static const char *
make_c_width_description_string(enum is_wrap do_wrap)123 make_c_width_description_string (enum is_wrap do_wrap)
124 {
125   const char *result = NULL;
126 
127   switch (do_wrap)
128     {
129     case yes:
130       result = " wrap";
131       break;
132     case no:
133       result = " no-wrap";
134       break;
135     default:
136       abort ();
137     }
138 
139   return result;
140 }
141 
142 
/* ================ Output parts of a message, as comments. ================ */
144 
145 
146 /* Output mp->comment as a set of comment lines.  */
147 
148 void
message_print_comment(const message_ty * mp,FILE * fp)149 message_print_comment (const message_ty *mp, FILE *fp)
150 {
151   if (mp->comment != NULL)
152     {
153       size_t j;
154 
155       for (j = 0; j < mp->comment->nitems; ++j)
156 	{
157 	  const char *s = mp->comment->item[j];
158 	  do
159 	    {
160 	      const char *e;
161 	      putc ('#', fp);
162 	      if (*s != '\0' && *s != ' ')
163 		putc (' ', fp);
164 	      e = strchr (s, '\n');
165 	      if (e == NULL)
166 		{
167 		  fputs (s, fp);
168 		  s = NULL;
169 		}
170 	      else
171 		{
172 		  fwrite (s, 1, e - s, fp);
173 		  s = e + 1;
174 		}
175 	      putc ('\n', fp);
176 	    }
177 	  while (s != NULL);
178 	}
179     }
180 }
181 
182 
183 /* Output mp->comment_dot as a set of comment lines.  */
184 
185 void
message_print_comment_dot(const message_ty * mp,FILE * fp)186 message_print_comment_dot (const message_ty *mp, FILE *fp)
187 {
188   if (mp->comment_dot != NULL)
189     {
190       size_t j;
191 
192       for (j = 0; j < mp->comment_dot->nitems; ++j)
193 	{
194 	  const char *s = mp->comment_dot->item[j];
195 	  putc ('#', fp);
196 	  putc ('.', fp);
197 	  if (*s != '\0' && *s != ' ')
198 	    putc (' ', fp);
199 	  fputs (s, fp);
200 	  putc ('\n', fp);
201 	}
202     }
203 }
204 
205 
206 /* Output mp->filepos as a set of comment lines.  */
207 
208 void
message_print_comment_filepos(const message_ty * mp,FILE * fp,bool uniforum,size_t page_width)209 message_print_comment_filepos (const message_ty *mp, FILE *fp,
210 			       bool uniforum, size_t page_width)
211 {
212   if (mp->filepos_count != 0)
213     {
214       if (uniforum)
215 	{
216 	  size_t j;
217 
218 	  for (j = 0; j < mp->filepos_count; ++j)
219 	    {
220 	      lex_pos_ty *pp = &mp->filepos[j];
221 	      char *cp = pp->file_name;
222 	      while (cp[0] == '.' && cp[1] == '/')
223 		cp += 2;
224 	      /* There are two Sun formats to choose from: SunOS and
225 		 Solaris.  Use the Solaris form here.  */
226 	      fprintf (fp, "# File: %s, line: %ld\n",
227 		       cp, (long) pp->line_number);
228 	    }
229 	}
230       else
231 	{
232 	  size_t column;
233 	  size_t j;
234 
235 	  fputs ("#:", fp);
236 	  column = 2;
237 	  for (j = 0; j < mp->filepos_count; ++j)
238 	    {
239 	      lex_pos_ty *pp;
240 	      char buffer[21];
241 	      char *cp;
242 	      size_t len;
243 
244 	      pp = &mp->filepos[j];
245 	      cp = pp->file_name;
246 	      while (cp[0] == '.' && cp[1] == '/')
247 		cp += 2;
248 	      /* Some xgettext input formats, like RST, lack line numbers.  */
249 	      if (pp->line_number == (size_t)(-1))
250 		buffer[0] = '\0';
251 	      else
252 		sprintf (buffer, ":%ld", (long) pp->line_number);
253 	      len = strlen (cp) + strlen (buffer) + 1;
254 	      if (column > 2 && column + len >= page_width)
255 		{
256 		  fputs ("\n#:", fp);
257 		  column = 2;
258 		}
259 	      fprintf (fp, " %s%s", cp, buffer);
260 	      column += len;
261 	    }
262 	  putc ('\n', fp);
263 	}
264     }
265 }
266 
267 
268 /* Output mp->is_fuzzy, mp->is_format, mp->do_wrap as a comment line.  */
269 
270 void
message_print_comment_flags(const message_ty * mp,FILE * fp,bool debug)271 message_print_comment_flags (const message_ty *mp, FILE *fp, bool debug)
272 {
273   if ((mp->is_fuzzy && mp->msgstr[0] != '\0')
274       || has_significant_format_p (mp->is_format)
275       || mp->do_wrap == no)
276     {
277       bool first_flag = true;
278       size_t i;
279 
280       putc ('#', fp);
281       putc (',', fp);
282 
283       /* We don't print the fuzzy flag if the msgstr is empty.  This
284 	 might be introduced by the user but we want to normalize the
285 	 output.  */
286       if (mp->is_fuzzy && mp->msgstr[0] != '\0')
287 	{
288 	  fputs (" fuzzy", fp);
289 	  first_flag = false;
290 	}
291 
292       for (i = 0; i < NFORMATS; i++)
293 	if (significant_format_p (mp->is_format[i]))
294 	  {
295 	    if (!first_flag)
296 	      putc (',', fp);
297 
298 	    fputs (make_format_description_string (mp->is_format[i],
299 						   format_language[i], debug),
300 		   fp);
301 	    first_flag = false;
302 	  }
303 
304       if (mp->do_wrap == no)
305 	{
306 	  if (!first_flag)
307 	    putc (',', fp);
308 
309 	  fputs (make_c_width_description_string (mp->do_wrap), fp);
310 	  first_flag = false;
311 	}
312 
313       putc ('\n', fp);
314     }
315 }
316 
317 
/* ========= Some parameters for use by 'msgdomain_list_print_po'. ========= */
319 
320 
/* This variable controls the extent to which the page width applies.
   True means it applies to message strings and file reference lines.
   False means it applies to file reference lines only.  */
static bool wrap_strings = true;

/* Switch off the application of the page width to message strings.
   There is no way to switch it back on through this interface.  */
void
message_page_width_ignore (void)
{
  wrap_strings = false;
}
331 
332 
/* These three variables control the output style of the message_print
   function.  Interface functions for them are to be used.  */
static bool indent = false;
static bool uniforum = false;
static bool escape = false;

/* Select the indented output style.  */
void
message_print_style_indent (void)
{
  indent = true;
}

/* Select the Uniforum output style.  */
void
message_print_style_uniforum (void)
{
  uniforum = true;
}

/* Set the 'escape' style flag to FLAG.  */
void
message_print_style_escape (bool flag)
{
  escape = flag;
}
356 
357 
/* =============== msgdomain_list_print_po() and subroutines. =============== */
359 
360 
/* A version of memcpy optimized for the case n <= 1.
   Copies N bytes from SRC to DST, front to back, one byte at a time.
   Does nothing when N is 0.  */
static inline void
memcpy_small (void *dst, const void *src, size_t n)
{
  if (n > 0)
    {
      char *q = (char *) dst;
      const char *p = (const char *) src;

      /* The first byte is copied without entering a loop.  */
      *q = *p;
      if (--n > 0)
        do *++q = *++p; while (--n > 0);
    }
}
375 
376 
377 static void
wrap(const message_ty * mp,FILE * fp,const char * line_prefix,int extra_indent,const char * name,const char * value,enum is_wrap do_wrap,size_t page_width,const char * charset)378 wrap (const message_ty *mp, FILE *fp, const char *line_prefix, int extra_indent,
379       const char *name, const char *value,
380       enum is_wrap do_wrap, size_t page_width,
381       const char *charset)
382 {
383   const char *canon_charset;
384   const char *s;
385   bool first_line;
386 #if HAVE_ICONV
387   const char *envval;
388   iconv_t conv;
389 #endif
390   bool weird_cjk;
391 
392   canon_charset = po_charset_canonicalize (charset);
393 
394 #if HAVE_ICONV
395   /* The old Solaris/openwin msgfmt and GNU msgfmt <= 0.10.35 don't know
396      about multibyte encodings, and require a spurious backslash after
397      every multibyte character whose last byte is 0x5C.  Some programs,
398      like vim, distribute PO files in this broken format.  It is important
399      for such programs that GNU msgmerge continues to support this old
400      PO file format when the Makefile requests it.  */
401   envval = getenv ("OLD_PO_FILE_OUTPUT");
402   if (envval != NULL && *envval != '\0')
403     /* Write a PO file in old format, with extraneous backslashes.  */
404     conv = (iconv_t)(-1);
405   else
406     if (canon_charset == NULL)
407       /* Invalid PO file encoding.  */
408       conv = (iconv_t)(-1);
409     else
410       /* Avoid glibc-2.1 bug with EUC-KR.  */
411 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
412       if (strcmp (canon_charset, "EUC-KR") == 0)
413 	conv = (iconv_t)(-1);
414       else
415 # endif
416       /* Avoid Solaris 2.9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS, GBK,
417 	 GB18030.  */
418 # if defined __sun && !defined _LIBICONV_VERSION
419       if (   strcmp (canon_charset, "GB2312") == 0
420 	  || strcmp (canon_charset, "EUC-TW") == 0
421 	  || strcmp (canon_charset, "BIG5") == 0
422 	  || strcmp (canon_charset, "BIG5-HKSCS") == 0
423 	  || strcmp (canon_charset, "GBK") == 0
424 	  || strcmp (canon_charset, "GB18030") == 0)
425 	conv = (iconv_t)(-1);
426       else
427 # endif
428       /* Use iconv() to parse multibyte characters.  */
429       conv = iconv_open ("UTF-8", canon_charset);
430 
431   if (conv != (iconv_t)(-1))
432     weird_cjk = false;
433   else
434 #endif
435     if (canon_charset == NULL)
436       weird_cjk = false;
437     else
438       weird_cjk = po_is_charset_weird_cjk (canon_charset);
439 
440   if (canon_charset == NULL)
441     canon_charset = po_charset_ascii;
442 
443   /* Loop over the '\n' delimited portions of value.  */
444   s = value;
445   first_line = true;
446   do
447     {
448       /* The usual escapes, as defined by the ANSI C Standard.  */
449 #     define is_escape(c) \
450         ((c) == '\a' || (c) == '\b' || (c) == '\f' || (c) == '\n' \
451          || (c) == '\r' || (c) == '\t' || (c) == '\v')
452 
453       const char *es;
454       const char *ep;
455       size_t portion_len;
456       char *portion;
457       char *overrides;
458       char *linebreaks;
459       char *pp;
460       char *op;
461       int startcol, startcol_after_break, width;
462       size_t i;
463 
464       for (es = s; *es != '\0'; )
465 	if (*es++ == '\n')
466 	  break;
467 
468       /* Expand escape sequences in each portion.  */
469       for (ep = s, portion_len = 0; ep < es; ep++)
470 	{
471 	  char c = *ep;
472 	  if (is_escape (c))
473 	    portion_len += 2;
474 	  else if (escape && !c_isprint ((unsigned char) c))
475 	    portion_len += 4;
476 	  else if (c == '\\' || c == '"')
477 	    portion_len += 2;
478 	  else
479 	    {
480 #if HAVE_ICONV
481 	      if (conv != (iconv_t)(-1))
482 		{
483 		  /* Skip over a complete multi-byte character.  Don't
484 		     interpret the second byte of a multi-byte character as
485 		     ASCII.  This is needed for the BIG5, BIG5-HKSCS, GBK,
486 		     GB18030, SHIFT_JIS, JOHAB encodings.  */
487 		  char scratchbuf[64];
488 		  const char *inptr = ep;
489 		  size_t insize;
490 		  char *outptr = &scratchbuf[0];
491 		  size_t outsize = sizeof (scratchbuf);
492 		  size_t res;
493 
494 		  res = (size_t)(-1);
495 		  for (insize = 1; inptr + insize <= es; insize++)
496 		    {
497 		      res = iconv (conv,
498 				   (ICONV_CONST char **) &inptr, &insize,
499 				   &outptr, &outsize);
500 		      if (!(res == (size_t)(-1) && errno == EINVAL))
501 			break;
502 		      /* We expect that no input bytes have been consumed
503 			 so far.  */
504 		      if (inptr != ep)
505 			abort ();
506 		    }
507 		  if (res == (size_t)(-1))
508 		    {
509 		      if (errno == EILSEQ)
510 			{
511 			  po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
512 				     _("invalid multibyte sequence"));
513 			  continue;
514 			}
515 		      else
516 			abort ();
517 		    }
518 		  insize = inptr - ep;
519 		  portion_len += insize;
520 		  ep += insize - 1;
521 		}
522 	      else
523 #endif
524 		{
525 		  if (weird_cjk
526 		      /* Special handling of encodings with CJK structure.  */
527 		      && ep + 2 <= es
528 		      && (unsigned char) ep[0] >= 0x80
529 		      && (unsigned char) ep[1] >= 0x30)
530 		    {
531 		      portion_len += 2;
532 		      ep += 1;
533 		    }
534 		  else
535 		    portion_len += 1;
536 		}
537 	    }
538 	}
539       portion = (char *) xmalloc (portion_len);
540       overrides = (char *) xmalloc (portion_len);
541       memset (overrides, UC_BREAK_UNDEFINED, portion_len);
542       for (ep = s, pp = portion, op = overrides; ep < es; ep++)
543 	{
544 	  char c = *ep;
545 	  if (is_escape (c))
546 	    {
547 	      switch (c)
548 		{
549 		case '\a': c = 'a'; break;
550 		case '\b': c = 'b'; break;
551 		case '\f': c = 'f'; break;
552 		case '\n': c = 'n'; break;
553 		case '\r': c = 'r'; break;
554 		case '\t': c = 't'; break;
555 		case '\v': c = 'v'; break;
556 		default: abort ();
557 		}
558 	      *pp++ = '\\';
559 	      *pp++ = c;
560 	      op++;
561 	      *op++ = UC_BREAK_PROHIBITED;
562 	      /* We warn about any use of escape sequences beside
563 		 '\n' and '\t'.  */
564 	      if (c != 'n' && c != 't')
565 		{
566 		  char *error_message =
567 		    xasprintf (_("\
568 internationalized messages should not contain the `\\%c' escape sequence"),
569 			       c);
570 		  po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
571 			     error_message);
572 		  free (error_message);
573 		}
574 	    }
575 	  else if (escape && !c_isprint ((unsigned char) c))
576 	    {
577 	      *pp++ = '\\';
578 	      *pp++ = '0' + (((unsigned char) c >> 6) & 7);
579 	      *pp++ = '0' + (((unsigned char) c >> 3) & 7);
580 	      *pp++ = '0' + ((unsigned char) c & 7);
581 	      op++;
582 	      *op++ = UC_BREAK_PROHIBITED;
583 	      *op++ = UC_BREAK_PROHIBITED;
584 	      *op++ = UC_BREAK_PROHIBITED;
585 	    }
586 	  else if (c == '\\' || c == '"')
587 	    {
588 	      *pp++ = '\\';
589 	      *pp++ = c;
590 	      op++;
591 	      *op++ = UC_BREAK_PROHIBITED;
592 	    }
593 	  else
594 	    {
595 #if HAVE_ICONV
596 	      if (conv != (iconv_t)(-1))
597 		{
598 		  /* Copy a complete multi-byte character.  Don't
599 		     interpret the second byte of a multi-byte character as
600 		     ASCII.  This is needed for the BIG5, BIG5-HKSCS, GBK,
601 		     GB18030, SHIFT_JIS, JOHAB encodings.  */
602 		  char scratchbuf[64];
603 		  const char *inptr = ep;
604 		  size_t insize;
605 		  char *outptr = &scratchbuf[0];
606 		  size_t outsize = sizeof (scratchbuf);
607 		  size_t res;
608 
609 		  res = (size_t)(-1);
610 		  for (insize = 1; inptr + insize <= es; insize++)
611 		    {
612 		      res = iconv (conv,
613 				   (ICONV_CONST char **) &inptr, &insize,
614 				   &outptr, &outsize);
615 		      if (!(res == (size_t)(-1) && errno == EINVAL))
616 			break;
617 		      /* We expect that no input bytes have been consumed
618 			 so far.  */
619 		      if (inptr != ep)
620 			abort ();
621 		    }
622 		  if (res == (size_t)(-1))
623 		    {
624 		      if (errno == EILSEQ)
625 			{
626 			  po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0,
627 				     false, _("invalid multibyte sequence"));
628 			  continue;
629 			}
630 		      else
631 			abort ();
632 		    }
633 		  insize = inptr - ep;
634 		  memcpy_small (pp, ep, insize);
635 		  pp += insize;
636 		  op += insize;
637 		  ep += insize - 1;
638 		}
639 	      else
640 #endif
641 		{
642 		  if (weird_cjk
643 		      /* Special handling of encodings with CJK structure.  */
644 		      && ep + 2 <= es
645 		      && (unsigned char) c >= 0x80
646 		      && (unsigned char) ep[1] >= 0x30)
647 		    {
648 		      *pp++ = c;
649 		      ep += 1;
650 		      *pp++ = *ep;
651 		      op += 2;
652 		    }
653 		  else
654 		    {
655 		      *pp++ = c;
656 		      op++;
657 		    }
658 		}
659 	    }
660 	}
661 
662       /* Don't break immediately before the "\n" at the end.  */
663       if (es > s && es[-1] == '\n')
664 	overrides[portion_len - 2] = UC_BREAK_PROHIBITED;
665 
666       linebreaks = (char *) xmalloc (portion_len);
667 
668       /* Subsequent lines after a break are all indented.
669 	 See INDENT-S.  */
670       startcol_after_break = (line_prefix ? strlen (line_prefix) : 0);
671       if (indent)
672 	startcol_after_break = (startcol_after_break + extra_indent + 8) & ~7;
673       startcol_after_break++;
674 
675       /* The line width.  Allow room for the closing quote character.  */
676       width = (wrap_strings && do_wrap != no ? page_width : INT_MAX) - 1;
677       /* Adjust for indentation of subsequent lines.  */
678       width -= startcol_after_break;
679 
680     recompute:
681       /* The line starts with different things depending on whether it
682 	 is the first line, and if we are using the indented style.
683 	 See INDENT-F.  */
684       startcol = (line_prefix ? strlen (line_prefix) : 0);
685       if (first_line)
686 	{
687 	  startcol += strlen (name);
688 	  if (indent)
689 	    startcol = (startcol + extra_indent + 8) & ~7;
690 	  else
691 	    startcol++;
692 	}
693       else
694 	{
695 	  if (indent)
696 	    startcol = (startcol + extra_indent + 8) & ~7;
697 	}
698       /* Allow room for the opening quote character.  */
699       startcol++;
700       /* Adjust for indentation of subsequent lines.  */
701       startcol -= startcol_after_break;
702 
703       /* Do line breaking on the portion.  */
704       mbs_width_linebreaks (portion, portion_len, width, startcol, 0,
705 			    overrides, canon_charset, linebreaks);
706 
707       /* If this is the first line, and we are not using the indented
708 	 style, and the line would wrap, then use an empty first line
709 	 and restart.  */
710       if (first_line && !indent
711 	  && portion_len > 0
712 	  && (*es != '\0'
713 	      || startcol > width
714 	      || memchr (linebreaks, UC_BREAK_POSSIBLE, portion_len) != NULL))
715 	{
716 	  if (line_prefix != NULL)
717 	    fputs (line_prefix, fp);
718 	  fputs (name, fp);
719 	  fputs (" \"\"\n", fp);
720 	  first_line = false;
721 	  /* Recompute startcol and linebreaks.  */
722 	  goto recompute;
723 	}
724 
725       /* Print the beginning of the line.  This will depend on whether
726 	 this is the first line, and if the indented style is being
727 	 used.  INDENT-F.  */
728       if (line_prefix != NULL)
729 	fputs (line_prefix, fp);
730       if (first_line)
731 	{
732 	  fputs (name, fp);
733 	  if (indent)
734 	    {
735 	      if (extra_indent > 0)
736 		fwrite ("        ", 1, extra_indent, fp);
737 	      putc ('\t', fp);
738 	    }
739 	  else
740 	    putc (' ', fp);
741 	  first_line = false;
742 	}
743       else
744 	{
745 	  if (indent)
746 	    {
747 	      if (extra_indent > 0)
748 		fwrite ("        ", 1, extra_indent, fp);
749 	      putc ('\t', fp);
750 	    }
751 	}
752 
753       /* Print the portion itself, with linebreaks where necessary.  */
754       putc ('"', fp);
755       for (i = 0; i < portion_len; i++)
756 	{
757 	  if (linebreaks[i] == UC_BREAK_POSSIBLE)
758 	    {
759 	      fputs ("\"\n", fp);
760 	      /* INDENT-S.  */
761 	      if (line_prefix != NULL)
762 		fputs (line_prefix, fp);
763 	      if (indent)
764 		putc ('\t', fp);
765 	      putc ('"', fp);
766 	    }
767 	  putc (portion[i], fp);
768 	}
769       fputs ("\"\n", fp);
770 
771       free (linebreaks);
772       free (overrides);
773       free (portion);
774 
775       s = es;
776 #     undef is_escape
777     }
778   while (*s);
779 
780 #if HAVE_ICONV
781   if (conv != (iconv_t)(-1))
782     iconv_close (conv);
783 #endif
784 }
785 
786 
787 static void
print_blank_line(FILE * fp)788 print_blank_line (FILE *fp)
789 {
790   if (uniforum)
791     fputs ("#\n", fp);
792   else
793     putc ('\n', fp);
794 }
795 
796 
797 static void
message_print(const message_ty * mp,FILE * fp,const char * charset,size_t page_width,bool blank_line,bool debug)798 message_print (const message_ty *mp, FILE *fp, const char *charset,
799 	       size_t page_width, bool blank_line, bool debug)
800 {
801   int extra_indent;
802 
803   /* Separate messages with a blank line.  Uniforum doesn't like blank
804      lines, so use an empty comment (unless there already is one).  */
805   if (blank_line && (!uniforum
806 		     || mp->comment == NULL
807 		     || mp->comment->nitems == 0
808 		     || mp->comment->item[0][0] != '\0'))
809     print_blank_line (fp);
810 
811   /* Print translator comment if available.  */
812   message_print_comment (mp, fp);
813 
814   /* Print xgettext extracted comments.  */
815   message_print_comment_dot (mp, fp);
816 
817   /* Print the file position comments.  This will help a human who is
818      trying to navigate the sources.  There is no problem of getting
819      repeated positions, because duplicates are checked for.  */
820   message_print_comment_filepos (mp, fp, uniforum, page_width);
821 
822   /* Print flag information in special comment.  */
823   message_print_comment_flags (mp, fp, debug);
824 
825   /* Print the previous msgid.  This helps the translator when the msgid has
826      only slightly changed.  */
827   if (mp->prev_msgctxt != NULL)
828     wrap (mp, fp, "#| ", 0, "msgctxt", mp->prev_msgctxt, mp->do_wrap,
829 	  page_width, charset);
830   if (mp->prev_msgid != NULL)
831     wrap (mp, fp, "#| ", 0, "msgid", mp->prev_msgid, mp->do_wrap, page_width,
832 	  charset);
833   if (mp->prev_msgid_plural != NULL)
834     wrap (mp, fp, "#| ", 0, "msgid_plural", mp->prev_msgid_plural, mp->do_wrap,
835 	  page_width, charset);
836   extra_indent = (mp->prev_msgctxt != NULL || mp->prev_msgid != NULL
837 		  || mp->prev_msgid_plural != NULL
838 		  ? 3
839 		  : 0);
840 
841   /* Print each of the message components.  Wrap them nicely so they
842      are as readable as possible.  If there is no recorded msgstr for
843      this domain, emit an empty string.  */
844   if (mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt)
845       && po_charset_canonicalize (charset) != po_charset_utf8)
846     {
847       char *warning_message =
848 	xasprintf (_("\
849 The following msgctxt contains non-ASCII characters.\n\
850 This will cause problems to translators who use a character encoding\n\
851 different from yours. Consider using a pure ASCII msgctxt instead.\n\
852 %s\n"), mp->msgctxt);
853       po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
854       free (warning_message);
855     }
856   if (!is_ascii_string (mp->msgid)
857       && po_charset_canonicalize (charset) != po_charset_utf8)
858     {
859       char *warning_message =
860 	xasprintf (_("\
861 The following msgid contains non-ASCII characters.\n\
862 This will cause problems to translators who use a character encoding\n\
863 different from yours. Consider using a pure ASCII msgid instead.\n\
864 %s\n"), mp->msgid);
865       po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
866       free (warning_message);
867     }
868   if (mp->msgctxt != NULL)
869     wrap (mp, fp, NULL, extra_indent, "msgctxt", mp->msgctxt, mp->do_wrap,
870 	  page_width, charset);
871   wrap (mp, fp, NULL, extra_indent, "msgid", mp->msgid, mp->do_wrap,
872 	  page_width, charset);
873   if (mp->msgid_plural != NULL)
874     wrap (mp, fp, NULL, extra_indent, "msgid_plural", mp->msgid_plural,
875 	  mp->do_wrap, page_width, charset);
876 
877   if (mp->msgid_plural == NULL)
878     wrap (mp, fp, NULL, extra_indent, "msgstr", mp->msgstr, mp->do_wrap,
879 	  page_width, charset);
880   else
881     {
882       char prefix_buf[20];
883       unsigned int i;
884       const char *p;
885 
886       for (p = mp->msgstr, i = 0;
887 	   p < mp->msgstr + mp->msgstr_len;
888 	   p += strlen (p) + 1, i++)
889 	{
890 	  sprintf (prefix_buf, "msgstr[%u]", i);
891 	  wrap (mp, fp, NULL, extra_indent, prefix_buf, p, mp->do_wrap,
892 		page_width, charset);
893 	}
894     }
895 }
896 
897 
898 static void
message_print_obsolete(const message_ty * mp,FILE * fp,const char * charset,size_t page_width,bool blank_line)899 message_print_obsolete (const message_ty *mp, FILE *fp, const char *charset,
900 			size_t page_width, bool blank_line)
901 {
902   int extra_indent;
903 
904   /* If msgstr is the empty string we print nothing.  */
905   if (mp->msgstr[0] == '\0')
906     return;
907 
908   /* Separate messages with a blank line.  Uniforum doesn't like blank
909      lines, so use an empty comment (unless there already is one).  */
910   if (blank_line)
911     print_blank_line (fp);
912 
913   /* Print translator comment if available.  */
914   message_print_comment (mp, fp);
915 
916   /* Print xgettext extracted comments (normally empty).  */
917   message_print_comment_dot (mp, fp);
918 
919   /* Print the file position comments (normally empty).  */
920   message_print_comment_filepos (mp, fp, uniforum, page_width);
921 
922   /* Print flag information in special comment.  */
923   if (mp->is_fuzzy)
924     {
925       bool first = true;
926 
927       putc ('#', fp);
928       putc (',', fp);
929 
930       if (mp->is_fuzzy)
931 	{
932 	  fputs (" fuzzy", fp);
933 	  first = false;
934 	}
935 
936       putc ('\n', fp);
937     }
938 
939   /* Print the previous msgid.  This helps the translator when the msgid has
940      only slightly changed.  */
941   if (mp->prev_msgctxt != NULL)
942     wrap (mp, fp, "#~| ", 0, "msgctxt", mp->prev_msgctxt, mp->do_wrap,
943 	  page_width, charset);
944   if (mp->prev_msgid != NULL)
945     wrap (mp, fp, "#~| ", 0, "msgid", mp->prev_msgid, mp->do_wrap, page_width,
946 	  charset);
947   if (mp->prev_msgid_plural != NULL)
948     wrap (mp, fp, "#~| ", 0, "msgid_plural", mp->prev_msgid_plural, mp->do_wrap,
949 	  page_width, charset);
950   extra_indent = (mp->prev_msgctxt != NULL || mp->prev_msgid != NULL
951 		  || mp->prev_msgid_plural != NULL
952 		  ? 1
953 		  : 0);
954 
955   /* Print each of the message components.  Wrap them nicely so they
956      are as readable as possible.  */
957   if (mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt)
958       && po_charset_canonicalize (charset) != po_charset_utf8)
959     {
960       char *warning_message =
961 	xasprintf (_("\
962 The following msgctxt contains non-ASCII characters.\n\
963 This will cause problems to translators who use a character encoding\n\
964 different from yours. Consider using a pure ASCII msgctxt instead.\n\
965 %s\n"), mp->msgctxt);
966       po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
967       free (warning_message);
968     }
969   if (!is_ascii_string (mp->msgid)
970       && po_charset_canonicalize (charset) != po_charset_utf8)
971     {
972       char *warning_message =
973 	xasprintf (_("\
974 The following msgid contains non-ASCII characters.\n\
975 This will cause problems to translators who use a character encoding\n\
976 different from yours. Consider using a pure ASCII msgid instead.\n\
977 %s\n"), mp->msgid);
978       po_xerror (PO_SEVERITY_WARNING, mp, NULL, 0, 0, true, warning_message);
979       free (warning_message);
980     }
981   if (mp->msgctxt != NULL)
982     wrap (mp, fp, "#~ ", extra_indent, "msgctxt", mp->msgctxt, mp->do_wrap,
983 	  page_width, charset);
984   wrap (mp, fp, "#~ ", extra_indent, "msgid", mp->msgid, mp->do_wrap,
985 	page_width, charset);
986   if (mp->msgid_plural != NULL)
987     wrap (mp, fp, "#~ ", extra_indent, "msgid_plural", mp->msgid_plural,
988 	  mp->do_wrap, page_width, charset);
989 
990   if (mp->msgid_plural == NULL)
991     wrap (mp, fp, "#~ ", extra_indent, "msgstr", mp->msgstr, mp->do_wrap,
992 	  page_width, charset);
993   else
994     {
995       char prefix_buf[20];
996       unsigned int i;
997       const char *p;
998 
999       for (p = mp->msgstr, i = 0;
1000 	   p < mp->msgstr + mp->msgstr_len;
1001 	   p += strlen (p) + 1, i++)
1002 	{
1003 	  sprintf (prefix_buf, "msgstr[%u]", i);
1004 	  wrap (mp, fp, "#~ ", extra_indent, prefix_buf, p, mp->do_wrap,
1005 		page_width, charset);
1006 	}
1007     }
1008 }
1009 
1010 
1011 static void
msgdomain_list_print_po(msgdomain_list_ty * mdlp,FILE * fp,size_t page_width,bool debug)1012 msgdomain_list_print_po (msgdomain_list_ty *mdlp, FILE *fp, size_t page_width,
1013 			 bool debug)
1014 {
1015   size_t j, k;
1016   bool blank_line;
1017 
1018   /* Write out the messages for each domain.  */
1019   blank_line = false;
1020   for (k = 0; k < mdlp->nitems; k++)
1021     {
1022       message_list_ty *mlp;
1023       const char *header;
1024       const char *charset;
1025       char *allocated_charset;
1026 
1027       /* If the first domain is the default, don't bother emitting
1028 	 the domain name, because it is the default.  */
1029       if (!(k == 0
1030 	    && strcmp (mdlp->item[k]->domain, MESSAGE_DOMAIN_DEFAULT) == 0))
1031 	{
1032 	  if (blank_line)
1033 	    print_blank_line (fp);
1034 	  fprintf (fp, "domain \"%s\"\n", mdlp->item[k]->domain);
1035 	  blank_line = true;
1036 	}
1037 
1038       mlp = mdlp->item[k]->messages;
1039 
1040       /* Search the header entry.  */
1041       header = NULL;
1042       for (j = 0; j < mlp->nitems; ++j)
1043 	if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1044 	  {
1045 	    header = mlp->item[j]->msgstr;
1046 	    break;
1047 	  }
1048 
1049       /* Extract the charset name.  */
1050       charset = "ASCII";
1051       allocated_charset = NULL;
1052       if (header != NULL)
1053 	{
1054 	  const char *charsetstr = c_strstr (header, "charset=");
1055 
1056 	  if (charsetstr != NULL)
1057 	    {
1058 	      size_t len;
1059 
1060 	      charsetstr += strlen ("charset=");
1061 	      len = strcspn (charsetstr, " \t\n");
1062 	      allocated_charset = (char *) xallocsa (len + 1);
1063 	      memcpy (allocated_charset, charsetstr, len);
1064 	      allocated_charset[len] = '\0';
1065 	      charset = allocated_charset;
1066 
1067 	      /* Treat the dummy default value as if it were absent.  */
1068 	      if (strcmp (charset, "CHARSET") == 0)
1069 		charset = "ASCII";
1070 	    }
1071 	}
1072 
1073       /* Write out each of the messages for this domain.  */
1074       for (j = 0; j < mlp->nitems; ++j)
1075 	if (!mlp->item[j]->obsolete)
1076 	  {
1077 	    message_print (mlp->item[j], fp, charset, page_width, blank_line,
1078 			   debug);
1079 	    blank_line = true;
1080 	  }
1081 
1082       /* Write out each of the obsolete messages for this domain.  */
1083       for (j = 0; j < mlp->nitems; ++j)
1084 	if (mlp->item[j]->obsolete)
1085 	  {
1086 	    message_print_obsolete (mlp->item[j], fp, charset, page_width,
1087 				    blank_line);
1088 	    blank_line = true;
1089 	  }
1090 
1091       if (allocated_charset != NULL)
1092 	freesa (allocated_charset);
1093     }
1094 }
1095 
1096 
1097 /* Describes a PO file in .po syntax.  */
1098 const struct catalog_output_format output_format_po =
1099 {
1100   msgdomain_list_print_po,		/* print */
1101   false,				/* requires_utf8 */
1102   true,					/* supports_multiple_domains */
1103   true,					/* supports_contexts */
1104   true,					/* supports_plurals */
1105   false,				/* alternative_is_po */
1106   false					/* alternative_is_java_class */
1107 };
1108