gettext-tools/src/msgmerge.c

/* GNU gettext - internationalization aids
   Copyright (C) 1995-1998, 2000-2010, 2012, 2014-2016, 2018-2020 Free Software
   Foundation, Inc.
   This file was written by Peter Miller <millerp@canb.auug.org.au>

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <alloca.h>

#include <getopt.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#ifdef _OPENMP
# include <omp.h>
#endif

#include <textstyle.h>

#include "noreturn.h"
#include "closeout.h"
#include "dir-list.h"
#include "error.h"
#include "error-progname.h"
#include "progname.h"
#include "relocatable.h"
#include "basename-lgpl.h"
#include "message.h"
#include "read-catalog.h"
#include "read-po.h"
#include "read-properties.h"
#include "read-stringtable.h"
#include "write-catalog.h"
#include "write-po.h"
#include "write-properties.h"
#include "write-stringtable.h"
#include "format.h"
#include "xalloc.h"
#include "xmalloca.h"
#include "obstack.h"
#include "c-strstr.h"
#include "c-strcase.h"
#include "po-charset.h"
#include "msgl-iconv.h"
#include "msgl-equal.h"
#include "msgl-fsearch.h"
#include "glthread/lock.h"
#include "lang-table.h"
#include "plural-exp.h"
#include "plural-count.h"
#include "msgl-check.h"
#include "po-xerror.h"
#include "backupfile.h"
#include "copy-file.h"
#include "propername.h"
#include "gettext.h"

#define _(str) gettext (str)

#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free


/* If true do not print unneeded messages.  Set by -q/--quiet/--silent and
   forced to true by --for-msgfmt.  */
static bool quiet;

/* Verbosity level.  Incremented once for every -v/--verbose option.  */
static int verbosity_level;

/* Force output of PO file even if empty.  Declared as 'int' (not 'bool')
   because getopt_long stores into it through the flag pointer of the
   "force-po" entry of long_options.  */
static int force_po;

/* Apply the .pot file to each of the domains in the PO file.
   Set by -m/--multi-domain.  */
static bool multi_domain_mode = false;

/* Produce output for msgfmt, not for a translator.
   msgfmt ignores
     - untranslated messages,
     - fuzzy messages, except the header entry,
     - obsolete messages.
   Therefore output for msgfmt does not need to include such messages.
   Set by --for-msgfmt.  */
static bool for_msgfmt = false;

/* Determines whether to use fuzzy matching.  Cleared by
   -N/--no-fuzzy-matching and by --for-msgfmt.  */
static bool use_fuzzy_matching = true;

/* Determines whether to keep old msgids as previous msgids.
   Set by --previous.  */
static bool keep_previous = false;

/* Language (ISO-639 code) and optional territory (ISO-3166 code).
   Argument of --lang; NULL if the option was not given.  */
static const char *catalogname = NULL;

/* List of user-specified compendiums.  NULL until the first -C/--compendium
   option has been processed; one entry per domain of each compendium file.  */
static message_list_list_ty *compendiums;

/* List of corresponding filenames.  Kept parallel to 'compendiums': the
   file name is appended once for every message list appended there.  */
static string_list_ty *compendium_filenames;

/* Update mode.  Set by -U/--update.  */
static bool update_mode = false;
/* Argument of --backup, or NULL; only accepted together with --update.  */
static const char *version_control_string;
/* Argument of --suffix, or NULL; only accepted together with --update.
   When NULL, main falls back to the SIMPLE_BACKUP_SUFFIX environment
   variable.  */
static const char *backup_suffix_string;

/* Long options.
   Entries whose last field is a character share their handling with the
   short option of the same letter in main's option switch.  Entries with a
   'CHAR_MAX + n' code have no short equivalent; the offset keeps the code
   outside the range of any option character.  The "force-po" entry uses the
   flag form instead: getopt_long stores 1 into force_po and returns 0,
   which main handles in its '\0' case.  */
static const struct option long_options[] =
{
  { "add-location", optional_argument, NULL, 'n' },
  { "backup", required_argument, NULL, CHAR_MAX + 1 },
  { "color", optional_argument, NULL, CHAR_MAX + 9 },
  { "compendium", required_argument, NULL, 'C' },
  { "directory", required_argument, NULL, 'D' },
  { "escape", no_argument, NULL, 'E' },
  { "for-msgfmt", no_argument, NULL, CHAR_MAX + 12 },
  { "force-po", no_argument, &force_po, 1 },
  { "help", no_argument, NULL, 'h' },
  { "indent", no_argument, NULL, 'i' },
  { "lang", required_argument, NULL, CHAR_MAX + 8 },
  { "multi-domain", no_argument, NULL, 'm' },
  { "no-escape", no_argument, NULL, 'e' },
  { "no-fuzzy-matching", no_argument, NULL, 'N' },
  { "no-location", no_argument, NULL, CHAR_MAX + 11 },
  { "no-wrap", no_argument, NULL, CHAR_MAX + 4 },
  { "output-file", required_argument, NULL, 'o' },
  { "previous", no_argument, NULL, CHAR_MAX + 7 },
  { "properties-input", no_argument, NULL, 'P' },
  { "properties-output", no_argument, NULL, 'p' },
  { "quiet", no_argument, NULL, 'q' },
  { "sort-by-file", no_argument, NULL, 'F' },
  { "sort-output", no_argument, NULL, 's' },
  { "silent", no_argument, NULL, 'q' },
  { "strict", no_argument, NULL, CHAR_MAX + 2 },
  { "stringtable-input", no_argument, NULL, CHAR_MAX + 5 },
  { "stringtable-output", no_argument, NULL, CHAR_MAX + 6 },
  { "style", required_argument, NULL, CHAR_MAX + 10 },
  { "suffix", required_argument, NULL, CHAR_MAX + 3 },
  { "update", no_argument, NULL, 'U' },
  { "verbose", no_argument, NULL, 'v' },
  { "version", no_argument, NULL, 'V' },
  { "width", required_argument, NULL, 'w' },
  /* Terminating all-zero entry required by getopt_long.  */
  { NULL, 0, NULL, 0 }
};


/* Counters describing the outcome of a merge.
   NOTE(review): the code that fills and reports these fields is not in the
   part of the file examined here; the per-field meanings below are inferred
   from the field names and should be confirmed against that code.  */
struct statistics
{
  size_t merged;    /* presumably: messages merged through an exact match */
  size_t fuzzied;   /* presumably: messages merged through a fuzzy match */
  size_t missing;   /* presumably: messages without any matching definition */
  size_t obsolete;  /* presumably: definitions no longer referenced */
};


/* Forward declaration of local functions.  */
/* Prints a hint (STATUS != EXIT_SUCCESS) or the full help text, then exits
   with STATUS; never returns.  */
_GL_NORETURN_FUNC static void usage (int status);
/* Registers the PO file FILENAME as an additional compendium.  */
static void compendium (const char *filename);
/* Moves the obsolete messages of every domain behind the other ones.  */
static void msgdomain_list_stablesort_by_obsolete (msgdomain_list_ty *mdlp);
/* Merges the catalogs read from FN1 and FN2 (main passes def.po and ref.pot,
   in this order) using INPUT_SYNTAX and returns the merged catalog.  A second
   catalog is stored through DEFP; main compares it with the result in update
   mode -- presumably it is the parsed def.po, confirm at the definition.  */
static msgdomain_list_ty *merge (const char *fn1, const char *fn2,
                                 catalog_input_format_ty input_syntax,
                                 msgdomain_list_ty **defp);


/* Program entry point.
   Parses the command line, validates the combination of options, merges
   def.po (first file argument) with ref.pot (second file argument) and
   writes the result:
     - in update mode back to def.po, after making a backup if requested,
       and only if the result differs from the original;
     - otherwise through msgdomain_list_print to the --output-file argument
       (NULL if the option was not given).
   Always terminates through exit(); the return type is only formal.  */
int
main (int argc, char **argv)
{
  int opt;
  bool do_help;
  bool do_version;
  char *output_file;
  char *color;
  msgdomain_list_ty *def;
  msgdomain_list_ty *result;
  catalog_input_format_ty input_syntax = &input_format_po;
  catalog_output_format_ty output_syntax = &output_format_po;
  bool sort_by_filepos = false;
  bool sort_by_msgid = false;

  /* Set program name for messages.  */
  set_program_name (argv[0]);
  error_print_progname = maybe_print_progname;
  verbosity_level = 0;
  quiet = false;
  gram_max_allowed_errors = UINT_MAX;

  /* Set locale via LC_ALL.  */
  setlocale (LC_ALL, "");

  /* Set the text message domain.  */
  bindtextdomain (PACKAGE, relocate (LOCALEDIR));
  bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
  textdomain (PACKAGE);

  /* Ensure that write errors on stdout are detected.  */
  atexit (close_stdout);

  /* Set default values for variables.  */
  do_help = false;
  do_version = false;
  output_file = NULL;
  color = NULL;

  /* Note: 'n' carries no ':' in the short option string.  The short option
     -n takes no argument (as documented in the help text); only the long
     form --add-location accepts an optional "=TYPE" argument.  With "n:"
     an invocation like "msgmerge -n def.po ref.pot" would swallow def.po
     as the option's argument.  */
  while ((opt = getopt_long (argc, argv, "C:D:eEFhimnNo:pPqsUvVw:",
                             long_options, NULL))
         != EOF)
    switch (opt)
      {
      case '\0':                /* Long option.  */
        /* A flag-style long option (--force-po) has already stored its
           value; nothing else to do.  */
        break;

      case 'C':
        compendium (optarg);
        break;

      case 'D':
        dir_list_append (optarg);
        break;

      case 'e':
        message_print_style_escape (false);
        break;

      case 'E':
        message_print_style_escape (true);
        break;

      case 'F':
        sort_by_filepos = true;
        break;

      case 'h':
        do_help = true;
        break;

      case 'i':
        message_print_style_indent ();
        break;

      case 'm':
        multi_domain_mode = true;
        break;

      case 'n':
        /* optarg is NULL for -n and for --add-location without "=TYPE".  */
        if (handle_filepos_comment_option (optarg))
          usage (EXIT_FAILURE);
        break;

      case 'N':
        use_fuzzy_matching = false;
        break;

      case 'o':
        output_file = optarg;
        break;

      case 'p':
        output_syntax = &output_format_properties;
        break;

      case 'P':
        input_syntax = &input_format_properties;
        break;

      case 'q':
        quiet = true;
        break;

      case 's':
        sort_by_msgid = true;
        break;

      case 'U':
        update_mode = true;
        break;

      case 'v':
        ++verbosity_level;
        break;

      case 'V':
        do_version = true;
        break;

      case 'w':
        {
          int value;
          char *endp;
          value = strtol (optarg, &endp, 10);
          /* An argument that does not start with a number is silently
             ignored.  */
          if (endp != optarg)
            message_page_width_set (value);
        }
        break;

      case CHAR_MAX + 1: /* --backup */
        version_control_string = optarg;
        break;

      case CHAR_MAX + 2: /* --strict */
        message_print_style_uniforum ();
        break;

      case CHAR_MAX + 3: /* --suffix */
        backup_suffix_string = optarg;
        break;

      case CHAR_MAX + 4: /* --no-wrap */
        message_page_width_ignore ();
        break;

      case CHAR_MAX + 5: /* --stringtable-input */
        input_syntax = &input_format_stringtable;
        break;

      case CHAR_MAX + 6: /* --stringtable-output */
        output_syntax = &output_format_stringtable;
        break;

      case CHAR_MAX + 7: /* --previous */
        keep_previous = true;
        break;

      case CHAR_MAX + 8: /* --lang */
        catalogname = optarg;
        break;

      case CHAR_MAX + 9: /* --color */
        if (handle_color_option (optarg) || color_test_mode)
          usage (EXIT_FAILURE);
        /* Remembered only to reject --color together with --update.  */
        color = optarg;
        break;

      case CHAR_MAX + 10: /* --style */
        handle_style_option (optarg);
        break;

      case CHAR_MAX + 11: /* --no-location */
        message_print_style_filepos (filepos_comment_none);
        break;

      case CHAR_MAX + 12: /* --for-msgfmt */
        for_msgfmt = true;
        break;

      default:
        usage (EXIT_FAILURE);
        break;
      }

  /* Version information is requested.  */
  if (do_version)
    {
      printf ("%s (GNU %s) %s\n", last_component (program_name),
              PACKAGE, VERSION);
      /* xgettext: no-wrap */
      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
License GPLv3+: GNU GPL version 3 or later <%s>\n\
This is free software: you are free to change and redistribute it.\n\
There is NO WARRANTY, to the extent permitted by law.\n\
"),
              "1995-2020", "https://gnu.org/licenses/gpl.html");
      printf (_("Written by %s.\n"), proper_name ("Peter Miller"));
      exit (EXIT_SUCCESS);
    }

  /* Help is requested.  */
  if (do_help)
    usage (EXIT_SUCCESS);

  /* Test whether we have a .po file name as argument.  */
  if (optind >= argc)
    {
      error (EXIT_SUCCESS, 0, _("no input files given"));
      usage (EXIT_FAILURE);
    }
  if (optind + 2 != argc)
    {
      error (EXIT_SUCCESS, 0, _("exactly 2 input files required"));
      usage (EXIT_FAILURE);
    }

  /* Verify selected options.  */
  if (update_mode)
    {
      /* In update mode the output goes back to def.po as a plain PO file
         for translators, so output redirection and output styling options
         are rejected.  */
      if (output_file != NULL)
        {
          error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
                 "--update", "--output-file");
        }
      if (for_msgfmt)
        {
          error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
                 "--update", "--for-msgfmt");
        }
      if (color != NULL)
        {
          error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
                 "--update", "--color");
        }
      if (style_file_name != NULL)
        {
          error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
                 "--update", "--style");
        }
    }
  else
    {
      /* The backup options make sense only when def.po gets overwritten.  */
      if (version_control_string != NULL)
        {
          error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
                 "--backup", "--update");
          usage (EXIT_FAILURE);
        }
      if (backup_suffix_string != NULL)
        {
          error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
                 "--suffix", "--update");
          usage (EXIT_FAILURE);
        }
    }

  if (sort_by_msgid && sort_by_filepos)
    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
           "--sort-output", "--sort-by-file");

  /* In update mode, --properties-input implies --properties-output.  */
  if (update_mode && input_syntax == &input_format_properties)
    output_syntax = &output_format_properties;
  /* In update mode, --stringtable-input implies --stringtable-output.  */
  if (update_mode && input_syntax == &input_format_stringtable)
    output_syntax = &output_format_stringtable;

  if (for_msgfmt)
    {
      /* With --for-msgfmt, no fuzzy matching.  */
      use_fuzzy_matching = false;

      /* With --for-msgfmt, merging is fast, therefore no need for a progress
         indicator.  */
      quiet = true;

      /* With --for-msgfmt, no need for comments.  */
      message_print_style_comment (false);

      /* With --for-msgfmt, no need for source location lines.  */
      message_print_style_filepos (filepos_comment_none);
    }

  /* Initialize OpenMP.  */
  #ifdef _OPENMP
  openmp_init ();
  #endif

  /* Merge the two files.  */
  result = merge (argv[optind], argv[optind + 1], input_syntax, &def);

  /* Sort the results.  */
  if (sort_by_filepos)
    msgdomain_list_sort_by_filepos (result);
  else if (sort_by_msgid)
    msgdomain_list_sort_by_msgid (result);

  if (update_mode)
    {
      /* Before comparing result with def, sort the result into the same order
         as would be done implicitly by output_syntax->print.  */
      if (output_syntax->sorts_obsoletes_to_end)
        msgdomain_list_stablesort_by_obsolete (result);

      /* Do nothing if the original file and the result are equal.  Also do
         nothing if the original file and the result differ only by the
         POT-Creation-Date in the header entry; this is needed for projects
         which don't put the .pot file under CVS.  */
      if (!msgdomain_list_equal (def, result, true))
        {
          /* Back up def.po.  */
          enum backup_type backup_type;
          char *backup_file;

          output_file = argv[optind];

          /* Without --suffix, fall back to a non-empty SIMPLE_BACKUP_SUFFIX
             environment variable.  */
          if (backup_suffix_string == NULL)
            {
              backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX");
              if (backup_suffix_string != NULL
                  && backup_suffix_string[0] == '\0')
                backup_suffix_string = NULL;
            }
          if (backup_suffix_string != NULL)
            simple_backup_suffix = backup_suffix_string;

          backup_type = xget_version (_("backup type"), version_control_string);
          if (backup_type != none)
            {
              backup_file = find_backup_file_name (output_file, backup_type);
              copy_file_preserving (output_file, backup_file);
            }

          /* Write the merged message list out.  */
          msgdomain_list_print (result, output_file, output_syntax, true,
                                false);
        }
    }
  else
    {
      /* Write the merged message list out.  */
      msgdomain_list_print (result, output_file, output_syntax,
                            for_msgfmt || force_po, false);
    }

  exit (EXIT_SUCCESS);
}


/* Print usage information and terminate the program with STATUS.
   For a failure status only a one-line hint goes to stderr; for
   EXIT_SUCCESS the complete help text goes to stdout.  */
static void
usage (int status)
{
  /* Failure: just point the user at --help.  */
  if (status != EXIT_SUCCESS)
    {
      fprintf (stderr, _("Try '%s --help' for more information.\n"),
               program_name);
      exit (status);
    }

  /* Synopsis and general description.  */
  printf (_("\
Usage: %s [OPTION] def.po ref.pot\n\
"), program_name);
  printf ("\n");
  /* xgettext: no-wrap */
  printf (_("\
Merges two Uniforum style .po files together.  The def.po file is an\n\
existing PO file with translations which will be taken over to the newly\n\
created file as long as they still match; comments will be preserved,\n\
but extracted comments and file positions will be discarded.  The ref.pot\n\
file is the last created PO file with up-to-date source references but\n\
old translations, or a PO Template file (generally created by xgettext);\n\
any translations or comments in the file will be discarded, however dot\n\
comments and file positions will be preserved.  Where an exact match\n\
cannot be found, fuzzy matching is used to produce better results.\n\
"));
  printf ("\n");
  printf (_("\
Mandatory arguments to long options are mandatory for short options too.\n"));
  printf ("\n");

  /* Input files.  */
  printf (_("\
Input file location:\n"));
  printf (_("\
  def.po                      translations referring to old sources\n"));
  printf (_("\
  ref.pot                     references to new sources\n"));
  printf (_("\
  -D, --directory=DIRECTORY   add DIRECTORY to list for input files search\n"));
  printf (_("\
  -C, --compendium=FILE       additional library of message translations,\n\
                              may be specified more than once\n"));
  printf ("\n");

  /* Operation mode.  */
  printf (_("\
Operation mode:\n"));
  printf (_("\
  -U, --update                update def.po,\n\
                              do nothing if def.po already up to date\n"));
  printf ("\n");

  /* Output file, normal mode.  */
  printf (_("\
Output file location:\n"));
  printf (_("\
  -o, --output-file=FILE      write output to specified file\n"));
  printf (_("\
The results are written to standard output if no output file is specified\n\
or if it is -.\n"));
  printf ("\n");

  /* Output file, update mode.  */
  printf (_("\
Output file location in update mode:\n"));
  printf (_("\
The result is written back to def.po.\n"));
  printf (_("\
      --backup=CONTROL        make a backup of def.po\n"));
  printf (_("\
      --suffix=SUFFIX         override the usual backup suffix\n"));
  printf (_("\
The version control method may be selected via the --backup option or through\n\
the VERSION_CONTROL environment variable.  Here are the values:\n\
  none, off       never make backups (even if --backup is given)\n\
  numbered, t     make numbered backups\n\
  existing, nil   numbered if numbered backups exist, simple otherwise\n\
  simple, never   always make simple backups\n"));
  printf (_("\
The backup suffix is '~', unless set with --suffix or the SIMPLE_BACKUP_SUFFIX\n\
environment variable.\n\
"));
  printf ("\n");

  /* Operation modifiers.  */
  printf (_("\
Operation modifiers:\n"));
  printf (_("\
  -m, --multi-domain          apply ref.pot to each of the domains in def.po\n"));
  printf (_("\
      --for-msgfmt            produce output for '%s', not for a translator\n"),
          "msgfmt");
  printf (_("\
  -N, --no-fuzzy-matching     do not use fuzzy matching\n"));
  printf (_("\
      --previous              keep previous msgids of translated messages\n"));
  printf ("\n");

  /* Input syntax.  */
  printf (_("\
Input file syntax:\n"));
  printf (_("\
  -P, --properties-input      input files are in Java .properties syntax\n"));
  printf (_("\
      --stringtable-input     input files are in NeXTstep/GNUstep .strings\n\
                              syntax\n"));
  printf ("\n");

  /* Output details.  */
  printf (_("\
Output details:\n"));
  printf (_("\
      --lang=CATALOGNAME      set 'Language' field in the header entry\n"));
  printf (_("\
      --color                 use colors and other text attributes always\n\
      --color=WHEN            use colors and other text attributes if WHEN.\n\
                              WHEN may be 'always', 'never', 'auto', or 'html'.\n"));
  printf (_("\
      --style=STYLEFILE       specify CSS style rule file for --color\n"));
  printf (_("\
  -e, --no-escape             do not use C escapes in output (default)\n"));
  printf (_("\
  -E, --escape                use C escapes in output, no extended chars\n"));
  printf (_("\
      --force-po              write PO file even if empty\n"));
  printf (_("\
  -i, --indent                indented output style\n"));
  printf (_("\
      --no-location           suppress '#: filename:line' lines\n"));
  printf (_("\
  -n, --add-location          preserve '#: filename:line' lines (default)\n"));
  printf (_("\
      --strict                strict Uniforum output style\n"));
  printf (_("\
  -p, --properties-output     write out a Java .properties file\n"));
  printf (_("\
      --stringtable-output    write out a NeXTstep/GNUstep .strings file\n"));
  printf (_("\
  -w, --width=NUMBER          set output page width\n"));
  printf (_("\
      --no-wrap               do not break long message lines, longer than\n\
                              the output page width, into several lines\n"));
  printf (_("\
  -s, --sort-output           generate sorted output\n"));
  printf (_("\
  -F, --sort-by-file          sort output by file location\n"));
  printf ("\n");

  /* Informative output.  */
  printf (_("\
Informative output:\n"));
  printf (_("\
  -h, --help                  display this help and exit\n"));
  printf (_("\
  -V, --version               output version information and exit\n"));
  printf (_("\
  -v, --verbose               increase verbosity level\n"));
  printf (_("\
  -q, --quiet, --silent       suppress progress indicators\n"));
  printf ("\n");

  /* TRANSLATORS: The first placeholder is the web address of the Savannah
     project of this package.  The second placeholder is the bug-reporting
     email address for this package.  Please add _another line_ saying
     "Report translation bugs to <...>\n" with the address for translation
     bugs (typically your translation team's web or email address).  */
  printf (_("\
Report bugs in the bug tracker at <%s>\n\
or by email to <%s>.\n"),
          "https://savannah.gnu.org/projects/gettext",
          "bug-gettext@gnu.org");

  exit (status);
}


static void
compendium (const char *filename)
{
  msgdomain_list_ty *mdlp;
  size_t k;

  mdlp = read_catalog_file (filename, &input_format_po);
  if (compendiums == NULL)
    {
      compendiums = message_list_list_alloc ();
      compendium_filenames = string_list_alloc ();
    }
  for (k = 0; k < mdlp->nitems; k++)
    {
      message_list_list_append (compendiums, mdlp->item[k]->messages);
      string_list_append (compendium_filenames, filename);
    }
}


/* For every domain of MDLP, move the obsolete messages behind the
   non-obsolete ones.  The relative order inside each of the two groups is
   kept, i.e. the reordering is stable.  */
static void
msgdomain_list_stablesort_by_obsolete (msgdomain_list_ty *mdlp)
{
  size_t domain;

  for (domain = 0; domain < mdlp->nitems; domain++)
    {
      message_list_ty *mlp = mdlp->item[domain]->messages;
      size_t total = mlp->nitems;
      message_ty **current;
      message_ty **obsolete;
      size_t n_current = 0;
      size_t n_obsolete = 0;
      size_t i;

      /* Nothing to reorder in an empty domain.  */
      if (total == 0)
        continue;

      /* Each scratch array is large enough to take all messages.  */
      current = XNMALLOC (total, message_ty *);
      obsolete = XNMALLOC (total, message_ty *);

      /* Partition the messages, preserving their order.  */
      for (i = 0; i < total; i++)
        {
          message_ty *mp = mlp->item[i];

          if (!mp->obsolete)
            current[n_current++] = mp;
          else
            obsolete[n_obsolete++] = mp;
        }

      /* When one of the groups is empty the list is already in the wanted
         order, so it is left untouched.  */
      if (n_current != 0 && n_obsolete != 0)
        {
          memcpy (mlp->item, current, n_current * sizeof (message_ty *));
          memcpy (mlp->item + n_current, obsolete,
                  n_obsolete * sizeof (message_ty *));
        }

      free (obsolete);
      free (current);
    }
}


/* Data structure representing the messages with known translations.
   They are composed of
     - A message list from def.po,
     - The compendiums.
   The data structure is optimized for exact and fuzzy searches.
   It is set up by definitions_init and released by definitions_destroy;
   both fuzzy indices are created lazily, on the first fuzzy search.  */
typedef struct definitions_ty definitions_ty;
struct definitions_ty
{
  /* A list of message lists.  The first comes from def.po, the other ones
     from the compendiums.  Each message list has a built-in hash table,
     for speed when doing the exact searches.
     Element 0 starts out as NULL and is replaced through
     definitions_set_current_list.  */
  message_list_list_ty *lists;

  /* A fuzzy index of the current list of non-compendium messages, for speed
     when doing fuzzy searches.  Used only if use_fuzzy_matching is true.
     NULL as long as it has not been built; reset to NULL whenever the
     current list is replaced.  */
  message_fuzzy_index_ty *curr_findex;
  /* A once-only execution guard for the initialization of the fuzzy index.
     Needed for OpenMP.  Held by definitions_init_curr_findex while it
     checks and creates curr_findex.  */
  gl_lock_define(, curr_findex_init_lock)

  /* A fuzzy index of the compendiums, for speed when doing fuzzy searches.
     Used only if use_fuzzy_matching is true and compendiums != NULL.
     NULL as long as it has not been built.  */
  message_fuzzy_index_ty *comp_findex;
  /* A once-only execution guard for the initialization of the fuzzy index.
     Needed for OpenMP.  Held by definitions_init_comp_findex while it
     checks and creates comp_findex.  */
  gl_lock_define(, comp_findex_init_lock)

  /* The canonical encoding of the definitions and the compendiums.
     Only used for fuzzy matching: it is passed to message_fuzzy_index_alloc
     when an index is built.  */
  const char *canon_charset;
};

/* Initialize DEFINITIONS.  Its list of message lists gets a NULL
   placeholder at index 0 (the not yet known current list), followed by all
   compendium message lists, if any.  No fuzzy index is built yet.
   CANON_CHARSET is stored for the later creation of the fuzzy indices.  */
static inline void
definitions_init (definitions_ty *definitions, const char *canon_charset)
{
  message_list_list_ty *lists = message_list_list_alloc ();

  /* Slot 0 is reserved for the current non-compendium message list.  */
  message_list_list_append (lists, NULL);
  if (compendiums != NULL)
    message_list_list_append_list (lists, compendiums);
  definitions->lists = lists;

  definitions->canon_charset = canon_charset;

  /* The fuzzy indices are created lazily.  */
  definitions->curr_findex = NULL;
  definitions->comp_findex = NULL;
  gl_lock_init (definitions->curr_findex_init_lock);
  gl_lock_init (definitions->comp_findex_init_lock);
}

/* Return the current list of non-compendium messages.  */
static inline message_list_ty *
definitions_current_list (const definitions_ty *definitions)
{
  return definitions->lists->item[0];
}

/* Make MLP the current non-compendium message list of DEFINITIONS.  A fuzzy
   index that was built for the previous current list is freed, so that it
   gets rebuilt lazily for MLP.  */
static inline void
definitions_set_current_list (definitions_ty *definitions, message_list_ty *mlp)
{
  message_fuzzy_index_ty *stale_index = definitions->curr_findex;

  definitions->lists->item[0] = mlp;

  /* No index built so far: nothing to invalidate.  */
  if (stale_index == NULL)
    return;

  message_fuzzy_index_free (stale_index);
  definitions->curr_findex = NULL;
}

/* Build the fuzzy index over the current non-compendium message list of
   DEFINITIONS, unless it exists already.
   Used only if use_fuzzy_matching is true.  */
static inline void
definitions_init_curr_findex (definitions_ty *definitions)
{
  /* The check and the creation happen under the lock, so that concurrent
     callers do not both create an index.  */
  gl_lock_lock (definitions->curr_findex_init_lock);
  if (definitions->curr_findex == NULL)
    {
      message_list_ty *current = definitions->lists->item[0];

      definitions->curr_findex =
        message_fuzzy_index_alloc (current, definitions->canon_charset);
    }
  gl_lock_unlock (definitions->curr_findex_init_lock);
}

/* Build the fuzzy index over the messages of all compendiums, unless it
   exists already.
   Used only if use_fuzzy_matching is true and compendiums != NULL.  */
static inline void
definitions_init_comp_findex (definitions_ty *definitions)
{
  /* The check and the creation happen under the lock, so that concurrent
     callers do not both create an index.  */
  gl_lock_lock (definitions->comp_findex_init_lock);
  if (definitions->comp_findex == NULL)
    {
      /* Flatten all compendium message lists into one list.  Duplicates are
         deliberately not filtered out.  */
      message_list_ty *flattened = message_list_alloc (false);
      size_t c;

      for (c = 0; c < compendiums->nitems; c++)
        {
          message_list_ty *one_compendium = compendiums->item[c];
          size_t m;

          for (m = 0; m < one_compendium->nitems; m++)
            message_list_append (flattened, one_compendium->item[m]);
        }

      /* Index the flattened list.  */
      definitions->comp_findex =
        message_fuzzy_index_alloc (flattened, definitions->canon_charset);
    }
  gl_lock_unlock (definitions->comp_findex_init_lock);
}

/* Exact search.  */
static inline message_ty *
definitions_search (const definitions_ty *definitions,
                    const char *msgctxt, const char *msgid)
{
  return message_list_list_search (definitions->lists, msgctxt, msgid);
}

/* Fuzzy search: return the best fuzzy match for MSGCTXT/MSGID among the
   current non-compendium messages and, if compendiums were given, the
   compendium messages; NULL if neither search yields a candidate.
   The needed fuzzy indices are created on first use.
   Used only if use_fuzzy_matching is true.  */
static inline message_ty *
definitions_search_fuzzy (definitions_ty *definitions,
                          const char *msgctxt, const char *msgid)
{
  message_ty *best;
  message_ty *from_compendium;
  double score_to_beat;

  if (false)
    {
      /* Old, slow code, kept for reference only.  */
      best =
        message_list_search_fuzzy (definitions_current_list (definitions),
                                   msgctxt, msgid);
    }
  else
    {
      /* Fast path: fstrcmp() with early abort, on messages pre-sorted
         through a hashed index.  The index is created lazily.  */
      if (definitions->curr_findex == NULL)
        definitions_init_curr_findex (definitions);
      best = message_fuzzy_index_search (definitions->curr_findex,
                                         msgctxt, msgid,
                                         FUZZY_THRESHOLD, false);
    }

  /* Without compendiums there is no second candidate.  */
  if (compendiums == NULL)
    return best;

  /* A compendium match is only interesting if it beats what has been found
     so far, or the general threshold if nothing has been found.  */
  if (best != NULL)
    score_to_beat = fuzzy_search_goal_function (best, msgctxt, msgid, 0.0);
  else
    score_to_beat = FUZZY_THRESHOLD;
  /* This lower bound must be >= FUZZY_THRESHOLD.  The test is written in
     negated form so that a NaN fails it as well.  */
  if (!(score_to_beat >= FUZZY_THRESHOLD))
    abort ();

  /* Create the compendium index lazily.  */
  if (definitions->comp_findex == NULL)
    definitions_init_comp_findex (definitions);

  from_compendium = message_fuzzy_index_search (definitions->comp_findex,
                                                msgctxt, msgid,
                                                score_to_beat, true);

  /* Choose the best among the two candidates.  */
  if (best == NULL)
    return from_compendium;
  if (from_compendium != NULL
      && (fuzzy_search_goal_function (from_compendium, msgctxt, msgid,
                                      score_to_beat)
          > score_to_beat))
    return from_compendium;
  return best;
}

/* Release what DEFINITIONS owns: its list of message lists and whichever
   fuzzy indices have been built.  The structure itself is not freed.  */
static inline void
definitions_destroy (definitions_ty *definitions)
{
  message_fuzzy_index_ty *curr_index = definitions->curr_findex;
  message_fuzzy_index_ty *comp_index = definitions->comp_findex;

  /* NOTE(review): the meaning of the second argument (2) is defined by
     message_list_list_free, outside this file -- confirm there.  */
  message_list_list_free (definitions->lists, 2);

  if (curr_index != NULL)
    message_fuzzy_index_free (curr_index);
  if (comp_index != NULL)
    message_fuzzy_index_free (comp_index);
}


/* An error logger that discards its message.  It is meant for callers that
   only need to know whether errors occurred at all, not what they were.
   The printf format attribute keeps compile-time checking of the
   arguments.  */
static void
silent_error_logger (const char *format, ...)
     __attribute__ ((__format__ (__printf__, 1, 2)));
static void
silent_error_logger (const char *format, ...)
{
  /* Intentionally empty: the message is dropped.  */
  (void) format;
}


/* An error handler that discards the report: none of its arguments is used
   and nothing is printed.  */
static void
silent_xerror (int severity,
               const struct message_ty *message,
               const char *filename, size_t lineno, size_t column,
               int multiline_p, const char *message_text)
{
  /* Intentionally empty: every argument is ignored.  */
  (void) severity;
  (void) message;
  (void) filename;
  (void) lineno;
  (void) column;
  (void) multiline_p;
  (void) message_text;
}


/* Merge the definition message DEF with the reference message REF and
   return a newly allocated message.

   The result takes its msgctxt, msgid, msgid_plural, "#." comments, "#:"
   file positions, format flags, range, wrap flag, syntax check flags and
   obsolete flag from REF, and its msgstr, "# " comments and source position
   from DEF.  For the header entry, the msgstr is rebuilt field by field:
   Report-Msgid-Bugs-To and POT-Creation-Date come from REF when REF has
   them, and the Language field is set from the global 'catalogname' or,
   when DEF has no Language field, derived from DEF's Language-Team field.

   FORCE_FUZZY is ORed into the fuzzy flag of the result.
   DISTRIBUTION is passed through to check_msgid_msgstr_format_i.

   The 'used' member of the result is set to 1 if REF has a msgid_plural but
   DEF has none, and to 2 in the opposite case; the caller uses this to
   postprocess such messages.  */
static message_ty *
message_merge (message_ty *def, message_ty *ref, bool force_fuzzy,
               const struct plural_distribution *distribution)
{
  const char *msgstr;
  size_t msgstr_len;
  const char *prev_msgctxt;
  const char *prev_msgid;
  const char *prev_msgid_plural;
  message_ty *result;
  size_t j, i;

  /* Take the msgid from the reference.  When fuzzy matches are made,
     the definition will not be unique, but the reference will be -
     usually because it has only been slightly changed.  */

  /* Take the msgstr from the definition.  The msgstr of the reference
     is usually empty, as it was generated by xgettext.  If we currently
     process the header entry we have to merge the msgstr by using the
     Report-Msgid-Bugs-To and POT-Creation-Date fields from the reference.  */
  if (is_header (ref))
    {
      /* Oh, oh.  The header entry and we have something to fill in.  */
      static const struct
      {
        const char *name;
        size_t len;
      } known_fields[] =
      {
        { "Project-Id-Version:", sizeof ("Project-Id-Version:") - 1 },
#define PROJECT_ID              0
        { "Report-Msgid-Bugs-To:", sizeof ("Report-Msgid-Bugs-To:") - 1 },
#define REPORT_MSGID_BUGS_TO    1
        { "POT-Creation-Date:", sizeof ("POT-Creation-Date:") - 1 },
#define POT_CREATION_DATE       2
        { "PO-Revision-Date:", sizeof ("PO-Revision-Date:") - 1 },
#define PO_REVISION_DATE        3
        { "Last-Translator:", sizeof ("Last-Translator:") - 1 },
#define LAST_TRANSLATOR         4
        { "Language-Team:", sizeof ("Language-Team:") - 1 },
#define LANGUAGE_TEAM           5
        { "Language:", sizeof ("Language:") - 1 },
#define LANGUAGE                6
        { "MIME-Version:", sizeof ("MIME-Version:") - 1 },
#define MIME_VERSION            7
        { "Content-Type:", sizeof ("Content-Type:") - 1 },
#define CONTENT_TYPE            8
        { "Content-Transfer-Encoding:",
          sizeof ("Content-Transfer-Encoding:") - 1 }
#define CONTENT_TRANSFER        9
      };
#define UNKNOWN 10
      /* For each known field, the value part of its header line (everything
         after the field name, including the terminating newline).  The
         extra slot UNKNOWN accumulates all unrecognized lines verbatim.  */
      struct
      {
        const char *string;
        size_t len;
      } header_fields[UNKNOWN + 1];
      struct obstack pool;
      const char *cp;
      char *newp;
      size_t len, cnt;

      /* Clear all fields.  */
      memset (header_fields, '\0', sizeof (header_fields));

      /* Prepare a temporary memory pool.  */
      obstack_init (&pool);

      /* Split the definition's header into lines and classify each one.  */
      cp = def->msgstr;
      while (*cp != '\0')
        {
          const char *endp = strchr (cp, '\n');
          int terminated = endp != NULL;

          if (!terminated)
            {
              /* Add a trailing newline.  */
              char *copy;
              endp = strchr (cp, '\0');

              len = endp - cp + 1;

              copy = (char *) obstack_alloc (&pool, len + 1);
              stpcpy (stpcpy (copy, cp), "\n");
              cp = copy;
            }
          else
            {
              len = (endp - cp) + 1;
              ++endp;
            }

          /* Compare with any of the known fields.  */
          for (cnt = 0;
               cnt < sizeof (known_fields) / sizeof (known_fields[0]);
               ++cnt)
            if (c_strncasecmp (cp, known_fields[cnt].name, known_fields[cnt].len)
                == 0)
              break;

          if (cnt < sizeof (known_fields) / sizeof (known_fields[0]))
            {
              header_fields[cnt].string = &cp[known_fields[cnt].len];
              header_fields[cnt].len = len - known_fields[cnt].len;
            }
          else
            {
              /* It's an unknown field.  Append content to what is already
                 known.  */
              char *extended =
                (char *) obstack_alloc (&pool,
                                        header_fields[UNKNOWN].len + len + 1);
              if (header_fields[UNKNOWN].string)
                memcpy (extended, header_fields[UNKNOWN].string,
                        header_fields[UNKNOWN].len);
              memcpy (&extended[header_fields[UNKNOWN].len], cp, len);
              extended[header_fields[UNKNOWN].len + len] = '\0';
              header_fields[UNKNOWN].string = extended;
              header_fields[UNKNOWN].len += len;
            }

          /* In the unterminated case ENDP points at the NUL of the original
             string, so the loop ends here.  */
          cp = endp;
        }

      /* Set the Language field if specified on the command line.  */
      if (catalogname != NULL)
        {
          /* Prepend a space and append a newline.  */
          size_t len = strlen (catalogname);
          char *copy = (char *) obstack_alloc (&pool, 1 + len + 1 + 1);
          stpcpy (stpcpy (stpcpy (copy, " "), catalogname), "\n");
          header_fields[LANGUAGE].string = copy;
          header_fields[LANGUAGE].len = strlen (header_fields[LANGUAGE].string);
        }
      /* Add a Language field to PO files that don't have one.  The Language
         field was introduced in gettext-0.18.  */
      else if (header_fields[LANGUAGE].string == NULL)
        {
          const char *language_team_ptr = header_fields[LANGUAGE_TEAM].string;

          if (language_team_ptr != NULL)
            {
              size_t language_team_len = header_fields[LANGUAGE_TEAM].len;

              /* Trim leading blanks.  */
              while (language_team_len > 0
                     && (*language_team_ptr == ' '
                         || *language_team_ptr == '\t'))
                {
                  language_team_ptr++;
                  language_team_len--;
                }

              /* Trim trailing blanks.  The field value always ends with the
                 newline of its header line, so strip that as well;
                 otherwise a field that consists only of the language name
                 could never match the language tables below, and the '>'
                 test on the last character could never succeed.  */
              while (language_team_len > 0
                     && (language_team_ptr[language_team_len - 1] == ' '
                         || language_team_ptr[language_team_len - 1] == '\t'
                         || language_team_ptr[language_team_len - 1] == '\n'))
                language_team_len--;

              /* Trim last word, if it looks like an URL or email address.  */
              {
                size_t i;

                for (i = language_team_len; i > 0; i--)
                  if (language_team_ptr[i - 1] == ' '
                      || language_team_ptr[i - 1] == '\t')
                    break;
                /* The last word: language_team_ptr[i..language_team_len-1].
                   Look for '@' and '/' only inside this last word.  */
                if (i < language_team_len
                    && (language_team_ptr[i] == '<'
                        || language_team_ptr[language_team_len - 1] == '>'
                        || memchr (language_team_ptr + i, '@',
                                   language_team_len - i)
                           != NULL
                        || memchr (language_team_ptr + i, '/',
                                   language_team_len - i)
                           != NULL))
                  {
                    /* Trim last word and blanks before it.  */
                    while (i > 0
                           && (language_team_ptr[i - 1] == ' '
                               || language_team_ptr[i - 1] == '\t'))
                      i--;
                    language_team_len = i;
                  }
              }

              /* The rest of the Language-Team field should be the English
                 name of the language.  Convert to ISO 639 and ISO 3166
                 syntax.  The variant table is consulted first.  */
              {
                size_t i;

                for (i = 0; i < language_variant_table_size; i++)
                  if (strlen (language_variant_table[i].english)
                      == language_team_len
                      && memcmp (language_variant_table[i].english,
                                 language_team_ptr, language_team_len) == 0)
                    {
                      header_fields[LANGUAGE].string =
                        language_variant_table[i].code;
                      break;
                    }
              }
              if (header_fields[LANGUAGE].string == NULL)
                {
                  size_t i;

                  for (i = 0; i < language_table_size; i++)
                    if (strlen (language_table[i].english) == language_team_len
                        && memcmp (language_table[i].english,
                                   language_team_ptr, language_team_len) == 0)
                      {
                        header_fields[LANGUAGE].string = language_table[i].code;
                        break;
                      }
                }
              if (header_fields[LANGUAGE].string != NULL)
                {
                  /* Prepend a space and append a newline.  */
                  const char *str = header_fields[LANGUAGE].string;
                  size_t len = strlen (str);
                  char *copy = (char *) obstack_alloc (&pool, 1 + len + 1 + 1);
                  stpcpy (stpcpy (stpcpy (copy, " "), str), "\n");
                  header_fields[LANGUAGE].string = copy;
                }
              else
                /* No language recognized: emit an empty Language field.  */
                header_fields[LANGUAGE].string = " \n";
              header_fields[LANGUAGE].len =
                strlen (header_fields[LANGUAGE].string);
            }
        }

      /* Take the Report-Msgid-Bugs-To field from the reference, if it has
         one.  */
      {
        const char *msgid_bugs_ptr;

        msgid_bugs_ptr = c_strstr (ref->msgstr, "Report-Msgid-Bugs-To:");
        if (msgid_bugs_ptr != NULL)
          {
            size_t msgid_bugs_len;
            const char *endp;

            msgid_bugs_ptr += sizeof ("Report-Msgid-Bugs-To:") - 1;

            endp = strchr (msgid_bugs_ptr, '\n');
            if (endp == NULL)
              {
                /* Add a trailing newline.  */
                char *extended;
                endp = strchr (msgid_bugs_ptr, '\0');
                msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
                extended = (char *) obstack_alloc (&pool, msgid_bugs_len + 1);
                stpcpy (stpcpy (extended, msgid_bugs_ptr), "\n");
                msgid_bugs_ptr = extended;
              }
            else
              msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;

            header_fields[REPORT_MSGID_BUGS_TO].string = msgid_bugs_ptr;
            header_fields[REPORT_MSGID_BUGS_TO].len = msgid_bugs_len;
          }
      }

      /* Take the POT-Creation-Date field from the reference, if it has
         one.  */
      {
        const char *pot_date_ptr;

        pot_date_ptr = c_strstr (ref->msgstr, "POT-Creation-Date:");
        if (pot_date_ptr != NULL)
          {
            size_t pot_date_len;
            const char *endp;

            pot_date_ptr += sizeof ("POT-Creation-Date:") - 1;

            endp = strchr (pot_date_ptr, '\n');
            if (endp == NULL)
              {
                /* Add a trailing newline.  */
                char *extended;
                endp = strchr (pot_date_ptr, '\0');
                pot_date_len = (endp - pot_date_ptr) + 1;
                extended = (char *) obstack_alloc (&pool, pot_date_len + 1);
                stpcpy (stpcpy (extended, pot_date_ptr), "\n");
                pot_date_ptr = extended;
              }
            else
              pot_date_len = (endp - pot_date_ptr) + 1;

            header_fields[POT_CREATION_DATE].string = pot_date_ptr;
            header_fields[POT_CREATION_DATE].len = pot_date_len;
          }
      }

      /* Concatenate all the various fields.  */
      len = 0;
      for (cnt = 0; cnt < UNKNOWN; ++cnt)
        if (header_fields[cnt].string != NULL)
          len += known_fields[cnt].len + header_fields[cnt].len;
      len += header_fields[UNKNOWN].len;

      cp = newp = XNMALLOC (len + 1, char);
      newp[len] = '\0';

      /* Emit "Name:" followed by the stored value, for fields that are
         present.  */
#define IF_FILLED(idx)                                                        \
      if (header_fields[idx].string)                                          \
        newp = stpncpy (stpcpy (newp, known_fields[idx].name),                \
                        header_fields[idx].string, header_fields[idx].len)

      IF_FILLED (PROJECT_ID);
      IF_FILLED (REPORT_MSGID_BUGS_TO);
      IF_FILLED (POT_CREATION_DATE);
      IF_FILLED (PO_REVISION_DATE);
      IF_FILLED (LAST_TRANSLATOR);
      IF_FILLED (LANGUAGE_TEAM);
      IF_FILLED (LANGUAGE);
      IF_FILLED (MIME_VERSION);
      IF_FILLED (CONTENT_TYPE);
      IF_FILLED (CONTENT_TRANSFER);
      if (header_fields[UNKNOWN].string != NULL)
        stpcpy (newp, header_fields[UNKNOWN].string);

#undef IF_FILLED

      /* Free the temporary memory pool.  */
      obstack_free (&pool, NULL);

      msgstr = cp;
      msgstr_len = strlen (cp) + 1;

      prev_msgctxt = NULL;
      prev_msgid = NULL;
      prev_msgid_plural = NULL;
    }
  else
    {
      msgstr = def->msgstr;
      msgstr_len = def->msgstr_len;

      if (def->is_fuzzy)
        {
          prev_msgctxt = def->prev_msgctxt;
          prev_msgid = def->prev_msgid;
          prev_msgid_plural = def->prev_msgid_plural;
        }
      else
        {
          prev_msgctxt = def->msgctxt;
          prev_msgid = def->msgid;
          prev_msgid_plural = def->msgid_plural;
        }
    }

  result = message_alloc (ref->msgctxt != NULL ? xstrdup (ref->msgctxt) : NULL,
                          xstrdup (ref->msgid), ref->msgid_plural,
                          msgstr, msgstr_len, &def->pos);

  /* Take the comments from the definition file.  There will be none at
     all in the reference file, as it was generated by xgettext.  */
  if (def->comment)
    for (j = 0; j < def->comment->nitems; ++j)
      message_comment_append (result, def->comment->item[j]);

  /* Take the dot comments from the reference file, as they are
     generated by xgettext.  Any in the definition file are old ones
     collected by previous runs of xgettext and msgmerge.  */
  if (ref->comment_dot)
    for (j = 0; j < ref->comment_dot->nitems; ++j)
      message_comment_dot_append (result, ref->comment_dot->item[j]);

  /* The flags are mixed in a special way.  Some information comes
     from the reference message (such as format/no-format), other
     information comes from the definition file (fuzzy or not).  */
  result->is_fuzzy = def->is_fuzzy | force_fuzzy;

  /* If ref and def have the same msgid but different msgid_plural, it's
     a reason to mark the result fuzzy.  */
  if (!result->is_fuzzy
      && (ref->msgid_plural != NULL
          ? def->msgid_plural == NULL
            || strcmp (ref->msgid_plural, def->msgid_plural) != 0
          : def->msgid_plural != NULL))
    result->is_fuzzy = true;

  for (i = 0; i < NFORMATS; i++)
    {
      result->is_format[i] = ref->is_format[i];

      /* If the reference message is marked as being a format specifier,
         but the definition message is not, we check if the resulting
         message would pass "msgfmt -c".  If yes, then all is fine.  If
         not, we add a fuzzy marker, because
         1. the message needs the translator's attention,
         2. msgmerge must not transform a PO file which passes "msgfmt -c"
            into a PO file which doesn't.  */
      if (!result->is_fuzzy
          && possible_format_p (ref->is_format[i])
          && !possible_format_p (def->is_format[i])
          && check_msgid_msgstr_format_i (ref->msgid, ref->msgid_plural,
                                          msgstr, msgstr_len, i, ref->range,
                                          distribution, silent_error_logger)
             > 0)
        result->is_fuzzy = true;
    }

  result->range = ref->range;
  /* If the definition message was assuming a certain range, but the reference
     message does not specify a range any more or specifies a range that is
     not the same or a subset, we add a fuzzy marker, because
       1. the message needs the translator's attention,
       2. msgmerge must not transform a PO file which passes "msgfmt -c"
          into a PO file which doesn't.  */
  if (!result->is_fuzzy
      && has_range_p (def->range)
      && !(has_range_p (ref->range)
           && ref->range.min >= def->range.min
           && ref->range.max <= def->range.max))
    result->is_fuzzy = true;

  result->do_wrap = ref->do_wrap;

  for (i = 0; i < NSYNTAXCHECKS; i++)
    result->do_syntax_check[i] = ref->do_syntax_check[i];

  /* Insert previous msgid, commented out with "#|".
     Do so only when --previous is specified, for backward compatibility.
     Since the "previous msgid" represents the original msgid that led to
     the current msgstr,
       - we can omit it if the resulting message is not fuzzy or is
         untranslated (but do this in a later pass, since result->is_fuzzy
         is not finalized at this point),
       - otherwise, if the corresponding message from the definition file
         was translated (not fuzzy), we use that message's msgid,
       - otherwise, we use that message's prev_msgid.  */
  if (keep_previous)
    {
      result->prev_msgctxt = prev_msgctxt;
      result->prev_msgid = prev_msgid;
      result->prev_msgid_plural = prev_msgid_plural;
    }

  /* If the reference message was obsolete, make the resulting message
     obsolete.  This case doesn't occur for POT files, but users sometimes
     use PO files that are themselves the result of msgmerge instead of POT
     files.  */
  result->obsolete = ref->obsolete;

  /* Take the file position comments from the reference file, as they
     are generated by xgettext.  Any in the definition file are old ones
     collected by previous runs of xgettext and msgmerge.  */
  for (j = 0; j < ref->filepos_count; ++j)
    {
      lex_pos_ty *pp = &ref->filepos[j];
      message_comment_filepos (result, pp->file_name, pp->line_number);
    }

  /* Special postprocessing is needed if the reference message is a
     plural form and the definition message isn't, or vice versa.
     Record which of the two cases applies in result->used.  */
  if (ref->msgid_plural != NULL)
    {
      if (def->msgid_plural == NULL)
        result->used = 1;
    }
  else
    {
      if (def->msgid_plural != NULL)
        result->used = 2;
    }

  /* All done, return the merged message to the caller.  */
  return result;
}


/* A progress dot is written to stderr whenever the count of processed
   messages is a multiple of this value (see match_domain).  */
#define DOT_FREQUENCY 10

/* Match every message of the reference list REFMLP against DEFINITIONS and
   append the resulting messages to RESULTMLP.

   For each reference message, an exact lookup (definitions_search) is tried
   first; if it fails, the message is not the header entry, and
   use_fuzzy_matching is set, a fuzzy lookup (definitions_search_fuzzy) is
   tried.  Found messages are combined through message_merge; messages that
   are not found are copied from the reference.

   FN1 is only used in the "not defined in %s" diagnostic.  FN2 is not
   referenced in this function.
   STATS->merged, STATS->fuzzied and STATS->missing are incremented according
   to how each reference message was handled.
   *PROCESSED is incremented once per reference message and controls the
   progress dots written to stderr.  */
static void
match_domain (const char *fn1, const char *fn2,
              definitions_ty *definitions, message_list_ty *refmlp,
              message_list_ty *resultmlp,
              struct statistics *stats, unsigned int *processed)
{
  message_ty *header_entry;
  unsigned long int nplurals;
  const struct expression *plural_expr;
  char *untranslated_plural_msgstr;
  struct plural_distribution distribution;
  /* Per reference message: the definition that was found (or NULL), and
     whether it was found by the fuzzy search.  */
  struct search_result { message_ty *found; bool fuzzy; } *search_results;
  size_t j;

  /* Get the plural expression and the number of plural forms from the
     header entry (msgid "") of the current definitions list.  */
  header_entry =
    message_list_search (definitions_current_list (definitions), NULL, "");
  extract_plural_expression (header_entry ? header_entry->msgstr : NULL,
                             &plural_expr, &nplurals);
  /* A buffer of nplurals NUL bytes, i.e. nplurals empty strings.  It is not
     freed in this function: untranslated plural messages appended to
     RESULTMLP below may use it as their msgstr.  */
  untranslated_plural_msgstr = XNMALLOC (nplurals, char);
  memset (untranslated_plural_msgstr, '\0', nplurals);

  /* Determine the plural distribution of the plural_expr formula.  */
  {
    /* Disable error output temporarily.  */
    void (*old_po_xerror) (int, const struct message_ty *, const char *, size_t,
                           size_t, int, const char *)
      = po_xerror;
    po_xerror = silent_xerror;

    /* If the check reports a problem (result > 0), use an empty
       distribution instead.  */
    if (check_plural_eval (plural_expr, nplurals, header_entry,
                           &distribution) > 0)
      {
        distribution.expr = NULL;
        distribution.often = NULL;
        distribution.often_length = 0;
        distribution.histogram = NULL;
      }

    po_xerror = old_po_xerror;
  }

  /* Most of the time is spent in definitions_search_fuzzy.
     Perform it in a separate loop that can be parallelized by an OpenMP
     capable compiler.  */
  search_results = XNMALLOC (refmlp->nitems, struct search_result);
  {
    long int nn = refmlp->nitems;
    long int jj;

    /* Tell the OpenMP capable compiler to distribute this loop across
       several threads.  The schedule is dynamic, because for some messages
       the loop body can be executed very quickly, whereas for others it takes
       a long time.
       Note: The Sun Workshop 6.2 C compiler does not allow a space between
       '#' and 'pragma'.  */
    #ifdef _OPENMP
     #pragma omp parallel for schedule(dynamic)
    #endif
    for (jj = 0; jj < nn; jj++)
      {
        message_ty *refmsg = refmlp->item[jj];
        message_ty *defmsg;

        /* Because merging can take a while we print something to signal
           we are not dead.  */
        /* NOTE(review): the read of *processed in this condition is not
           covered by the atomic directive below, which applies only to the
           increment - confirm that approximate dot output is acceptable
           when built with OpenMP.  */
        if (!quiet && verbosity_level <= 1 && *processed % DOT_FREQUENCY == 0)
          fputc ('.', stderr);
        #ifdef _OPENMP
         #pragma omp atomic
        #endif
        (*processed)++;

        /* See if it is in the other file.  */
        defmsg =
          definitions_search (definitions, refmsg->msgctxt, refmsg->msgid);
        if (defmsg != NULL)
          {
            search_results[jj].found = defmsg;
            search_results[jj].fuzzy = false;
          }
        else if (!is_header (refmsg)
                 /* If the message was not defined at all, try to find a very
                    similar message, it could be a typo, or the suggestion may
                    help.  */
                 && use_fuzzy_matching
                 && ((defmsg =
                        definitions_search_fuzzy (definitions,
                                                  refmsg->msgctxt,
                                                  refmsg->msgid)) != NULL))
          {
            search_results[jj].found = defmsg;
            search_results[jj].fuzzy = true;
          }
        else
          /* Not found; the 'fuzzy' member is left unset and is only read
             later when 'found' is non-NULL.  */
          search_results[jj].found = NULL;
      }
  }

  /* Second pass, sequential and in reference order: build the result list
     from the search results.  */
  for (j = 0; j < refmlp->nitems; j++)
    {
      message_ty *refmsg = refmlp->item[j];

      /* See if it is in the other file.
         This used definitions_search.  */
      if (search_results[j].found != NULL && !search_results[j].fuzzy)
        {
          message_ty *defmsg = search_results[j].found;
          /* Merge the reference with the definition: take the #. and
             #: comments from the reference, take the # comments from
             the definition, take the msgstr from the definition.  Add
             this merged entry to the output message list.  */
          message_ty *mp =
            message_merge (defmsg, refmsg, false, &distribution);

          /* When producing output for msgfmt, omit messages that are
             untranslated or fuzzy (except the header entry).  */
          if (!(for_msgfmt
                && (mp->msgstr[0] == '\0' /* untranslated? */
                    || (mp->is_fuzzy && !is_header (mp))))) /* fuzzy? */
            {
              message_list_append (resultmlp, mp);

              /* Remember that this message has been used, when we scan
                 later to see if anything was omitted.  */
              defmsg->used = 1;
            }

          /* Counted as merged even when the message was omitted above.  */
          stats->merged++;
        }
      else if (!is_header (refmsg))
        {
          /* If the message was not defined at all, try to find a very
             similar message, it could be a typo, or the suggestion may
             help.  This search assumed use_fuzzy_matching and used
             definitions_search_fuzzy.  */
          if (search_results[j].found != NULL && search_results[j].fuzzy)
            {
              message_ty *defmsg = search_results[j].found;
              message_ty *mp;

              if (verbosity_level > 1)
                {
                  po_gram_error_at_line (&refmsg->pos,
                                         _("this message is used but not defined..."));
                  /* The decrement makes the two-part diagnostic count only
                     once in error_message_count.  */
                  error_message_count--;
                  po_gram_error_at_line (&defmsg->pos,
                                         _("...but this definition is similar"));
                }

              /* Merge the reference with the definition: take the #. and
                 #: comments from the reference, take the # comments from
                 the definition, take the msgstr from the definition.  Add
                 this merged entry to the output message list.  */
              mp = message_merge (defmsg, refmsg, true, &distribution);

              message_list_append (resultmlp, mp);

              /* Remember that this message has been used, when we scan
                 later to see if anything was omitted.  */
              defmsg->used = 1;

              stats->fuzzied++;
              if (!quiet && verbosity_level <= 1)
                /* Always print a dot if we handled a fuzzy match.  */
                fputc ('.', stderr);
            }
          else
            {
              /* No definition was found: copy the reference message.  */
              message_ty *mp;
              bool is_untranslated;
              const char *p;
              const char *pend;

              if (verbosity_level > 1)
                po_gram_error_at_line (&refmsg->pos,
                                       _("this message is used but not defined in %s"),
                                       fn1);

              mp = message_copy (refmsg);

              /* Test if mp is untranslated.  (It most likely is.)  */
              is_untranslated = true;
              for (p = mp->msgstr, pend = p + mp->msgstr_len; p < pend; p++)
                if (*p != '\0')
                  {
                    is_untranslated = false;
                    break;
                  }

              if (mp->msgid_plural != NULL && is_untranslated)
                {
                  /* Change mp->msgstr_len consecutive empty strings into
                     nplurals consecutive empty strings.  */
                  /* The existing buffer is all NUL bytes, so it is replaced
                     only when it is too short for nplurals empty strings.  */
                  if (nplurals > mp->msgstr_len)
                    mp->msgstr = untranslated_plural_msgstr;
                  mp->msgstr_len = nplurals;
                }

              /* When producing output for msgfmt, omit messages that are
                 untranslated or fuzzy (except the header entry).  */
              if (!(for_msgfmt && (is_untranslated || mp->is_fuzzy)))
                {
                  message_list_append (resultmlp, mp);
                }

              stats->missing++;
            }
        }
    }

  free (search_results);

  /* Now postprocess the problematic merges.  This is needed because we
     want the result to pass the "msgfmt -c -v" check.  */
  {
    /* message_merge sets mp->used to 1 or 2, depending on the problem.
       Compute the bitwise OR of all these.  */
    int problematic = 0;

    for (j = 0; j < resultmlp->nitems; j++)
      problematic |= resultmlp->item[j]->used;

    if (problematic)
      {
        /* This nplurals shadows the function-level variable of the same
           name.  It stays 0 unless some message has bit 1 set in 'used'.  */
        unsigned long int nplurals = 0;

        if (problematic & 1)
          {
            /* Need to know nplurals of the result domain.  */
            /* This header_entry shadows the function-level variable and
               refers to the header of RESULTMLP.  */
            message_ty *header_entry =
              message_list_search (resultmlp, NULL, "");

            nplurals = get_plural_count (header_entry
                                         ? header_entry->msgstr
                                         : NULL);
          }

        for (j = 0; j < resultmlp->nitems; j++)
          {
            message_ty *mp = resultmlp->item[j];

            if ((mp->used & 1) && (nplurals > 0))
              {
                /* ref->msgid_plural != NULL but def->msgid_plural == NULL.
                   Use a copy of def->msgstr for each possible plural form.  */
                size_t new_msgstr_len;
                char *new_msgstr;
                char *p;
                unsigned long i;

                if (verbosity_level > 1)
                  po_gram_error_at_line (&mp->pos,
                                         _("this message should define plural forms"));

                new_msgstr_len = nplurals * mp->msgstr_len;
                new_msgstr = XNMALLOC (new_msgstr_len, char);
                for (i = 0, p = new_msgstr; i < nplurals; i++)
                  {
                    memcpy (p, mp->msgstr, mp->msgstr_len);
                    p += mp->msgstr_len;
                  }
                mp->msgstr = new_msgstr;
                mp->msgstr_len = new_msgstr_len;
                mp->is_fuzzy = true;
              }

            /* The length test is true when msgstr holds more than one
               NUL-terminated string.  */
            if ((mp->used & 2) && (mp->msgstr_len > strlen (mp->msgstr) + 1))
              {
                /* ref->msgid_plural == NULL but def->msgid_plural != NULL.
                   Use only the first among the plural forms.  */

                if (verbosity_level > 1)
                  po_gram_error_at_line (&mp->pos,
                                         _("this message should not define plural forms"));

                mp->msgstr_len = strlen (mp->msgstr) + 1;
                mp->is_fuzzy = true;
              }

            /* Postprocessing of this message is done.  */
            mp->used = 0;
          }
      }
  }

  /* Now that mp->is_fuzzy is finalized for all messages, remove the
     "previous msgid" information from all messages that are not fuzzy or
     are untranslated.  */
  for (j = 0; j < resultmlp->nitems; j++)
    {
      message_ty *mp = resultmlp->item[j];

      if (!mp->is_fuzzy || mp->msgstr[0] == '\0')
        {
          mp->prev_msgctxt = NULL;
          mp->prev_msgid = NULL;
          mp->prev_msgid_plural = NULL;
        }
    }
}

/* Merge the definitions file FN1 with the references file FN2, both read
   using INPUT_SYNTAX.
   The definitions (and the compendiums, if any) are first brought into a
   common encoding; then every reference message is matched against the
   definitions via match_domain; finally, unless for_msgfmt is set, the
   definitions that were not used are appended to the result as obsolete
   messages.
   Returns the freshly allocated merged message domain list.  The (possibly
   charset-converted) definitions list is stored in *DEFP.  */
static msgdomain_list_ty *
merge (const char *fn1, const char *fn2, catalog_input_format_ty input_syntax,
       msgdomain_list_ty **defp)
{
  msgdomain_list_ty *def;
  msgdomain_list_ty *ref;
  size_t j, k;
  unsigned int processed;
  struct statistics stats;
  msgdomain_list_ty *result;
  const char *def_canon_charset;
  definitions_ty definitions;
  message_list_ty *empty_list;

  stats.merged = stats.fuzzied = stats.missing = stats.obsolete = 0;

  /* This is the definitions file, created by a human.  */
  def = read_catalog_file (fn1, input_syntax);

  /* This is the references file, created by groping the sources with
     the xgettext program.  */
  ref = read_catalog_file (fn2, input_syntax);
  /* Add a dummy header entry, if the references file contains none.  */
  for (k = 0; k < ref->nitems; k++)
    if (message_list_search (ref->item[k]->messages, NULL, "") == NULL)
      {
        static lex_pos_ty pos = { __FILE__, __LINE__ };
        message_ty *refheader = message_alloc (NULL, "", NULL, "", 1, &pos);

        message_list_prepend (ref->item[k]->messages, refheader);
      }

  /* The references file can be either in ASCII or in UTF-8.  If it is
     in UTF-8, we have to convert the definitions and the compendiums to
     UTF-8 as well.  */
  {
    bool was_utf8 = false;
    /* Look at every non-obsolete header entry of the references; a single
       "charset=UTF-8" declaration is enough to set was_utf8.  */
    for (k = 0; k < ref->nitems; k++)
      {
        message_list_ty *mlp = ref->item[k]->messages;

        for (j = 0; j < mlp->nitems; j++)
          if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
            {
              const char *header = mlp->item[j]->msgstr;

              if (header != NULL)
                {
                  const char *charsetstr = c_strstr (header, "charset=");

                  if (charsetstr != NULL)
                    {
                      size_t len;

                      charsetstr += strlen ("charset=");
                      len = strcspn (charsetstr, " \t\n");
                      if (len == strlen ("UTF-8")
                          && c_strncasecmp (charsetstr, "UTF-8", len) == 0)
                        was_utf8 = true;
                    }
                }
            }
      }
    if (was_utf8)
      {
        def = iconv_msgdomain_list (def, "UTF-8", true, fn1);
        if (compendiums != NULL)
          for (k = 0; k < compendiums->nitems; k++)
            iconv_message_list (compendiums->item[k], NULL, po_charset_utf8,
                                compendium_filenames->item[k]);
      }
    else if (compendiums != NULL && compendiums->nitems > 0)
      {
        /* Ensure that the definitions and the compendiums are in the same
           encoding.  Prefer the encoding of the definitions file, if
           possible; otherwise, if the definitions file is empty and the
           compendiums are all in the same encoding, use that encoding;
           otherwise, use UTF-8.  */
        bool conversion_done = false;
        {
          char *charset = NULL;

          /* Get the encoding of the definitions file.  */
          for (k = 0; k < def->nitems; k++)
            {
              message_list_ty *mlp = def->item[k]->messages;

              for (j = 0; j < mlp->nitems; j++)
                if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
                  {
                    const char *header = mlp->item[j]->msgstr;

                    if (header != NULL)
                      {
                        const char *charsetstr = c_strstr (header, "charset=");

                        if (charsetstr != NULL)
                          {
                            size_t len;

                            charsetstr += strlen ("charset=");
                            len = strcspn (charsetstr, " \t\n");
                            charset = (char *) xmalloca (len + 1);
                            memcpy (charset, charsetstr, len);
                            charset[len] = '\0';
                            break;
                          }
                      }
                  }
              /* Stop at the first charset declaration found.  */
              if (charset != NULL)
                break;
            }
          if (charset != NULL)
            {
              const char *canon_charset = po_charset_canonicalize (charset);

              if (canon_charset != NULL)
                {
                  bool all_compendiums_iconvable = true;

                  if (compendiums != NULL)
                    for (k = 0; k < compendiums->nitems; k++)
                      if (!is_message_list_iconvable (compendiums->item[k],
                                                      NULL, canon_charset))
                        {
                          all_compendiums_iconvable = false;
                          break;
                        }

                  if (all_compendiums_iconvable)
                    {
                      /* Convert the compendiums to def's encoding.  */
                      if (compendiums != NULL)
                        for (k = 0; k < compendiums->nitems; k++)
                          iconv_message_list (compendiums->item[k],
                                              NULL, canon_charset,
                                              compendium_filenames->item[k]);
                      conversion_done = true;
                    }
                }
              freea (charset);
            }
        }
        if (!conversion_done)
          {
            if (def->nitems == 0
                || (def->nitems == 1 && def->item[0]->messages->nitems == 0))
              {
                /* The definitions file is empty.
                   Compare the encodings of the compendiums.  */
                const char *common_canon_charset = NULL;

                for (k = 0; k < compendiums->nitems; k++)
                  {
                    message_list_ty *mlp = compendiums->item[k];
                    char *charset = NULL;
                    const char *canon_charset = NULL;

                    for (j = 0; j < mlp->nitems; j++)
                      if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
                        {
                          const char *header = mlp->item[j]->msgstr;

                          if (header != NULL)
                            {
                              const char *charsetstr =
                                c_strstr (header, "charset=");

                              if (charsetstr != NULL)
                                {
                                  size_t len;

                                  charsetstr += strlen ("charset=");
                                  len = strcspn (charsetstr, " \t\n");
                                  charset = (char *) xmalloca (len + 1);
                                  memcpy (charset, charsetstr, len);
                                  charset[len] = '\0';

                                  break;
                                }
                            }
                        }
                    if (charset != NULL)
                      {
                        canon_charset = po_charset_canonicalize (charset);
                        freea (charset);
                      }
                    /* If no charset declaration was found in this file,
                       or if it is not a valid encoding name, or if it
                       differs from the common charset found so far,
                       we have no common charset.  */
                    if (canon_charset == NULL
                        || (common_canon_charset != NULL
                            && canon_charset != common_canon_charset))
                      {
                        common_canon_charset = NULL;
                        break;
                      }
                    common_canon_charset = canon_charset;
                  }

                if (common_canon_charset != NULL)
                  /* No conversion needed in this case.  */
                  conversion_done = true;
              }
            if (!conversion_done)
              {
                /* It's too hairy to find out what would be the optimal target
                   encoding.  So, convert everything to UTF-8.  */
                def = iconv_msgdomain_list (def, "UTF-8", true, fn1);
                if (compendiums != NULL)
                  for (k = 0; k < compendiums->nitems; k++)
                    iconv_message_list (compendiums->item[k],
                                        NULL, po_charset_utf8,
                                        compendium_filenames->item[k]);
              }
          }
      }
  }

  /* Determine canonicalized encoding name of the definitions now, after
     conversion.  Only used for fuzzy matching.  */
  if (use_fuzzy_matching)
    {
      def_canon_charset = def->encoding;
      if (def_canon_charset == NULL)
        {
          char *charset = NULL;

          /* Get the encoding of the definitions file.  */
          for (k = 0; k < def->nitems; k++)
            {
              message_list_ty *mlp = def->item[k]->messages;

              for (j = 0; j < mlp->nitems; j++)
                if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
                  {
                    const char *header = mlp->item[j]->msgstr;

                    if (header != NULL)
                      {
                        const char *charsetstr = c_strstr (header, "charset=");

                        if (charsetstr != NULL)
                          {
                            size_t len;

                            charsetstr += strlen ("charset=");
                            len = strcspn (charsetstr, " \t\n");
                            charset = (char *) xmalloca (len + 1);
                            memcpy (charset, charsetstr, len);
                            charset[len] = '\0';
                            break;
                          }
                      }
                  }
              /* Stop at the first charset declaration found.  */
              if (charset != NULL)
                break;
            }
          if (charset != NULL)
            {
              def_canon_charset = po_charset_canonicalize (charset);
              /* Release the temporary copy, like the other charset lookups
                 in this function do after canonicalizing.  */
              freea (charset);
            }
          if (def_canon_charset == NULL)
            /* Unspecified encoding.  Assume unibyte encoding.  */
            def_canon_charset = po_charset_ascii;
        }
    }
  else
    def_canon_charset = NULL;

  /* Initialize and preprocess the total set of message definitions.  */
  definitions_init (&definitions, def_canon_charset);
  empty_list = message_list_alloc (false);

  result = msgdomain_list_alloc (false);
  processed = 0;

  /* Every reference must be matched with its definition. */
  if (!multi_domain_mode)
    for (k = 0; k < ref->nitems; k++)
      {
        const char *domain = ref->item[k]->domain;
        message_list_ty *refmlp = ref->item[k]->messages;
        message_list_ty *resultmlp =
          msgdomain_list_sublist (result, domain, true);
        message_list_ty *defmlp;

        /* A domain without definitions is matched against an empty list.  */
        defmlp = msgdomain_list_sublist (def, domain, false);
        if (defmlp == NULL)
          defmlp = empty_list;
        definitions_set_current_list (&definitions, defmlp);

        match_domain (fn1, fn2, &definitions, refmlp, resultmlp,
                      &stats, &processed);
      }
  else
    {
      /* Apply the references messages in the default domain to each of
         the definition domains.  */
      message_list_ty *refmlp = ref->item[0]->messages;

      for (k = 0; k < def->nitems; k++)
        {
          const char *domain = def->item[k]->domain;
          message_list_ty *defmlp = def->item[k]->messages;

          /* Ignore the default message domain if it has no messages.  */
          if (k > 0 || defmlp->nitems > 0)
            {
              message_list_ty *resultmlp =
                msgdomain_list_sublist (result, domain, true);

              definitions_set_current_list (&definitions, defmlp);

              match_domain (fn1, fn2, &definitions, refmlp, resultmlp,
                            &stats, &processed);
            }
        }
    }

  definitions_destroy (&definitions);

  if (!for_msgfmt)
    {
      /* Look for messages in the definition file, which are not present
         in the reference file, indicating messages which are defined but
         not used in the program.  Don't scan the compendium(s).  */
      for (k = 0; k < def->nitems; ++k)
        {
          const char *domain = def->item[k]->domain;
          message_list_ty *defmlp = def->item[k]->messages;

          for (j = 0; j < defmlp->nitems; j++)
            {
              message_ty *defmsg = defmlp->item[j];

              if (!defmsg->used)
                {
                  /* Remember the old translation although it is not used anymore.
                     But we mark it as obsolete.  */
                  message_ty *mp;

                  mp = message_copy (defmsg);
                  /* Clear the extracted comments.  */
                  if (mp->comment_dot != NULL)
                    {
                      string_list_free (mp->comment_dot);
                      mp->comment_dot = NULL;
                    }
                  /* Clear the file position comments.  */
                  if (mp->filepos != NULL)
                    {
                      size_t i;

                      for (i = 0; i < mp->filepos_count; i++)
                        free ((char *) mp->filepos[i].file_name);
                      mp->filepos_count = 0;
                      free (mp->filepos);
                      mp->filepos = NULL;
                    }
                  /* Mark as obsolete.   */
                  mp->obsolete = true;

                  message_list_append (msgdomain_list_sublist (result, domain, true),
                                       mp);
                  stats.obsolete++;
                }
            }
        }
    }

  /* Determine the known a-priori encoding, if any.  */
  if (def->encoding == ref->encoding)
    result->encoding = def->encoding;

  /* Report some statistics.  */
  if (verbosity_level > 0)
    fprintf (stderr, _("%s\
Read %ld old + %ld reference, \
merged %ld, fuzzied %ld, missing %ld, obsolete %ld.\n"),
             !quiet && verbosity_level <= 1 ? "\n" : "",
             (long) def->nitems, (long) ref->nitems,
             (long) stats.merged, (long) stats.fuzzied, (long) stats.missing,
             (long) stats.obsolete);
  else if (!quiet)
    fputs (_(" done.\n"), stderr);

  /* Return results.  */
  *defp = def;
  return result;
}