xref: /openbsd/gnu/usr.bin/texinfo/makeinfo/lang.c (revision 78b63d65)
1 /* lang.c -- language depend behaviour (startpoint)
2    $Id: lang.c,v 1.1.1.1 2000/02/09 01:25:20 espie Exp $
3 
4    Copyright (C) 1999 Free Software Foundation, Inc.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 
20    Written by Karl Heinz Marbaise <kama@hippo.fido.de>.  */
21 
22 #include "system.h"
23 #include "cmds.h"
24 #include "lang.h"
25 #include "makeinfo.h"
26 
27 /* Current document encoding.  */
28 char *document_encoding = NULL;
29 
30 /* Current language code; default is English.  */
31 language_code_type language_code = en;
32 
33 language_struct language_table[] = {
34   { aa, "aa", "Afar" },
35   { ab, "ab", "Abkhazian" },
36   { af, "af", "Afrikaans" },
37   { am, "am", "Amharic" },
38   { ar, "ar", "Arabic" },
39   { as, "as", "Assamese" },
40   { ay, "ay", "Aymara" },
41   { az, "az", "Azerbaijani" },
42   { ba, "ba", "Bashkir" },
43   { be, "be", "Byelorussian" },
44   { bg, "bg", "Bulgarian" },
45   { bh, "bh", "Bihari" },
46   { bi, "bi", "Bislama" },
47   { bn, "bn", "Bengali; Bangla" },
48   { bo, "bo", "Tibetan" },
49   { br, "br", "Breton" },
50   { ca, "ca", "Catalan" },
51   { co, "co", "Corsican" },
52   { cs, "cs", "Czech" },
53   { cy, "cy", "Welsh" },
54   { da, "da", "Danish" },
55   { de, "de", "German" },
56   { dz, "dz", "Bhutani" },
57   { el, "el", "Greek" },
58   { en, "en", "English" },
59   { eo, "eo", "Esperanto" },
60   { es, "es", "Spanish" },
61   { et, "et", "Estonian" },
62   { eu, "eu", "Basque" },
63   { fa, "fa", "Persian" },
64   { fi, "fi", "Finnish" },
65   { fj, "fj", "Fiji" },
66   { fo, "fo", "Faroese" },
67   { fr, "fr", "French" },
68   { fy, "fy", "Frisian" },
69   { ga, "ga", "Irish" },
70   { gd, "gd", "Scots Gaelic" },
71   { gl, "gl", "Galician" },
72   { gn, "gn", "Guarani" },
73   { gu, "gu", "Gujarati" },
74   { ha, "ha", "Hausa" },
75   { he, "he", "Hebrew" } /* (formerly iw) */,
76   { hi, "hi", "Hindi" },
77   { hr, "hr", "Croatian" },
78   { hu, "hu", "Hungarian" },
79   { hy, "hy", "Armenian" },
80   { ia, "ia", "Interlingua" },
81   { id, "id", "Indonesian" } /* (formerly in) */,
82   { ie, "ie", "Interlingue" },
83   { ik, "ik", "Inupiak" },
84   { is, "is", "Icelandic" },
85   { it, "it", "Italian" },
86   { iu, "iu", "Inuktitut" },
87   { ja, "ja", "Japanese" },
88   { jw, "jw", "Javanese" },
89   { ka, "ka", "Georgian" },
90   { kk, "kk", "Kazakh" },
91   { kl, "kl", "Greenlandic" },
92   { km, "km", "Cambodian" },
93   { kn, "kn", "Kannada" },
94   { ko, "ko", "Korean" },
95   { ks, "ks", "Kashmiri" },
96   { ku, "ku", "Kurdish" },
97   { ky, "ky", "Kirghiz" },
98   { la, "la", "Latin" },
99   { ln, "ln", "Lingala" },
100   { lo, "lo", "Laothian" },
101   { lt, "lt", "Lithuanian" },
102   { lv, "lv", "Latvian, Lettish" },
103   { mg, "mg", "Malagasy" },
104   { mi, "mi", "Maori" },
105   { mk, "mk", "Macedonian" },
106   { ml, "ml", "Malayalam" },
107   { mn, "mn", "Mongolian" },
108   { mo, "mo", "Moldavian" },
109   { mr, "mr", "Marathi" },
110   { ms, "ms", "Malay" },
111   { mt, "mt", "Maltese" },
112   { my, "my", "Burmese" },
113   { na, "na", "Nauru" },
114   { ne, "ne", "Nepali" },
115   { nl, "nl", "Dutch" },
116   { no, "no", "Norwegian" },
117   { oc, "oc", "Occitan" },
118   { om, "om", "(Afan) Oromo" },
119   { or, "or", "Oriya" },
120   { pa, "pa", "Punjabi" },
121   { pl, "pl", "Polish" },
122   { ps, "ps", "Pashto, Pushto" },
123   { pt, "pt", "Portuguese" },
124   { qu, "qu", "Quechua" },
125   { rm, "rm", "Rhaeto-Romance" },
126   { rn, "rn", "Kirundi" },
127   { ro, "ro", "Romanian" },
128   { ru, "ru", "Russian" },
129   { rw, "rw", "Kinyarwanda" },
130   { sa, "sa", "Sanskrit" },
131   { sd, "sd", "Sindhi" },
132   { sg, "sg", "Sangro" },
133   { sh, "sh", "Serbo-Croatian" },
134   { si, "si", "Sinhalese" },
135   { sk, "sk", "Slovak" },
136   { sl, "sl", "Slovenian" },
137   { sm, "sm", "Samoan" },
138   { sn, "sn", "Shona" },
139   { so, "so", "Somali" },
140   { sq, "sq", "Albanian" },
141   { sr, "sr", "Serbian" },
142   { ss, "ss", "Siswati" },
143   { st, "st", "Sesotho" },
144   { su, "su", "Sundanese" },
145   { sv, "sv", "Swedish" },
146   { sw, "sw", "Swahili" },
147   { ta, "ta", "Tamil" },
148   { te, "te", "Telugu" },
149   { tg, "tg", "Tajik" },
150   { th, "th", "Thai" },
151   { ti, "ti", "Tigrinya" },
152   { tk, "tk", "Turkmen" },
153   { tl, "tl", "Tagalog" },
154   { tn, "tn", "Setswana" },
155   { to, "to", "Tonga" },
156   { tr, "tr", "Turkish" },
157   { ts, "ts", "Tsonga" },
158   { tt, "tt", "Tatar" },
159   { tw, "tw", "Twi" },
160   { ug, "ug", "Uighur" },
161   { uk, "uk", "Ukrainian" },
162   { ur, "ur", "Urdu" },
163   { uz, "uz", "Uzbek" },
164   { vi, "vi", "Vietnamese" },
165   { vo, "vo", "Volapuk" },
166   { wo, "wo", "Wolof" },
167   { xh, "xh", "Xhosa" },
168   { yi, "yi", "Yiddish" } /* (formerly ji) */,
169   { yo, "yo", "Yoruba" },
170   { za, "za", "Zhuang" },
171   { zh, "zh", "Chinese" },
172   { zu, "zu", "Zulu" },
173   { last_language_code, NULL, NULL }
174 };
175 
176 /* @documentlanguage.  Maybe we'll do something useful with this in the
177    future.  For now, we just recognize it.  */
178 void
179 cm_documentlanguage ()
180 {
181   language_code_type c;
182   char *lang_arg;
183 
184   /* Read the line with the language code on it.  */
185   get_rest_of_line (1, &lang_arg);
186 
187   /* Linear search is fine these days.  */
188   for (c = aa; c != last_language_code; c++)
189     {
190       if (strcmp (lang_arg, language_table[c].abbrev) == 0)
191         { /* Set current language code.  */
192           language_code = c;
193           break;
194         }
195     }
196 
197   /* If we didn't find this code, complain.  */
198   if (c == last_language_code)
199     warning (_("%s is not a valid ISO 639 language code"), lang_arg);
200 
201   free (lang_arg);
202 }
203 
204 
205 
206 /* @documentencoding.  Set global.  */
207 void
208 cm_documentencoding ()
209 {
210   get_rest_of_line (1, &document_encoding);
211 }
212 
213 
214 
215 /* Accent commands that take explicit arguments and don't have any
216    special HTML support.  */
217 
218 void
219 cm_accent (arg)
220     int arg;
221 {
222   if (arg == START)
223     {
224       /* Must come first to avoid ambiguity with overdot.  */
225       if (strcmp (command, "udotaccent") == 0)      /* underdot */
226         add_char ('.');
227     }
228   else if (arg == END)
229     {
230       if (strcmp (command, "=") == 0)               /* macron */
231         add_word (html ? "&macr;" : "=");
232       else if (strcmp (command, "H") == 0)          /* Hungarian umlaut */
233         add_word ("''");
234       else if (strcmp (command, "dotaccent") == 0)  /* overdot */
235         add_meta_char ('.');
236       else if (strcmp (command, "ringaccent") == 0) /* ring */
237         add_char ('*');
238       else if (strcmp (command, "tieaccent") == 0)  /* long tie */
239         add_char ('[');
240       else if (strcmp (command, "u") == 0)          /* breve */
241         add_char ('(');
242       else if (strcmp (command, "ubaraccent") == 0) /* underbar */
243         add_char ('_');
244       else if (strcmp (command, "v") == 0)          /* hacek/check */
245         add_word (html ? "&lt;" : "<");
246     }
247 }
248 
249 /* Common routine for the accent characters that have support in HTML.
250    If the character being accented is in the HTML_SUPPORTED set, then
251    produce &CHTML_SOLO;, for example, &Auml; for an A-umlaut.  If not in
252    HTML_SUPPORTED, just produce &HTML_SOLO;X for the best we can do with
253    at an X-umlaut.  Finally, if not producing HTML, just use SINGLE, a
254    character such as " which is the best plain text representation we
255    can manage.  If HTML_SOLO_STANDALONE is zero the given HTML_SOLO
256    does not exist as valid standalone character in HTML.  */
257 
258 static void
259 cm_accent_generic (arg, start, end, html_supported, single,
260                    html_solo_standalone, html_solo)
261      int arg, start, end;
262      char *html_supported;
263      int single;
264      int html_solo_standalone;
265      char *html_solo;
266 {
267   if (html)
268     {
269       static int valid_html_accent;
270 
271       if (arg == START)
272 	{ /* If HTML has good support for this character, use it.  */
273 	  if (strchr (html_supported, curchar ()))
274 	    { /* Yes; start with an ampersand.  The character itself
275 	         will be added later in read_command (makeinfo.c).  */
276 	      add_char ('&');
277               valid_html_accent = 1;
278             }
279 	  else
280 	    { /* No special HTML support, so produce standalone char.  */
281 	      valid_html_accent = 0;
282 	      if (html_solo_standalone)
283 		{
284 		  add_char ('&');
285 		  add_word (html_solo);
286 		  add_char (';');
287 		}
288 	      else
289 		/* If the html_solo does not exist as standalone character
290 		   (namely &circ; &grave; &tilde;), then we use
291  		   the single character version instead.  */
292                 add_char (single);
293 	    }
294 	}
295       else if (arg == END)
296 	{ /* Only if we saw a valid_html_accent can we use the full
297 	     HTML accent (umlaut, grave ...).  */
298 	  if (valid_html_accent)
299 	    {
300 	      add_word (html_solo);
301 	      add_char (';');
302 	    }
303 	}
304     }
305   else if (arg == END)
306     { /* Not producing HTML, so just use the normal character.  */
307       add_char (single);
308     }
309 }
310 
/* Umlaut accent: combined HTML entities exist for the letters listed;
   other letters get a standalone &uml; in HTML.  Plain output uses `"'.  */
void
cm_accent_umlaut (arg, start, end)
     int arg, start, end;
{
  char *letters_with_entity = "aouAOUEeIiy";

  cm_accent_generic (arg, start, end, letters_with_entity, '"', 1, "uml");
}
317 
/* Acute accent: combined HTML entities exist for the letters listed;
   other letters get a standalone &acute; in HTML.  Plain output uses
   an apostrophe.  */
void
cm_accent_acute (arg, start, end)
     int arg, start, end;
{
  char *letters_with_entity = "AEIOUYaeiouy";

  cm_accent_generic (arg, start, end, letters_with_entity, '\'', 1, "acute");
}
324 
/* Cedilla: only `C' and `c' have a combined HTML entity; other letters
   get a standalone &cedil; in HTML.  Plain output uses a comma.  */
void
cm_accent_cedilla (arg, start, end)
     int arg, start, end;
{
  char *letters_with_entity = "Cc";

  cm_accent_generic (arg, start, end, letters_with_entity, ',', 1, "cedil");
}
331 
/* Circumflex: combined HTML entities exist for the vowels listed.  The
   entity is flagged as not usable standalone, so other letters get a
   plain `^' even in HTML.  */
void
cm_accent_hat (arg, start, end)
     int arg, start, end;
{
  char *letters_with_entity = "AEIOUaeiou";

  cm_accent_generic (arg, start, end, letters_with_entity, '^', 0, "circ");
}
338 
/* Grave accent: combined HTML entities exist for the vowels listed.
   The entity is flagged as not usable standalone, so other letters get
   a plain backquote even in HTML.  */
void
cm_accent_grave (arg, start, end)
     int arg, start, end;
{
  char *letters_with_entity = "AEIOUaeiou";

  cm_accent_generic (arg, start, end, letters_with_entity, '`', 0, "grave");
}
345 
/* Tilde accent.  HTML defines combined tilde entities for A, N, O and
   their lowercase forms (&Atilde; &Ntilde; &Otilde; &atilde; &ntilde;
   &otilde;), so all six are listed as supported; `N' was previously
   missing, which made a tilde over capital N fall back to `~N' in HTML
   while lowercase `n' produced &ntilde;.  The entity is flagged as not
   usable standalone, so any other letter gets a plain `~'.  */
void
cm_accent_tilde (arg, start, end)
     int arg, start, end;
{
  cm_accent_generic (arg, start, end, "ANOano", '~', 0, "tilde");
}
352 
353 
354 
355 /* Non-English letters/characters that don't insert themselves.  */
356 void
357 cm_special_char (arg)
358 {
359   if (arg == START)
360     {
361       if ((*command == 'L' || *command == 'l'
362            || *command == 'O' || *command == 'o')
363           && command[1] == 0)
364         { /* Lslash lslash Oslash oslash.
365              Lslash and lslash aren't supported in HTML.  */
366           if (html && (command[0] == 'O' || command[0] == 'o'))
367             add_word_args ("&%cslash;", command[0]);
368           else
369             add_word_args ("/%c", command[0]);
370         }
371       else if (strcmp (command, "exclamdown") == 0)
372         add_word (html ? "&iexcl;" : "!");
373       else if (strcmp (command, "pounds") == 0)
374         add_word (html ? "&pound;" : "#");
375       else if (strcmp (command, "questiondown") == 0)
376         add_word (html ? "&iquest;" : "?");
377       else if (strcmp (command, "AE") == 0)
378         add_word (html ? "&AElig;" : command);
379       else if (strcmp (command, "ae") == 0)
380         add_word (html ? "&aelig;" : command);
381       else if (strcmp (command, "OE") == 0)
382         add_word (html ? "&#140;" : command);
383       else if (strcmp (command, "oe") == 0)
384         add_word (html ? "&#156;" : command);
385       else if (strcmp (command, "AA") == 0)
386         add_word (html ? "&Aring;" : command);
387       else if (strcmp (command, "aa") == 0)
388         add_word (html ? "&aring;" : command);
389       else if (strcmp (command, "ss") == 0)
390         add_word (html ? "&szlig;" : command);
391       else
392         line_error ("cm_special_char internal error: command=@%s", command);
393     }
394 }
395 
396 /* Dotless i or j.  */
397 void
398 cm_dotless (arg, start, end)
399     int arg, start, end;
400 {
401   if (arg == END)
402     {
403       if (output_paragraph[start] != 'i' && output_paragraph[start] != 'j')
404         /* This error message isn't perfect if the argument is multiple
405            characters, but it doesn't seem worth getting right.  */
406         line_error (_("%c%s expects `i' or `j' as argument, not `%c'"),
407                     COMMAND_PREFIX, command, output_paragraph[start]);
408 
409       else if (end - start != 1)
410         line_error (_("%c%s expects a single character `i' or `j' as argument"),
411                     COMMAND_PREFIX, command);
412 
413       /* We've already inserted the `i' or `j', so nothing to do.  */
414     }
415 }
416