1 /*@z43.c:Language Service:LanguageChange, LanguageString@*********************/
2 /*                                                                           */
3 /*  THE LOUT DOCUMENT FORMATTING SYSTEM (VERSION 3.39)                       */
4 /*  COPYRIGHT (C) 1991, 2008 Jeffrey H. Kingston                             */
5 /*                                                                           */
6 /*  Jeffrey H. Kingston (jeff@it.usyd.edu.au)                                */
7 /*  School of Information Technologies                                       */
8 /*  The University of Sydney 2006                                            */
9 /*  AUSTRALIA                                                                */
10 /*                                                                           */
11 /*  This program is free software; you can redistribute it and/or modify     */
12 /*  it under the terms of the GNU General Public License as published by     */
13 /*  the Free Software Foundation; either Version 3, or (at your option)      */
14 /*  any later version.                                                       */
15 /*                                                                           */
16 /*  This program is distributed in the hope that it will be useful,          */
17 /*  but WITHOUT ANY WARRANTY; without even the implied warranty of           */
18 /*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
19 /*  GNU General Public License for more details.                             */
20 /*                                                                           */
21 /*  You should have received a copy of the GNU General Public License        */
22 /*  along with this program; if not, write to the Free Software              */
23 /*  Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA   */
24 /*                                                                           */
25 /*  FILE:         z43.c                                                      */
26 /*  MODULE:       Language Service                                           */
27 /*  EXTERNS:      LanguageInit(), LanguageDefine(), LanguageChange(),        */
28 /*                LanguageString(), LanguageHyph()                           */
29 /*                                                                           */
30 /*****************************************************************************/
31 #include "externs.h"
32 #define INIT_LANGUAGE_NUM	100
33 
34 
35 /*****************************************************************************/
36 /*                                                                           */
37 /*  LANGUAGE_TABLE                                                           */
38 /*                                                                           */
39 /*  A symbol table permitting access to language name records.               */
40 /*  The table will automatically enlarge to accept any number of entries.    */
41 /*                                                                           */
42 /*     ltab_new(newsize)         New empty table, newsize capacity           */
43 /*     ltab_insert(x, &S)        Insert new language name object x into S    */
44 /*     ltab_retrieve(str, S)     Retrieve language name object named str     */
45 /*     ltab_debug(S, fp)         Debug print of table S to file fp           */
46 /*                                                                           */
47 /*****************************************************************************/
48 
49 typedef struct
50 { int langtab_size;				/* size of table             */
51   int langtab_count;				/* number of objects held    */
52   OBJECT langtab_item[1];
53 } *LANGUAGE_TABLE;
54 
55 #define	ltab_size(S)	(S)->langtab_size
56 #define	ltab_count(S)	(S)->langtab_count
57 #define	ltab_item(S, i)	(S)->langtab_item[i]
58 
/* hash(pos, str, S): set the int lvalue pos to the bucket number of the   */
/* string str in table S, i.e. the sum of the characters of str modulo     */
/* ltab_size(S).  An empty string hashes to bucket 0; the scan never moves */
/* past the terminating zero character.  Use as a statement, followed by   */
/* a semicolon.                                                            */
#define hash(pos, str, S)						\
do { FULL_CHAR *hash_p = (str);						\
  (pos) = 0;								\
  while( *hash_p )  (pos) += *hash_p++;					\
  (pos) = (pos) % ltab_size(S);						\
} while( 0 )
65 
ltab_new(int newsize)66 static LANGUAGE_TABLE ltab_new(int newsize)
67 { LANGUAGE_TABLE S;  int i;
68   ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 1,
69     2*sizeof(int) + newsize * sizeof(OBJECT)));
70   S = (LANGUAGE_TABLE)
71 	  malloc(2*sizeof(int) + newsize * sizeof(OBJECT));
72   if( S == (LANGUAGE_TABLE) NULL )
73     Error(43, 1, "run out of memory enlarging language table", FATAL, no_fpos);
74   ltab_size(S) = newsize;
75   ltab_count(S) = 0;
76   for( i = 0;  i < newsize;  i++ )  ltab_item(S, i) = nilobj;
77   return S;
78 } /* end ltab_new */
79 
80 static void ltab_insert(OBJECT x, LANGUAGE_TABLE *S);
81 
ltab_rehash(LANGUAGE_TABLE S,int newsize)82 static LANGUAGE_TABLE ltab_rehash(LANGUAGE_TABLE S, int newsize)
83 { LANGUAGE_TABLE NewS;  int i;
84   NewS = ltab_new(newsize);
85   for( i = 1;  i <= ltab_size(S);  i++ )
86   { if( ltab_item(S, i) != nilobj )
87       ltab_insert(ltab_item(S, i), &NewS);
88   }
89   free(S);
90   return NewS;
91 } /* end ltab_rehash */
92 
ltab_insert(OBJECT x,LANGUAGE_TABLE * S)93 static void ltab_insert(OBJECT x, LANGUAGE_TABLE *S)
94 { int pos;  OBJECT z, link, y;
95   if( ltab_count(*S) == ltab_size(*S) - 1 )	/* one less since 0 unused */
96     *S = ltab_rehash(*S, 2*ltab_size(*S));
97   hash(pos, string(x), *S);
98   if( ltab_item(*S, pos) == nilobj )  New(ltab_item(*S, pos), ACAT);
99   z = ltab_item(*S, pos);
100   for( link = Down(z);  link != z;  link = NextDown(link) )
101   { Child(y, link);
102     if( StringEqual(string(x), string(y)) )
103     { Error(43, 2, "language name %s used twice (first at%s)",
104 	FATAL, &fpos(x), string(x), EchoFilePos(&fpos(y)));
105     }
106   }
107   Link(ltab_item(*S, pos), x);
108 } /* end ltab_insert */
109 
ltab_retrieve(FULL_CHAR * str,LANGUAGE_TABLE S)110 static OBJECT ltab_retrieve(FULL_CHAR *str, LANGUAGE_TABLE S)
111 { OBJECT x, link, y;  int pos;
112   hash(pos, str, S);
113   x = ltab_item(S, pos);
114   if( x == nilobj )  return nilobj;
115   for( link = Down(x);  link != x;  link = NextDown(link) )
116   { Child(y, link);
117     if( StringEqual(str, string(y)) )  return y;
118   }
119   return nilobj;
120 } /* end ltab_retrieve */
121 
#if DEBUG_ON
/* ltab_debug(S, fp): print the size and count of table S to fp, then one  */
/* line per bucket listing the strings of the words held in it.            */
static void ltab_debug(LANGUAGE_TABLE S, FILE *fp)
{ int slot;
  OBJECT bucket, lnk, entry;

  fprintf(fp, "  table size: %d;  current number of keys: %d%s",
    ltab_size(S), ltab_count(S), STR_NEWLINE);
  for( slot = 0;  slot < ltab_size(S);  slot++ )
  { bucket = ltab_item(S, slot);
    fprintf(fp, "ltab_item(S, %d) =", slot);
    if( bucket == nilobj )
    { fprintf(fp, " <nilobj>");
    }
    else if( type(bucket) != ACAT )
    { fprintf(fp, " not ACAT!");
    }
    else
    { lnk = Down(bucket);
      while( lnk != bucket )
      { Child(entry, lnk);
	if( is_word(type(entry)) )
	  fprintf(fp, " %s", string(entry));
	else
	  fprintf(fp, " %s", AsciiToFull("not-WORD!"));
	lnk = NextDown(lnk);
      }
    }
    fprintf(fp, "%s", STR_NEWLINE);
  }
} /* end ltab_debug */
#endif
143 
144 
/* Module state.  hyph_tab, canonical_tab and lang_ends are all indexed by */
/* language number; LanguageDefine() increments lang_count before storing, */
/* so the first language is number 1 and entry 0 is never assigned.        */
/* hyph_tab and canonical_tab are heap arrays which LanguageDefine()       */
/* doubles as needed; lang_ends has the fixed size MAX_LANGUAGE.           */
static	LANGUAGE_TABLE	names_tab;		/* the language names        */
static	OBJECT		*hyph_tab;		/* array of hyph filenames   */
static	OBJECT		*canonical_tab;		/* array of lang names       */
static	int		lang_tabsize;		/* size of prev two arrays   */
static	int		lang_count;		/* number of languages       */
static	OBJECT		lang_ends[MAX_LANGUAGE];/* sentence endings        */
151 
152 /*@@**************************************************************************/
153 /*                                                                           */
154 /*  BOOLEAN LanguageSentenceEnds[]                                           */
155 /*                                                                           */
156 /*  LanguageSentenceEnds[ch] is TRUE if there exists a language in which     */
157 /*  character ch could occur at the end of a sentence.                       */
158 /*                                                                           */
159 /*****************************************************************************/
160 
/* all FALSE after LanguageInit(); LanguageDefine() sets the entry for the */
/* last character of each sentence ending pattern to TRUE                  */
BOOLEAN LanguageSentenceEnds[MAX_CHARS];
162 
163 
164 /*@::LanguageInit(), LanguageDefine()@****************************************/
165 /*                                                                           */
166 /*  LanguageInit()                                                           */
167 /*                                                                           */
168 /*  Initialize this module.                                                  */
169 /*                                                                           */
170 /*****************************************************************************/
171 
LanguageInit(void)172 void LanguageInit(void)
173 { int i;
174   debug0(DLS, D, "LanguageInit()");
175   names_tab = ltab_new(INIT_LANGUAGE_NUM);
176   lang_count = 0;
177   lang_tabsize = INIT_LANGUAGE_NUM;
178   ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 0,
179     INIT_LANGUAGE_NUM * sizeof(OBJECT)));
180   hyph_tab = (OBJECT *) malloc(INIT_LANGUAGE_NUM * sizeof(OBJECT));
181   ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 0,
182     INIT_LANGUAGE_NUM * sizeof(OBJECT)));
183   canonical_tab = (OBJECT *) malloc(INIT_LANGUAGE_NUM * sizeof(OBJECT));
184   for( i = 0;  i < MAX_CHARS;  i++ )  LanguageSentenceEnds[i] = FALSE;
185   debug0(DLS, D, "LanguageInit returning.");
186 } /* end LanguageInit */
187 
188 
189 /*****************************************************************************/
190 /*                                                                           */
191 /*  LanguageDefine(names, inside)                                            */
192 /*                                                                           */
/*  Define a language whose names are given by the ACAT of words names.      */
/*  The first child of inside is the name of its hyphenation patterns file;  */
/*  the remaining children of inside are its sentence ending patterns.       */
195 /*                                                                           */
196 /*****************************************************************************/
197 
/* LanguageDefine(names, inside): define a new language.                   */
/*                                                                         */
/*   names    non-empty ACAT of words; every word becomes a name of the    */
/*            language and the first one is its canonical name.            */
/*   inside   a single object or an ACAT; its first child must be a word   */
/*            naming the hyphenation file (an empty word or STR_HYPHEN     */
/*            means none), and every other child apart from gaps must be   */
/*            a non-empty word giving a sentence ending pattern.           */
/*                                                                         */
/* The language receives the next language number.  That number indexes    */
/* lang_ends[], which has the fixed size MAX_LANGUAGE, so a FATAL error    */
/* is reported rather than storing beyond the end of that array.           */
void LanguageDefine(OBJECT names, OBJECT inside)
{ OBJECT link, y, hyph_file;  FULL_CHAR ch;
  OBJECT *new_tab;
  int len;
  assert( names != nilobj && type(names) == ACAT, "LanguageDefine: names!");
  assert( Down(names) != names, "LanguageDefine: names is empty!");
  debug2(DLS, D, "LanguageDefine(%s, %s)",
    EchoObject(names), EchoObject(inside));

  /* refuse a language number which lang_ends[] cannot hold */
  if( lang_count + 1 >= MAX_LANGUAGE )
    Error(43, 8, "too many languages defined (maximum is %d)",
      FATAL, &fpos(names), MAX_LANGUAGE - 1);

  /* double table size if overflow */
  if( ++lang_count >= lang_tabsize )
  {
    ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 0,
      -lang_tabsize * sizeof(OBJECT)));
    ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 0,
      -lang_tabsize * sizeof(OBJECT)));
    lang_tabsize *= 2;
    ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 0,
      lang_tabsize * sizeof(OBJECT)));
    ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 0,
      lang_tabsize * sizeof(OBJECT)));

    /* keep the old pointers until each realloc is known to have worked */
    new_tab = (OBJECT *) realloc(hyph_tab, lang_tabsize * sizeof(OBJECT));
    if( new_tab == (OBJECT *) NULL )
      Error(43, 1, "run out of memory enlarging language table",
	FATAL, no_fpos);
    hyph_tab = new_tab;
    new_tab = (OBJECT *) realloc(canonical_tab, lang_tabsize * sizeof(OBJECT));
    if( new_tab == (OBJECT *) NULL )
      Error(43, 1, "run out of memory enlarging language table",
	FATAL, no_fpos);
    canonical_tab = new_tab;
  }

  /* insert each language name into names_tab */
  for( link = Down(names);  link != names;  link = NextDown(link) )
  { Child(y, link);
    assert( is_word(type(y)), "LanguageDefine: type(y) != WORD!" );
    word_language(y) = lang_count;
    ltab_insert(y, &names_tab);
  }

  /* initialize canonical language name entry */
  Child(y, Down(names));
  canonical_tab[lang_count] = y;

  /* make inside an ACAT if it isn't already */
  if( type(inside) != ACAT )
  { New(y, ACAT);
    FposCopy(fpos(y), fpos(inside));
    Link(y, inside);
    inside = y;
  }

  /* an empty inside has no first child to take the file name from */
  if( Down(inside) == inside )
    Error(43, 3, "hyphenation file name expected here",
      FATAL, &fpos(inside));

  /* initialize hyphenation file entry (first child of inside) */
  Child(hyph_file, Down(inside));
  DeleteLink(Down(inside));
  if( !is_word(type(hyph_file)) )
    Error(43, 3, "hyphenation file name expected here",
      FATAL, &fpos(inside));
  if( StringEqual(string(hyph_file), STR_EMPTY) ||
      StringEqual(string(hyph_file), STR_HYPHEN) )
  { Dispose(hyph_file);
    hyph_tab[lang_count] = nilobj;
  }
  else hyph_tab[lang_count] = hyph_file;

  /* initialize sentence ends */
  lang_ends[lang_count] = inside;
  for( link = Down(inside);  link != inside;  link = NextDown(link) )
  { Child(y, link);
    if( type(y) == GAP_OBJ )
    {
      /* step back first so the loop's NextDown resumes after the gap */
      link = PrevDown(link);
      DisposeChild(NextDown(link));
      continue;
    }
    if( !is_word(type(y)) )
    { debug2(DLS, D, "word patterns failing on %s %s", Image(type(y)),
	EchoObject(y));
      Error(43, 4, "expected word ending pattern here", FATAL, &fpos(y));
    }
    len = StringLength(string(y));
    if( len == 0 )
      Error(43, 5, "empty word ending pattern", FATAL, &fpos(y));

    /* record the last character of the pattern as a possible sentence end */
    ch = string(y)[len - 1];
    LanguageSentenceEnds[ch] = TRUE;
  }

  /* if initializing run, initialize the hyphenation table */
  if( InitializeAll && hyph_tab[lang_count] != nilobj )
    (void) ReadHyphTable(lang_count);

  debug0(DLS, D, "LanguageDefine returning.");
} /* end LanguageDefine */
284 
285 
286 /*****************************************************************************/
287 /*                                                                           */
288 /*  BOOLEAN LanguageWordEndsSentence(OBJECT wd, BOOLEAN lc_prec)             */
289 /*                                                                           */
290 /*  Returns TRUE if word ends a sentence in the current language.  This is   */
291 /*  so if it ends with a string in the list associated with the current      */
292 /*  language.  If lc_prec is TRUE, it is also necessary for the character    */
293 /*  preceding this suffix to be lower-case.                                  */
294 /*                                                                           */
295 /*****************************************************************************/
296 
/* LanguageWordEndsSentence(wd, lc_prec): return TRUE if word wd ends with */
/* one of the sentence ending patterns recorded for word_language(wd).     */
/* If lc_prec is TRUE, the character of wd just before the matched pattern */
/* must also exist and be lower case in the mapping of wd's font.          */
/* Returns FALSE when no patterns are recorded for the word's language     */
/* (entry 0 of lang_ends[] is never assigned by LanguageDefine()).         */
BOOLEAN LanguageWordEndsSentence(OBJECT wd, BOOLEAN lc_prec)
{ OBJECT x, y, link;  int pos;
  assert( is_word(type(wd)), "LanguageWordEndsSentence: wd!" );
  debug2(DLS, D, "LanguageWordEndsSentence(%d %s)",
    word_language(wd), EchoObject(wd));
  x = lang_ends[word_language(wd)];
  if( x == nilobj )
  { debug0(DLS, D, "LanguageWordEndsSentence returning FALSE (no patterns)");
    return FALSE;
  }
  for( link = Down(x);  link != x;  link = NextDown(link) )
  { Child(y, link);
    if( StringEndsWith(string(wd), string(y)) )
    {
      if( !lc_prec )
      { debug0(DLS, D, "LanguageWordEndsSentence returning TRUE (!lc_prec)");
        return TRUE;
      }

      /* now check whether the preceding character is lower case */
      pos = StringLength(string(wd)) - StringLength(string(y)) - 1;
      if( pos >= 0 &&
	MapIsLowerCase(string(wd)[pos], FontMapping(word_font(wd), &fpos(wd))))
      {
        debug0(DLS, D, "LanguageWordEndsSentence returning TRUE (lc_prec)");
        return TRUE;
      }
    }
  }
  debug0(DLS, D, "LanguageWordEndsSentence returning FALSE");
  return FALSE;
} /* end LanguageWordEndsSentence */
325 
326 
327 /*@::LanguageChange(), LanguageString(), LanguageHyph()@**********************/
328 /*                                                                           */
329 /*  LanguageChange(style, x)                                                 */
330 /*                                                                           */
331 /*  Change the current style to contain the language of language command x.  */
332 /*                                                                           */
333 /*****************************************************************************/
334 
/* LanguageChange(style, x): set language(*style) to the language named by */
/* the word x.  The style is left unchanged, with a warning, if x is not a */
/* word or names no known language; it is left unchanged silently if x is  */
/* the empty word.                                                         */
void LanguageChange(STYLE *style, OBJECT x)
{ OBJECT lname;
  debug2(DLS, D, "LanguageChange(%s, %s)", EchoStyle(style), EchoObject(x));

  /* if argument is not a word, fail and exit */
  if( !is_word(type(x)) )
  { Error(43, 6, "%s ignored (illegal left parameter)", WARN, &fpos(x),
      KW_LANGUAGE);
    debug0(DLS, D, "LanguageChange returning (language unchanged)");
    return;
  }

  /* if argument is empty, return unchanged */
  if( StringEqual(string(x), STR_EMPTY) )
  { debug0(DLS, D, "LanguageChange returning (empty, language unchanged)");
    return;
  }

  /* retrieve language record if present, else leave style unchanged */
  lname = ltab_retrieve(string(x), names_tab);
  if( lname == nilobj )
  { Error(43, 7, "%s ignored (unknown language %s)", WARN, &fpos(x),
      KW_LANGUAGE, string(x));

    /* lname is nilobj here, so the trace must not print string(lname) */
    debug0(DLS, D, "LanguageChange returning (unknown, language unchanged)");
  }
  else
  { language(*style) = word_language(lname);
    debug1(DLS, D, "LanguageChange returning (language = %s)", string(lname));
  }
  ifdebug(DLS, DD, ltab_debug(names_tab, stderr));
} /* end LanguageChange */
363 
364 
365 /*****************************************************************************/
366 /*                                                                           */
367 /*  FULL_CHAR *LanguageString(lnum)                                          */
368 /*                                                                           */
369 /*  Return the canonical name of language lnum.                              */
370 /*                                                                           */
371 /*****************************************************************************/
372 
LanguageString(LANGUAGE_NUM lnum)373 FULL_CHAR *LanguageString(LANGUAGE_NUM lnum)
374 { FULL_CHAR *res;
375   debug1(DLS, D, "LanguageString(%d)", lnum);
376   assert( lnum > 0 && lnum <= lang_count, "LanguageString: unknown number" );
377 
378   res = string(canonical_tab[lnum]);
379 
380   debug1(DLS, D, "LanguageString returning %s", res);
381   return res;
382 } /* end LanguageString */
383 
384 
385 /*****************************************************************************/
386 /*                                                                           */
387 /*  OBJECT LanguageHyph(lnum)                                                */
388 /*                                                                           */
389 /*  Return the hyphenation file name object for language lnum.               */
390 /*                                                                           */
391 /*****************************************************************************/
392 
LanguageHyph(LANGUAGE_NUM lnum)393 OBJECT LanguageHyph(LANGUAGE_NUM lnum)
394 { OBJECT res;
395   debug1(DLS, D, "LanguageHyph(%d)", lnum);
396   assert( lnum > 0 && lnum <= lang_count, "LanguageHyph: unknown number" );
397 
398   res = hyph_tab[lnum];
399 
400   debug1(DLS, D, "LanguageHyph returning %s", EchoObject(res));
401   return res;
402 } /* end LanguageHyph */
403