1 /*@z43.c:Language Service:LanguageChange, LanguageString@*********************/
2 /* */
3 /* THE LOUT DOCUMENT FORMATTING SYSTEM (VERSION 3.39) */
4 /* COPYRIGHT (C) 1991, 2008 Jeffrey H. Kingston */
5 /* */
6 /* Jeffrey H. Kingston (jeff@it.usyd.edu.au) */
7 /* School of Information Technologies */
8 /* The University of Sydney 2006 */
9 /* AUSTRALIA */
10 /* */
11 /* This program is free software; you can redistribute it and/or modify */
12 /* it under the terms of the GNU General Public License as published by */
13 /* the Free Software Foundation; either Version 3, or (at your option) */
14 /* any later version. */
15 /* */
16 /* This program is distributed in the hope that it will be useful, */
17 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
18 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
19 /* GNU General Public License for more details. */
20 /* */
21 /* You should have received a copy of the GNU General Public License */
22 /* along with this program; if not, write to the Free Software */
23 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA */
24 /* */
25 /* FILE: z43.c */
26 /* MODULE: Language Service */
27 /* EXTERNS: LanguageInit(), LanguageDefine(), LanguageChange(), */
28 /* LanguageString(), LanguageHyph() */
29 /* */
30 /*****************************************************************************/
31 #include "externs.h"
/* initial number of buckets in names_tab, and initial number of entries in */
/* hyph_tab and canonical_tab (see LanguageInit)                            */
#define INIT_LANGUAGE_NUM 100
33
34
35 /*****************************************************************************/
36 /* */
37 /* LANGUAGE_TABLE */
38 /* */
39 /* A symbol table permitting access to language name records. */
40 /* The table will automatically enlarge to accept any number of entries. */
41 /* */
42 /* ltab_new(newsize) New empty table, newsize capacity */
43 /* ltab_insert(x, &S) Insert new language name object x into S */
44 /* ltab_retrieve(str, S) Retrieve language name object named str */
45 /* ltab_debug(S, fp) Debug print of table S to file fp */
46 /* */
47 /*****************************************************************************/
48
49 typedef struct
50 { int langtab_size; /* size of table */
51 int langtab_count; /* number of objects held */
52 OBJECT langtab_item[1];
53 } *LANGUAGE_TABLE;
54
55 #define ltab_size(S) (S)->langtab_size
56 #define ltab_count(S) (S)->langtab_count
57 #define ltab_item(S, i) (S)->langtab_item[i]
58
/* hash(pos, str, S): set pos to the bucket index of string str in table S,  */
/* which is the sum of the characters of str modulo the size of S.  The      */
/* empty string hashes to bucket 0; the scan stops at the terminator and     */
/* never reads beyond it.  Wrapped in do/while so that it behaves as one     */
/* statement wherever it is followed by a semicolon.                         */
#define hash(pos, str, S)                                               \
do {                                                                    \
  FULL_CHAR *hash_p = (str);                                            \
  (pos) = 0;                                                            \
  while( *hash_p != '\0' )  (pos) += *hash_p++;                         \
  (pos) = (pos) % ltab_size(S);                                         \
} while( 0 )
65
ltab_new(int newsize)66 static LANGUAGE_TABLE ltab_new(int newsize)
67 { LANGUAGE_TABLE S; int i;
68 ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 1,
69 2*sizeof(int) + newsize * sizeof(OBJECT)));
70 S = (LANGUAGE_TABLE)
71 malloc(2*sizeof(int) + newsize * sizeof(OBJECT));
72 if( S == (LANGUAGE_TABLE) NULL )
73 Error(43, 1, "run out of memory enlarging language table", FATAL, no_fpos);
74 ltab_size(S) = newsize;
75 ltab_count(S) = 0;
76 for( i = 0; i < newsize; i++ ) ltab_item(S, i) = nilobj;
77 return S;
78 } /* end ltab_new */
79
80 static void ltab_insert(OBJECT x, LANGUAGE_TABLE *S);
81
ltab_rehash(LANGUAGE_TABLE S,int newsize)82 static LANGUAGE_TABLE ltab_rehash(LANGUAGE_TABLE S, int newsize)
83 { LANGUAGE_TABLE NewS; int i;
84 NewS = ltab_new(newsize);
85 for( i = 1; i <= ltab_size(S); i++ )
86 { if( ltab_item(S, i) != nilobj )
87 ltab_insert(ltab_item(S, i), &NewS);
88 }
89 free(S);
90 return NewS;
91 } /* end ltab_rehash */
92
ltab_insert(OBJECT x,LANGUAGE_TABLE * S)93 static void ltab_insert(OBJECT x, LANGUAGE_TABLE *S)
94 { int pos; OBJECT z, link, y;
95 if( ltab_count(*S) == ltab_size(*S) - 1 ) /* one less since 0 unused */
96 *S = ltab_rehash(*S, 2*ltab_size(*S));
97 hash(pos, string(x), *S);
98 if( ltab_item(*S, pos) == nilobj ) New(ltab_item(*S, pos), ACAT);
99 z = ltab_item(*S, pos);
100 for( link = Down(z); link != z; link = NextDown(link) )
101 { Child(y, link);
102 if( StringEqual(string(x), string(y)) )
103 { Error(43, 2, "language name %s used twice (first at%s)",
104 FATAL, &fpos(x), string(x), EchoFilePos(&fpos(y)));
105 }
106 }
107 Link(ltab_item(*S, pos), x);
108 } /* end ltab_insert */
109
ltab_retrieve(FULL_CHAR * str,LANGUAGE_TABLE S)110 static OBJECT ltab_retrieve(FULL_CHAR *str, LANGUAGE_TABLE S)
111 { OBJECT x, link, y; int pos;
112 hash(pos, str, S);
113 x = ltab_item(S, pos);
114 if( x == nilobj ) return nilobj;
115 for( link = Down(x); link != x; link = NextDown(link) )
116 { Child(y, link);
117 if( StringEqual(str, string(y)) ) return y;
118 }
119 return nilobj;
120 } /* end ltab_retrieve */
121
#if DEBUG_ON
/*****************************************************************************/
/*                                                                           */
/*  static void ltab_debug(LANGUAGE_TABLE S, FILE *fp)                       */
/*                                                                           */
/*  Debug print of table S onto fp: its size and key count, then one         */
/*  line for each bucket listing the names it holds.                         */
/*                                                                           */
/*****************************************************************************/

static void ltab_debug(LANGUAGE_TABLE S, FILE *fp)
{
  OBJECT bucket, lnk, entry;
  int slot;

  fprintf(fp, " table size: %d; current number of keys: %d%s",
    ltab_size(S), ltab_count(S), STR_NEWLINE);
  for( slot = 0;  slot < ltab_size(S);  slot++ )
  {
    bucket = ltab_item(S, slot);
    fprintf(fp, "ltab_item(S, %d) =", slot);
    if( bucket == nilobj )
    { fprintf(fp, " <nilobj>");
    }
    else if( type(bucket) == ACAT )
    {
      /* one entry per child; anything that is not a word is flagged */
      for( lnk = Down(bucket);  lnk != bucket;  lnk = NextDown(lnk) )
      { Child(entry, lnk);
	if( is_word(type(entry)) )
	  fprintf(fp, " %s", string(entry));
	else
	  fprintf(fp, " %s", AsciiToFull("not-WORD!"));
      }
    }
    else
    { fprintf(fp, " not ACAT!");
    }
    fprintf(fp, "%s", STR_NEWLINE);
  }
} /* end ltab_debug */
#endif
143
144
/* Module state.  LanguageInit() sets up the first five of these; the three */
/* arrays are filled in by LanguageDefine() at index lang_count.            */
static LANGUAGE_TABLE names_tab; /* the language names, hashed by string */
static OBJECT *hyph_tab; /* array of hyph filenames (nilobj if none) */
static OBJECT *canonical_tab; /* array of lang names (first name of each) */
static int lang_tabsize; /* allocated size of prev two arrays */
static int lang_count; /* number of languages defined so far */
static OBJECT lang_ends[MAX_LANGUAGE];/* sentence endings, one ACAT per lang */

/*@@**************************************************************************/
/*                                                                           */
/*  BOOLEAN LanguageSentenceEnds[]                                           */
/*                                                                           */
/*  LanguageSentenceEnds[ch] is TRUE if there exists a language in which     */
/*  character ch could occur at the end of a sentence.                       */
/*                                                                           */
/*****************************************************************************/

BOOLEAN LanguageSentenceEnds[MAX_CHARS];
162
163
164 /*@::LanguageInit(), LanguageDefine()@****************************************/
165 /* */
166 /* LanguageInit() */
167 /* */
168 /* Initialize this module. */
169 /* */
170 /*****************************************************************************/
171
LanguageInit(void)172 void LanguageInit(void)
173 { int i;
174 debug0(DLS, D, "LanguageInit()");
175 names_tab = ltab_new(INIT_LANGUAGE_NUM);
176 lang_count = 0;
177 lang_tabsize = INIT_LANGUAGE_NUM;
178 ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 0,
179 INIT_LANGUAGE_NUM * sizeof(OBJECT)));
180 hyph_tab = (OBJECT *) malloc(INIT_LANGUAGE_NUM * sizeof(OBJECT));
181 ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 0,
182 INIT_LANGUAGE_NUM * sizeof(OBJECT)));
183 canonical_tab = (OBJECT *) malloc(INIT_LANGUAGE_NUM * sizeof(OBJECT));
184 for( i = 0; i < MAX_CHARS; i++ ) LanguageSentenceEnds[i] = FALSE;
185 debug0(DLS, D, "LanguageInit returning.");
186 } /* end LanguageInit */
187
188
189 /*****************************************************************************/
190 /* */
191 /* LanguageDefine(names, inside) */
192 /* */
193 /* Define a language whose names are given by ACAT of words names, and */
194 /* whose associated hyphenation patterns file name is hyph_file. */
195 /* */
196 /*****************************************************************************/
197
LanguageDefine(OBJECT names,OBJECT inside)198 void LanguageDefine(OBJECT names, OBJECT inside)
199 { OBJECT link, y, hyph_file; BOOLEAN junk; FULL_CHAR ch;
200 int len;
201 assert( names != nilobj && type(names) == ACAT, "LanguageDefine: names!");
202 assert( Down(names) != names, "LanguageDefine: names is empty!");
203 debug2(DLS, D, "LanguageDefine(%s, %s)",
204 EchoObject(names), EchoObject(inside));
205
206 /* double table size if overflow */
207 if( ++lang_count >= lang_tabsize )
208 {
209 ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 0,
210 -lang_tabsize * sizeof(OBJECT)));
211 ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 0,
212 -lang_tabsize * sizeof(OBJECT)));
213 lang_tabsize *= 2;
214 ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 0,
215 lang_tabsize * sizeof(OBJECT)));
216 ifdebug(DMA, D, DebugRegisterUsage(MEM_LANG_TAB, 0,
217 lang_tabsize * sizeof(OBJECT)));
218 hyph_tab = (OBJECT *) realloc(hyph_tab, lang_tabsize * sizeof(OBJECT) );
219 canonical_tab = (OBJECT *) realloc(canonical_tab, lang_tabsize * sizeof(OBJECT) );
220 }
221
222 /* insert each language name into names_tab */
223 for( link = Down(names); link != names; link = NextDown(link) )
224 { Child(y, link);
225 assert( is_word(type(y)), "LanguageDefine: type(y) != WORD!" );
226 word_language(y) = lang_count;
227 ltab_insert(y, &names_tab);
228 }
229
230 /* initialize canonical language name entry */
231 Child(y, Down(names));
232 canonical_tab[lang_count] = y;
233
234 /* make inside an ACAT if it isn't already */
235 if( type(inside) != ACAT )
236 { New(y, ACAT);
237 FposCopy(fpos(y), fpos(inside));
238 Link(y, inside);
239 inside = y;
240 }
241
242 /* initialize hyphenation file entry (first child of inside) */
243 Child(hyph_file, Down(inside));
244 DeleteLink(Down(inside));
245 if( !is_word(type(hyph_file)) )
246 Error(43, 3, "hyphenation file name expected here",
247 FATAL, &fpos(inside));
248 if( StringEqual(string(hyph_file), STR_EMPTY) ||
249 StringEqual(string(hyph_file), STR_HYPHEN) )
250 { Dispose(hyph_file);
251 hyph_tab[lang_count] = nilobj;
252 }
253 else hyph_tab[lang_count] = hyph_file;
254
255 /* initialize sentence ends */
256 lang_ends[lang_count] = inside;
257 for( link = Down(inside); link != inside; link = NextDown(link) )
258 { Child(y, link);
259 if( type(y) == GAP_OBJ )
260 { link = PrevDown(link);
261 DisposeChild(NextDown(link));
262 continue;
263 }
264 if( !is_word(type(y)) )
265 { debug2(DLS, D, "word patterns failing on %s %s", Image(type(y)),
266 EchoObject(y));
267 Error(43, 4, "expected word ending pattern here", FATAL, &fpos(y));
268 }
269 len = StringLength(string(y));
270 if( len == 0 )
271 Error(43, 5, "empty word ending pattern", FATAL, &fpos(y));
272 ch = string(y)[len - 1];
273 LanguageSentenceEnds[ch] = TRUE;
274 }
275
276 /* if initializing run, initialize the hyphenation table */
277 if( InitializeAll )
278 { if( hyph_tab[lang_count] != nilobj )
279 junk = ReadHyphTable(lang_count);
280 }
281
282 debug0(DLS, D, "LanguageDefine returning.");
283 } /* end LanguageDefine */
284
285
286 /*****************************************************************************/
287 /* */
288 /* BOOLEAN LanguageWordEndsSentence(OBJECT wd, BOOLEAN lc_prec) */
289 /* */
290 /* Returns TRUE if word ends a sentence in the current language. This is */
291 /* so if it ends with a string in the list associated with the current */
292 /* language. If lc_prec is TRUE, it is also necessary for the character */
293 /* preceding this suffix to be lower-case. */
294 /* */
295 /*****************************************************************************/
296
LanguageWordEndsSentence(OBJECT wd,BOOLEAN lc_prec)297 BOOLEAN LanguageWordEndsSentence(OBJECT wd, BOOLEAN lc_prec)
298 { OBJECT x, y, link; int pos;
299 assert( is_word(type(wd)), "LanguageWordEndsSentence: wd!" );
300 debug2(DLS, D, "LanguageWordEndsSentence(%d %s)",
301 word_language(wd), EchoObject(wd));
302 x = lang_ends[word_language(wd)];
303 for( link = Down(x); link != x; link = NextDown(link) )
304 { Child(y, link);
305 if( StringEndsWith(string(wd), string(y)) )
306 {
307 if( !lc_prec )
308 { debug0(DLS, D, "LanguageWordEndsSentence returning TRUE (!lc_prec)");
309 return TRUE;
310 }
311
312 /* now check whether the preceding character is lower case */
313 pos = StringLength(string(wd)) - StringLength(string(y)) - 1;
314 if( pos >= 0 &&
315 MapIsLowerCase(string(wd)[pos], FontMapping(word_font(wd), &fpos(wd))))
316 {
317 debug0(DLS, D, "LanguageWordEndsSentence returning TRUE (!lc_prec)");
318 return TRUE;
319 }
320 }
321 }
322 debug0(DLS, D, "LanguageWordEndsSentence returning FALSE");
323 return FALSE;
324 } /* end LanguageWordEndsSentence */
325
326
327 /*@::LanguageChange(), LanguageString(), LanguageHyph()@**********************/
328 /* */
329 /* LanguageChange(style, x) */
330 /* */
331 /* Change the current style to contain the language of language command x. */
332 /* */
333 /*****************************************************************************/
334
LanguageChange(STYLE * style,OBJECT x)335 void LanguageChange(STYLE *style, OBJECT x)
336 { OBJECT lname;
337 debug2(DLS, D, "LanguageChange(%s, %s)", EchoStyle(style), EchoObject(x));
338
339 /* if argument is not a word, fail and exit */
340 if( !is_word(type(x)) )
341 { Error(43, 6, "%s ignored (illegal left parameter)", WARN, &fpos(x),
342 KW_LANGUAGE);
343 debug0(DLS, D, "LanguageChange returning (language unchanged)");
344 return;
345 }
346
347 /* if argument is empty, return unchanged */
348 if( StringEqual(string(x), STR_EMPTY) )
349 { debug0(DLS, D, "LanguageChange returning (empty, language unchanged)");
350 return;
351 }
352
353 /* retrieve language record if present, else leave style unchanged */
354 lname = ltab_retrieve(string(x), names_tab);
355 if( lname == nilobj )
356 Error(43, 7, "%s ignored (unknown language %s)", WARN, &fpos(x),
357 KW_LANGUAGE, string(x));
358 else language(*style) = word_language(lname);
359
360 debug1(DLS, D, "LanguageChange returning (language = %s)", string(lname));
361 ifdebug(DLS, DD, ltab_debug(names_tab, stderr));
362 } /* LanguageChange */
363
364
365 /*****************************************************************************/
366 /* */
367 /* FULL_CHAR *LanguageString(lnum) */
368 /* */
369 /* Return the canonical name of language lnum. */
370 /* */
371 /*****************************************************************************/
372
LanguageString(LANGUAGE_NUM lnum)373 FULL_CHAR *LanguageString(LANGUAGE_NUM lnum)
374 { FULL_CHAR *res;
375 debug1(DLS, D, "LanguageString(%d)", lnum);
376 assert( lnum > 0 && lnum <= lang_count, "LanguageString: unknown number" );
377
378 res = string(canonical_tab[lnum]);
379
380 debug1(DLS, D, "LanguageString returning %s", res);
381 return res;
382 } /* end LanguageString */
383
384
385 /*****************************************************************************/
386 /* */
387 /* OBJECT LanguageHyph(lnum) */
388 /* */
389 /* Return the hyphenation file name object for language lnum. */
390 /* */
391 /*****************************************************************************/
392
LanguageHyph(LANGUAGE_NUM lnum)393 OBJECT LanguageHyph(LANGUAGE_NUM lnum)
394 { OBJECT res;
395 debug1(DLS, D, "LanguageHyph(%d)", lnum);
396 assert( lnum > 0 && lnum <= lang_count, "LanguageHyph: unknown number" );
397
398 res = hyph_tab[lnum];
399
400 debug1(DLS, D, "LanguageHyph returning %s", EchoObject(res));
401 return res;
402 } /* end LanguageHyph */
403