1 /**
2 * @file
3 * @brief Functions and data structures dealing with the syntax,
4 * morphology, and orthography of the English language.
5 **/
6
7 #include "AppHdr.h"
8
9 #include "english.h"
10
11 #include <cstddef>
12 #include <cwctype>
13 #include <string>
14
15 #include "stringutil.h"
16
// Connective phrases handed to pluralise() as its qualifier list (see
// pluralise_monster()). When pluralise() finds one of them inside a name, it
// pluralises only the text in front of it. The list is nullptr-terminated.
const char * const standard_plural_qualifiers[] =
{
    " of ",
    " labelled ",
    " from ",
    nullptr,
};
21
/**
 * Is the given character one of the five plain Latin vowels?
 *
 * @param chr  The character to test, as a 32-bit code point.
 * @return     True if chr, after lowercasing with towlower(), is exactly
 *             'a', 'e', 'i', 'o' or 'u'; false otherwise ('y' is not
 *             counted as a vowel).
 */
bool is_vowel(const char32_t chr)
{
    // Keep the lowercased value in a 32-bit type. Narrowing it to char would
    // throw away the high bits, so a code point such as U+0161 (low byte
    // 0x61) would wrongly compare equal to 'a'.
    const char32_t low =
        static_cast<char32_t>(towlower(static_cast<wint_t>(chr)));
    return low == U'a' || low == U'e' || low == U'i' || low == U'o'
           || low == U'u';
}
27
28 // Pluralises a monster or item name. This'll need to be updated for
29 // correctness whenever new monsters/items are added.
pluralise(const string & name,const char * const qualifiers[],const char * const no_qualifier[])30 string pluralise(const string &name, const char * const qualifiers[],
31 const char * const no_qualifier[])
32 {
33 string::size_type pos;
34
35 if (qualifiers)
36 {
37 for (int i = 0; qualifiers[i]; ++i)
38 if ((pos = name.find(qualifiers[i])) != string::npos
39 && !ends_with(name, no_qualifier))
40 {
41 return pluralise(name.substr(0, pos)) + name.substr(pos);
42 }
43 }
44
45 if (!name.empty() && name[name.length() - 1] == ')'
46 && (pos = name.rfind(" (")) != string::npos)
47 {
48 return pluralise(name.substr(0, pos)) + name.substr(pos);
49 }
50
51 if (!name.empty() && name[name.length() - 1] == ']'
52 && (pos = name.rfind(" [")) != string::npos)
53 {
54 return pluralise(name.substr(0, pos)) + name.substr(pos);
55 }
56
57 if (ends_with(name, "us"))
58 {
59 if (ends_with(name, "lotus") || ends_with(name, "status"))
60 return name + "es";
61 else
62 // Fungus, ufetubus, for instance.
63 return name.substr(0, name.length() - 2) + "i";
64 }
65 else if (ends_with(name, "larva") || ends_with(name, "antenna")
66 || ends_with(name, "hypha") || ends_with(name, "noma"))
67 {
68 return name + "e";
69 }
70 else if (ends_with(name, "ex"))
71 {
72 // Vortex; vortexes is legal, but the classic plural is cooler.
73 return name.substr(0, name.length() - 2) + "ices";
74 }
75 else if (ends_with(name, "mosquito") || ends_with(name, "ss"))
76 return name + "es";
77 else if (ends_with(name, "cyclops"))
78 return name.substr(0, name.length() - 1) + "es";
79 else if (name == "catoblepas")
80 return "catoblepae";
81 else if (ends_with(name, "s"))
82 return name;
83 else if (ends_with(name, "y"))
84 {
85 if (name == "y")
86 return "ys";
87 // day -> days, boy -> boys, etc
88 else if (is_vowel(name[name.length() - 2]))
89 return name + "s";
90 // jelly -> jellies
91 else
92 return name.substr(0, name.length() - 1) + "ies";
93 }
94 else if (ends_with(name, "fe"))
95 {
96 // knife -> knives
97 return name.substr(0, name.length() - 2) + "ves";
98 }
99 else if (ends_with(name, "staff"))
100 {
101 // staff -> staves
102 return name.substr(0, name.length() - 2) + "ves";
103 }
104 else if (ends_with(name, "f") && !ends_with(name, "ff"))
105 {
106 // elf -> elves, but not hippogriff -> hippogrives.
107 // TODO: if someone defines a "goblin chief", this should be revisited.
108 return name.substr(0, name.length() - 1) + "ves";
109 }
110 else if (ends_with(name, "mage"))
111 {
112 // mage -> magi
113 return name.substr(0, name.length() - 1) + "i";
114 }
115 else if (name == "gold" || ends_with(name, "fish")
116 || ends_with(name, "folk") || ends_with(name, "spawn")
117 || ends_with(name, "tengu") || ends_with(name, "sheep")
118 || ends_with(name, "swine") || ends_with(name, "efreet")
119 || ends_with(name, "jiangshi") || ends_with(name, "raiju")
120 || ends_with(name, "meliai"))
121 {
122 return name;
123 }
124 else if (ends_with(name, "ch") || ends_with(name, "sh")
125 || ends_with(name, "x"))
126 {
127 // To handle cockroaches, sphinxes, and bushes.
128 return name + "es";
129 }
130 else if (ends_with(name, "simulacrum") || ends_with(name, "eidolon"))
131 {
132 // simulacrum -> simulacra (correct Latin pluralisation)
133 // also eidolon -> eidola (correct Greek pluralisation)
134 return name.substr(0, name.length() - 2) + "a";
135 }
136 else if (ends_with(name, "djinni"))
137 {
138 // djinni -> djinn.
139 return name.substr(0, name.length() - 1);
140 }
141 else if (name == "foot")
142 return "feet";
143 else if (name == "ophan" || name == "cherub" || name == "seraph")
144 {
145 // Unlike "angel" which is fully assimilated, and "cherub" and "seraph"
146 // which may be pluralised both ways, "ophan" always uses Hebrew
147 // pluralisation.
148 return name + "im";
149 }
150 else if (ends_with(name, "arachi"))
151 {
152 // Barachi -> Barachim. Kind of Hebrew? Kind of goofy.
153 // (not sure if this is ever used...)
154 return name + "m";
155 }
156 else if (name == "ushabti")
157 {
158 // ushabti -> ushabtiu (correct ancient Egyptian pluralisation)
159 return name + "u";
160 }
161 else if (name == "Tzitzimitl")
162 {
163 // Tzitzimitl -> Tzitzimimeh (correct Nahuatl pluralisation)
164 return name.substr(0, name.length() - 2) + "meh";
165 }
166
167 return name + "s";
168 }
169
// Monster name endings for which the whole name is pluralised directly,
// bypassing the qualifier (" of " etc.) rule. For example:
//
//     moth of wrath        => moths of wrath, but
//     moth of wrath zombie => moth of wrath zombies.
//
// The list is nullptr-terminated.
static const char * const _monster_suffixes[] =
{
    "zombie",
    "skeleton",
    "simulacrum",
    nullptr,
};
179
/**
 * Pluralise a monster name.
 *
 * Forwards to pluralise() with standard_plural_qualifiers as the qualifier
 * list and _monster_suffixes as the list of endings that bypass it.
 *
 * @param name  The singular monster name.
 * @return      Whatever pluralise() produces for that name.
 */
string pluralise_monster(const string &name)
{
    return pluralise(name, standard_plural_qualifiers, _monster_suffixes);
}
184
/**
 * Form the possessive of a name or pronoun.
 *
 * @param name  The noun or pronoun to make possessive.
 * @return      The possessive form: a dedicated form for the pronouns handled
 *              below, otherwise the name followed by "'s". An empty name is
 *              returned unchanged.
 */
string apostrophise(const string &name)
{
    if (name.empty())
        return name;

    // Pronouns that only gain a letter; this keeps their capitalisation.
    if (name == "you" || name == "You")
        return name + "r";
    if (name == "it" || name == "It")
        return name + "s";

    // Reflexive pronouns map onto fixed "<possessive> own" phrases.
    static const char * const reflexive_forms[][2] =
    {
        { "itself",     "its own" },
        { "himself",    "his own" },
        { "herself",    "her own" },
        { "themselves", "their own" },
        { "themself",   "their own" },
        { "yourself",   "your own" },
    };
    for (const auto &form : reflexive_forms)
        if (name == form[0])
            return form[1];

    // We assume that possessives of singular nouns ending in 's' come up
    // more often than those of plural nouns, so "'s" is appended even after
    // a final 's'. No matter what, some cases will come out wrong.
    return name + "'s";
}
218
219 /**
220 * Get the singular form of a given plural-agreeing verb.
221 *
222 * An absurd simplification of the english language, but for our purposes...
223 *
224 * @param verb A plural-agreeing or infinitive verb
225 * ("smoulder", "are", "be", etc.) or phrasal verb
226 * ("shout at", "make way for", etc.)
227 * @param plural Should we conjugate the verb for the plural rather than
228 * the singular?
229 * @return The singular ("smoulders", "is", "shouts at") or plural-
230 * agreeing ("smoulder", "are", "shout at", etc.) finite form
231 * of the verb, depending on \c plural .
232 */
conjugate_verb(const string & verb,bool plural)233 string conjugate_verb(const string &verb, bool plural)
234 {
235 if (!verb.empty() && verb[0] == '!')
236 return verb.substr(1);
237
238 // Conjugate the first word of a phrase (e.g. "release spores at")
239 const size_t space = verb.find(" ");
240 if (space != string::npos)
241 {
242 return conjugate_verb(verb.substr(0, space), plural)
243 + verb.substr(space);
244 }
245
246 // Only one verb in English differs between infinitive and plural.
247 if (plural)
248 return verb == "be" ? "are" : verb;
249
250 if (verb == "are" || verb == "be")
251 return "is";
252
253 if (verb == "have")
254 return "has";
255
256 if (ends_with(verb, "f") || ends_with(verb, "fe")
257 || ends_with(verb, "y"))
258 {
259 return verb + "s";
260 }
261
262 return pluralise(verb);
263 }
264
265 static const char * const _pronoun_declension[][NUM_PRONOUN_CASES] =
266 {
267 // subj poss refl obj
268 { "it", "its", "itself", "it" }, // neuter
269 { "he", "his", "himself", "him" }, // masculine
270 { "she", "her", "herself", "her" }, // feminine
271 { "you", "your", "yourself", "you" }, // 2nd person
272 { "they", "their", "themself", "them" }, // neutral
273 };
274
decline_pronoun(gender_type gender,pronoun_type variant)275 const char *decline_pronoun(gender_type gender, pronoun_type variant)
276 {
277 COMPILE_CHECK(ARRAYSZ(_pronoun_declension) == NUM_GENDERS);
278 ASSERT_RANGE(gender, 0, NUM_GENDERS);
279 ASSERT_RANGE(variant, 0, NUM_PRONOUN_CASES);
280 return _pronoun_declension[gender][variant];
281 }
282
/**
 * Turn a lowercase verb stem such as "walk", "glid" or "wriggl" (as could be
 * used for "walking", "gliding", or "wriggler") into its present tense form.
 *
 * Only two stems are special-cased; every other stem is returned unchanged.
 * TODO: make this more general. (Does english have rules?)
 *
 * @param verb  The verb stem.
 * @return      The present tense form of the verb.
 */
string walk_verb_to_present(string verb)
{
    if (verb == "wriggl")
        verb += 'e';
    else if (verb == "glid")
    {
        // It's a lie! Tengu only get this verb when they can't fly!
        verb = "walk";
    }

    return verb;
}
298
/**
 * Spell out a number below one hundred.
 *
 * @param num  The number to spell out. The lookup tables only cover values
 *             up to 99.
 * @return     The number in words, hyphenated where needed
 *             ("forty-two"); an empty string for 0.
 */
static string _tens_in_words(unsigned num)
{
    static const char *small_names[] =
    {
        "", "one", "two", "three", "four", "five", "six", "seven",
        "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
        "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"
    };
    static const char *decade_names[] =
    {
        "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
        "eighty", "ninety"
    };

    // Everything up to nineteen has a name of its own.
    if (num < 20)
        return small_names[num];

    const int decade = num / 10;
    const int unit = num % 10;

    string words = decade_names[decade];
    if (unit)
    {
        words += "-";
        words += small_names[unit];
    }
    return words;
}
319
/**
 * Join two strings with a single space, omitting the space when either
 * side is empty.
 *
 * @param a  The left-hand string.
 * @param b  The right-hand string.
 * @return   "a b" if both are non-empty, otherwise whichever one is
 *           non-empty (or an empty string if neither is).
 */
static string _join_strings(const string &a, const string &b)
{
    if (a.empty())
        return b;
    if (b.empty())
        return a;

    return a + " " + b;
}
327
_hundreds_in_words(unsigned num)328 static string _hundreds_in_words(unsigned num)
329 {
330 unsigned dreds = num / 100, tens = num % 100;
331 string sdreds = dreds? _tens_in_words(dreds) + " hundred" : "";
332 string stens = tens? _tens_in_words(tens) : "";
333 return _join_strings(sdreds, stens);
334 }
335
_number_in_words(unsigned num,unsigned period)336 static string _number_in_words(unsigned num, unsigned period)
337 {
338 static const char * const periods[] = {
339 "", " thousand", " million", " billion", " trillion"
340 };
341
342 ASSERT(period < ARRAYSZ(periods));
343
344 // Handle "eighteen million trillion", should unsigned go that high.
345 if (period == ARRAYSZ(periods) - 1)
346 return _number_in_words(num, 0) + periods[period];
347
348 unsigned thousands = num % 1000, rest = num / 1000;
349 if (!rest && !thousands)
350 return "zero";
351
352 return _join_strings((rest? _number_in_words(rest, period + 1) : ""),
353 (thousands? _hundreds_in_words(thousands)
354 + periods[period]
355 : ""));
356 }
357
/**
 * Spell out an unsigned number in English words.
 *
 * @param num  The number to convert.
 * @return     The wording produced by _number_in_words() starting from
 *             period 0 (the units group); "zero" for 0.
 */
string number_in_words(unsigned num)
{
    return _number_in_words(num, 0);
}
362
_number_to_string(unsigned number,bool in_words)363 static string _number_to_string(unsigned number, bool in_words)
364 {
365 return in_words ? number_in_words(number) : to_string(number);
366 }
367
368 // Naively prefix A/an to a noun.
article_a(const string & name,bool lowercase)369 string article_a(const string &name, bool lowercase)
370 {
371 if (!name.length())
372 return name;
373
374 const char *a = lowercase? "a " : "A ";
375 const char *an = lowercase? "an " : "An ";
376 switch (name[0])
377 {
378 case 'a': case 'e': case 'i': case 'o': case 'u':
379 case 'A': case 'E': case 'I': case 'O': case 'U':
380 // XXX: Hack for hydras.
381 if (starts_with(name, "one-"))
382 return a + name;
383 return an + name;
384 case '1':
385 // XXX: Hack^2 for hydras.
386 if (starts_with(name, "11-") || starts_with(name, "18-"))
387 return an + name;
388 return a + name;
389 case '8':
390 // Eighty, eight hundred, eight thousand, ...
391 return an + name;
392 default:
393 return a + name;
394 }
395 }
396
apply_description(description_level_type desc,const string & name,int quantity,bool in_words)397 string apply_description(description_level_type desc, const string &name,
398 int quantity, bool in_words)
399 {
400 switch (desc)
401 {
402 case DESC_THE:
403 return "the " + name;
404 case DESC_A:
405 return quantity > 1 ? _number_to_string(quantity, in_words) + name
406 : article_a(name, true);
407 case DESC_YOUR:
408 return "your " + name;
409 case DESC_PLAIN:
410 default:
411 return name;
412 }
413 }
414
thing_do_grammar(description_level_type dtype,string desc,bool ignore_case)415 string thing_do_grammar(description_level_type dtype, string desc,
416 bool ignore_case)
417 {
418 // Avoid double articles.
419 if (starts_with(desc, "the ") || starts_with(desc, "The ")
420 || starts_with(desc, "a ") || starts_with(desc, "A ")
421 || starts_with(desc, "an ") || starts_with(desc, "An ")
422 || starts_with(desc, "some ") || starts_with(desc, "Some "))
423 {
424 if (dtype == DESC_THE || dtype == DESC_A)
425 dtype = DESC_PLAIN;
426 }
427
428 if (dtype == DESC_PLAIN || !ignore_case && isupper(desc[0]))
429 return desc;
430
431 switch (dtype)
432 {
433 case DESC_THE:
434 return "the " + desc;
435 case DESC_A:
436 return article_a(desc, true);
437 case DESC_NONE:
438 return "";
439 default:
440 return desc;
441 }
442 }
443
get_desc_quantity(const int quant,const int total,const string & whose)444 string get_desc_quantity(const int quant, const int total, const string &whose)
445 {
446 if (total == quant)
447 return uppercase_first(whose);
448 else if (quant == 1)
449 return "One of " + whose;
450 else if (quant == 2)
451 return "Two of " + whose;
452 else if (quant >= total * 3 / 4)
453 return "Most of " + whose;
454 else
455 return "Some of " + whose;
456 }
457