1 /**
2  * @file
3  * @brief Functions and data structures dealing with the syntax,
4  *        morphology, and orthography of the English language.
5 **/
6 
7 #include "AppHdr.h"
8 
9 #include "english.h"
10 
11 #include <cstddef>
12 #include <cwctype>
13 #include <string>
14 
15 #include "stringutil.h"
16 
// Phrases that introduce a trailing qualifier in a name. pluralise() looks
// for these and, when one is found, pluralises only the text in front of it.
// The list is terminated by nullptr.
const char * const standard_plural_qualifiers[] =
{
    " of ",
    " labelled ",
    " from ",
    nullptr,
};
21 
/**
 * Is the given character one of the five English vowel letters?
 *
 * Only the ASCII letters a, e, i, o and u count, in either case. The letter
 * 'y' and every non-ASCII code point are reported as non-vowels.
 *
 * @param chr  The code point to test.
 * @return     True iff chr is an upper- or lowercase ASCII vowel.
 */
bool is_vowel(const char32_t chr)
{
    // Compare the full code point. Narrowing it to a char first would make
    // unrelated characters alias an ASCII vowel (e.g. U+0161 -> 0x61, 'a').
    switch (chr)
    {
    case U'a': case U'e': case U'i': case U'o': case U'u':
    case U'A': case U'E': case U'I': case U'O': case U'U':
        return true;
    default:
        return false;
    }
}
27 
28 // Pluralises a monster or item name. This'll need to be updated for
29 // correctness whenever new monsters/items are added.
pluralise(const string & name,const char * const qualifiers[],const char * const no_qualifier[])30 string pluralise(const string &name, const char * const qualifiers[],
31                  const char * const no_qualifier[])
32 {
33     string::size_type pos;
34 
35     if (qualifiers)
36     {
37         for (int i = 0; qualifiers[i]; ++i)
38             if ((pos = name.find(qualifiers[i])) != string::npos
39                 && !ends_with(name, no_qualifier))
40             {
41                 return pluralise(name.substr(0, pos)) + name.substr(pos);
42             }
43     }
44 
45     if (!name.empty() && name[name.length() - 1] == ')'
46         && (pos = name.rfind(" (")) != string::npos)
47     {
48         return pluralise(name.substr(0, pos)) + name.substr(pos);
49     }
50 
51     if (!name.empty() && name[name.length() - 1] == ']'
52         && (pos = name.rfind(" [")) != string::npos)
53     {
54         return pluralise(name.substr(0, pos)) + name.substr(pos);
55     }
56 
57     if (ends_with(name, "us"))
58     {
59         if (ends_with(name, "lotus") || ends_with(name, "status"))
60             return name + "es";
61         else
62             // Fungus, ufetubus, for instance.
63             return name.substr(0, name.length() - 2) + "i";
64     }
65     else if (ends_with(name, "larva") || ends_with(name, "antenna")
66              || ends_with(name, "hypha") || ends_with(name, "noma"))
67     {
68         return name + "e";
69     }
70     else if (ends_with(name, "ex"))
71     {
72         // Vortex; vortexes is legal, but the classic plural is cooler.
73         return name.substr(0, name.length() - 2) + "ices";
74     }
75     else if (ends_with(name, "mosquito") || ends_with(name, "ss"))
76         return name + "es";
77     else if (ends_with(name, "cyclops"))
78         return name.substr(0, name.length() - 1) + "es";
79     else if (name == "catoblepas")
80         return "catoblepae";
81     else if (ends_with(name, "s"))
82         return name;
83     else if (ends_with(name, "y"))
84     {
85         if (name == "y")
86             return "ys";
87         // day -> days, boy -> boys, etc
88         else if (is_vowel(name[name.length() - 2]))
89             return name + "s";
90         // jelly -> jellies
91         else
92             return name.substr(0, name.length() - 1) + "ies";
93     }
94     else if (ends_with(name, "fe"))
95     {
96         // knife -> knives
97         return name.substr(0, name.length() - 2) + "ves";
98     }
99     else if (ends_with(name, "staff"))
100     {
101         // staff -> staves
102         return name.substr(0, name.length() - 2) + "ves";
103     }
104     else if (ends_with(name, "f") && !ends_with(name, "ff"))
105     {
106         // elf -> elves, but not hippogriff -> hippogrives.
107         // TODO: if someone defines a "goblin chief", this should be revisited.
108         return name.substr(0, name.length() - 1) + "ves";
109     }
110     else if (ends_with(name, "mage"))
111     {
112         // mage -> magi
113         return name.substr(0, name.length() - 1) + "i";
114     }
115     else if (name == "gold"                 || ends_with(name, "fish")
116              || ends_with(name, "folk")     || ends_with(name, "spawn")
117              || ends_with(name, "tengu")    || ends_with(name, "sheep")
118              || ends_with(name, "swine")    || ends_with(name, "efreet")
119              || ends_with(name, "jiangshi") || ends_with(name, "raiju")
120              || ends_with(name, "meliai"))
121     {
122         return name;
123     }
124     else if (ends_with(name, "ch") || ends_with(name, "sh")
125              || ends_with(name, "x"))
126     {
127         // To handle cockroaches, sphinxes, and bushes.
128         return name + "es";
129     }
130     else if (ends_with(name, "simulacrum") || ends_with(name, "eidolon"))
131     {
132         // simulacrum -> simulacra (correct Latin pluralisation)
133         // also eidolon -> eidola (correct Greek pluralisation)
134         return name.substr(0, name.length() - 2) + "a";
135     }
136     else if (ends_with(name, "djinni"))
137     {
138         // djinni -> djinn.
139         return name.substr(0, name.length() - 1);
140     }
141     else if (name == "foot")
142         return "feet";
143     else if (name == "ophan" || name == "cherub" || name == "seraph")
144     {
145         // Unlike "angel" which is fully assimilated, and "cherub" and "seraph"
146         // which may be pluralised both ways, "ophan" always uses Hebrew
147         // pluralisation.
148         return name + "im";
149     }
150     else if (ends_with(name, "arachi"))
151     {
152         // Barachi -> Barachim. Kind of Hebrew? Kind of goofy.
153         // (not sure if this is ever used...)
154         return name + "m";
155     }
156     else if (name == "ushabti")
157     {
158         // ushabti -> ushabtiu (correct ancient Egyptian pluralisation)
159         return name + "u";
160     }
161     else if (name == "Tzitzimitl")
162     {
163         // Tzitzimitl -> Tzitzimimeh (correct Nahuatl pluralisation)
164         return name.substr(0, name.length() - 2) + "meh";
165     }
166 
167     return name + "s";
168 }
169 
// Monster names ending in one of these suffixes are pluralised directly,
// without applying the "of" rule first. For instance:
//
//      moth of wrath           => moths of wrath, but
//      moth of wrath zombie    => moth of wrath zombies.
//
// The list is terminated by nullptr.
static const char * const _monster_suffixes[] =
{
    "zombie",
    "skeleton",
    "simulacrum",
    nullptr,
};
179 
pluralise_monster(const string & name)180 string pluralise_monster(const string &name)
181 {
182     return pluralise(name, standard_plural_qualifiers, _monster_suffixes);
183 }
184 
/**
 * Form the possessive of a noun or pronoun.
 *
 * A handful of pronouns (matched exactly, case-sensitively) have irregular
 * possessives; everything else simply gets "'s" appended.
 *
 * @param name  The noun or pronoun; may be empty.
 * @return      The possessive form, or an empty string for an empty name.
 */
string apostrophise(const string &name)
{
    // { word, possessive } for the words that do not take "'s".
    static const char * const irregular[][2] =
    {
        { "you",        "your"      },
        { "You",        "Your"      },
        { "it",         "its"       },
        { "It",         "Its"       },
        { "itself",     "its own"   },
        { "himself",    "his own"   },
        { "herself",    "her own"   },
        { "themselves", "their own" },
        { "themself",   "their own" },
        { "yourself",   "your own"  },
    };

    if (name.empty())
        return name;

    for (const auto &entry : irregular)
        if (name == entry[0])
            return entry[1];

    // We assume that possessives of singular nouns ending in 's' are wanted
    // more often than those of plural nouns, so "'s" is appended even after
    // a final 's'. No matter what, some cases will come out wrong.
    return name + "'s";
}
218 
219 /**
220  * Get the singular form of a given plural-agreeing verb.
221  *
222  * An absurd simplification of the english language, but for our purposes...
223  *
224  * @param verb   A plural-agreeing or infinitive verb
225  *               ("smoulder", "are", "be", etc.) or phrasal verb
226  *               ("shout at", "make way for", etc.)
227  * @param plural Should we conjugate the verb for the plural rather than
228  *               the singular?
229  * @return       The singular ("smoulders", "is", "shouts at") or plural-
230  *               agreeing ("smoulder", "are", "shout at", etc.) finite form
231  *               of the verb, depending on \c plural .
232  */
conjugate_verb(const string & verb,bool plural)233 string conjugate_verb(const string &verb, bool plural)
234 {
235     if (!verb.empty() && verb[0] == '!')
236         return verb.substr(1);
237 
238     // Conjugate the first word of a phrase (e.g. "release spores at")
239     const size_t space = verb.find(" ");
240     if (space != string::npos)
241     {
242         return conjugate_verb(verb.substr(0, space), plural)
243                + verb.substr(space);
244     }
245 
246     // Only one verb in English differs between infinitive and plural.
247     if (plural)
248         return verb == "be" ? "are" : verb;
249 
250     if (verb == "are" || verb == "be")
251         return "is";
252 
253     if (verb == "have")
254         return "has";
255 
256     if (ends_with(verb, "f") || ends_with(verb, "fe")
257         || ends_with(verb, "y"))
258     {
259         return verb + "s";
260     }
261 
262     return pluralise(verb);
263 }
264 
// Pronoun forms: one row per gender, one column per pronoun case.
// decline_pronoun() indexes this table as [gender][variant] and checks at
// compile time that it has NUM_GENDERS rows.
// NOTE(review): row and column order presumably mirror the declaration
// order of gender_type and pronoun_type -- confirm against those enums
// before adding or reordering entries.
static const char * const _pronoun_declension[][NUM_PRONOUN_CASES] =
{
    // subj  poss    refl        obj
    { "it",  "its",  "itself",   "it"  }, // neuter
    { "he",  "his",  "himself",  "him" }, // masculine
    { "she", "her",  "herself",  "her" }, // feminine
    { "you", "your", "yourself", "you" }, // 2nd person
    { "they", "their", "themself", "them" }, // neutral
};
274 
decline_pronoun(gender_type gender,pronoun_type variant)275 const char *decline_pronoun(gender_type gender, pronoun_type variant)
276 {
277     COMPILE_CHECK(ARRAYSZ(_pronoun_declension) == NUM_GENDERS);
278     ASSERT_RANGE(gender, 0, NUM_GENDERS);
279     ASSERT_RANGE(variant, 0, NUM_PRONOUN_CASES);
280     return _pronoun_declension[gender][variant];
281 }
282 
// Turns a lowercase verb stem such as "walk", "glid" or "wriggl" (the kind
// used to build "walking", "gliding" or "wriggler") into its present tense
// form. Only the stems handled below are changed; any other input is
// returned as it was given.
// TODO: make this more general. (Does english have rules?)
string walk_verb_to_present(string verb)
{
    // It's a lie! Tengu only get this verb when they can't fly, so they
    // walk instead of gliding.
    if (verb == "glid")
        return "walk";

    if (verb == "wriggl")
        return "wriggle";

    return verb;
}
298 
// Spells out a number below one hundred, e.g. 42 -> "forty-two".
// Zero gives an empty string. The lookup tables only cover 0..99, so callers
// must not pass anything larger.
static string _tens_in_words(unsigned num)
{
    static const char * const below_twenty[] =
    {
        "", "one", "two", "three", "four", "five", "six", "seven",
        "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
        "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"
    };
    static const char * const decades[] =
    {
        "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
        "eighty", "ninety"
    };

    if (num < 20)
        return below_twenty[num];

    string words = decades[num / 10];

    // Round decades ("forty") take no hyphenated unit.
    const unsigned unit = num % 10;
    if (unit != 0)
    {
        words += "-";
        words += below_twenty[unit];
    }
    return words;
}
319 
// Joins two strings with a single space. If either one is empty, the other
// is returned on its own, without a separator.
static string _join_strings(const string &a, const string &b)
{
    if (a.empty())
        return b;

    if (b.empty())
        return a;

    return a + " " + b;
}
327 
_hundreds_in_words(unsigned num)328 static string _hundreds_in_words(unsigned num)
329 {
330     unsigned dreds = num / 100, tens = num % 100;
331     string sdreds = dreds? _tens_in_words(dreds) + " hundred" : "";
332     string stens  = tens? _tens_in_words(tens) : "";
333     return _join_strings(sdreds, stens);
334 }
335 
_number_in_words(unsigned num,unsigned period)336 static string _number_in_words(unsigned num, unsigned period)
337 {
338     static const char * const periods[] = {
339         "", " thousand", " million", " billion", " trillion"
340     };
341 
342     ASSERT(period < ARRAYSZ(periods));
343 
344     // Handle "eighteen million trillion", should unsigned go that high.
345     if (period == ARRAYSZ(periods) - 1)
346         return _number_in_words(num, 0) + periods[period];
347 
348     unsigned thousands = num % 1000, rest = num / 1000;
349     if (!rest && !thousands)
350         return "zero";
351 
352     return _join_strings((rest? _number_in_words(rest, period + 1) : ""),
353                         (thousands? _hundreds_in_words(thousands)
354                                     + periods[period]
355                                   : ""));
356 }
357 
number_in_words(unsigned num)358 string number_in_words(unsigned num)
359 {
360     return _number_in_words(num, 0);
361 }
362 
_number_to_string(unsigned number,bool in_words)363 static string _number_to_string(unsigned number, bool in_words)
364 {
365     return in_words ? number_in_words(number) : to_string(number);
366 }
367 
368 // Naively prefix A/an to a noun.
article_a(const string & name,bool lowercase)369 string article_a(const string &name, bool lowercase)
370 {
371     if (!name.length())
372         return name;
373 
374     const char *a  = lowercase? "a "  : "A ";
375     const char *an = lowercase? "an " : "An ";
376     switch (name[0])
377     {
378         case 'a': case 'e': case 'i': case 'o': case 'u':
379         case 'A': case 'E': case 'I': case 'O': case 'U':
380             // XXX: Hack for hydras.
381             if (starts_with(name, "one-"))
382                 return a + name;
383             return an + name;
384         case '1':
385             // XXX: Hack^2 for hydras.
386             if (starts_with(name, "11-") || starts_with(name, "18-"))
387                 return an + name;
388             return a + name;
389         case '8':
390             // Eighty, eight hundred, eight thousand, ...
391             return an + name;
392         default:
393             return a + name;
394     }
395 }
396 
apply_description(description_level_type desc,const string & name,int quantity,bool in_words)397 string apply_description(description_level_type desc, const string &name,
398                          int quantity, bool in_words)
399 {
400     switch (desc)
401     {
402     case DESC_THE:
403         return "the " + name;
404     case DESC_A:
405         return quantity > 1 ? _number_to_string(quantity, in_words) + name
406                             : article_a(name, true);
407     case DESC_YOUR:
408         return "your " + name;
409     case DESC_PLAIN:
410     default:
411         return name;
412     }
413 }
414 
thing_do_grammar(description_level_type dtype,string desc,bool ignore_case)415 string thing_do_grammar(description_level_type dtype, string desc,
416                         bool ignore_case)
417 {
418     // Avoid double articles.
419     if (starts_with(desc, "the ") || starts_with(desc, "The ")
420         || starts_with(desc, "a ") || starts_with(desc, "A ")
421         || starts_with(desc, "an ") || starts_with(desc, "An ")
422         || starts_with(desc, "some ") || starts_with(desc, "Some "))
423     {
424         if (dtype == DESC_THE || dtype == DESC_A)
425             dtype = DESC_PLAIN;
426     }
427 
428     if (dtype == DESC_PLAIN || !ignore_case && isupper(desc[0]))
429         return desc;
430 
431     switch (dtype)
432     {
433     case DESC_THE:
434         return "the " + desc;
435     case DESC_A:
436         return article_a(desc, true);
437     case DESC_NONE:
438         return "";
439     default:
440         return desc;
441     }
442 }
443 
get_desc_quantity(const int quant,const int total,const string & whose)444 string get_desc_quantity(const int quant, const int total, const string &whose)
445 {
446     if (total == quant)
447         return uppercase_first(whose);
448     else if (quant == 1)
449         return "One of " + whose;
450     else if (quant == 2)
451         return "Two of " + whose;
452     else if (quant >= total * 3 / 4)
453         return "Most of " + whose;
454     else
455         return "Some of " + whose;
456 }
457