/*
 * PROPRIETARY INFORMATION.  This software is proprietary to POWDER
 * Development, and is not to be reproduced, transmitted, or disclosed
 * in any way without written permission.
 *
 * Produced by: Jeff Lait
 *
 *              POWDER Development
 *
 * NAME:        grammar.cpp ( POWDER Library, C++ )
 *
 * COMMENTS:
 *      Implementation of grammar functions.
 *      These handle all the bizarre exceptions which English can
 *      throw at us.  Well, theoretically they are all handled, but
 *      as exceptions are found, this is where to add them.
 */
18
19 #include "grammar.h"
20
21 #include <ctype.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include "grammar.h"
26 #include "assert.h"
27
28 #include "creature.h"
29 #include "item.h"
30
31 static void
gram_extractofclause(char * tmp,char * & prefix,char * & noun,char * & suffix)32 gram_extractofclause(char *tmp, char *&prefix, char *&noun, char *&suffix)
33 {
34 char *ofclause;
35
36 // Rules:
37 // Last word is the noun.
38 // (3) is not the noun.
39 // The noun occurs before the first of clause.
40
41 // Of clause is either " of ", or, if that doesn't exist, " to ",
42 // or, if that is also missing, " named ".
43 // Technically we should likely extract any of these that occurs first.
44
45 ofclause = strstr(tmp, " of ");
46 if (!ofclause)
47 ofclause = strstr(tmp, " to ");
48 if (!ofclause)
49 ofclause = strstr(tmp, " named ");
50 if (ofclause)
51 {
52 prefix = tmp;
53 // Word preceeding of clause is it...
54 UT_ASSERT(ofclause != tmp);
55 if (ofclause == tmp)
56 {
57 // Original string is " of lbjaslkd", this is bad, real bad.
58 prefix = 0;
59 suffix = 0;
60 noun = tmp;
61 return;
62 }
63 *ofclause = '\0';
64 suffix = ofclause+1;
65 noun = ofclause-1;
66
67 while (!isspace(*noun) && noun > tmp)
68 noun--;
69 if (isspace(*noun))
70 {
71 *noun = '\0';
72 noun++;
73 }
74 else
75 prefix = 0;
76 }
77 else
78 {
79 prefix = tmp;
80
81 // Work back from the end of the temp array for the first
82 // alpha char. We want the word that occurs after there.
83 // To facillitate this, we mark all spaces as '\0'.
84 // (that way box43 will be a noun)
85 // Computer beeping at me. Almost out of power.
86 // Must type faster!
87 noun = tmp + strlen(tmp) - 1;
88 suffix = 0;
89 while (noun > tmp && !isalpha(*noun))
90 {
91 if (isspace(*noun))
92 suffix = noun;
93 noun--;
94 }
95 // Keep back tracking this dude until we hit the start or space...
96 while (noun > tmp && !isspace(*noun))
97 noun--;
98
99 if (suffix)
100 {
101 *suffix = '\0';
102 suffix++;
103 }
104
105 if (isspace(*noun))
106 {
107 *noun = '\0';
108 noun++;
109 }
110 else
111 prefix = 0;
112 }
113 }
114
// Returns true if str is exactly one of the subject pronouns
// "I", "you", "he", "she", "it", "they" or "we".
// The comparison is case sensitive and matches the whole string.
bool
gram_ispronoun(const char *str)
{
    static const char *const pronouns[] =
    {
        "I", "you", "he", "she", "it", "they", "we"
    };

    for (size_t i = 0; i < sizeof(pronouns) / sizeof(pronouns[0]); i++)
    {
        if (!strcmp(str, pronouns[i]))
            return true;
    }

    return false;
}
135
// Returns true if c is one of the lower case vowels a, e, i, o, u.
// Upper case letters are not matched.
bool
gram_isvowel(char c)
{
    // Note: y is not vowel.
    // Nor is w, Welsh being damned.
    return c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u';
}
152
// Does this char mark the end of a sentence?
// True for '.', '!' and '?', false for everything else.
bool
gram_isendsentence(char c)
{
    return c == '.' || c == '!' || c == '?';
}
166
167 BUF
gram_makeplural(const char * phrase)168 gram_makeplural(const char *phrase)
169 {
170 // Since we don't know ownership of phrase, must do this the
171 // hardway
172 BUF buf;
173
174 buf.strcpy(phrase);
175 return gram_makeplural(buf);
176 }
177
178 BUF
gram_makeplural(BUF phrase)179 gram_makeplural(BUF phrase)
180 {
181 // Check for trivialities...
182 if (gram_isnameplural(phrase))
183 return phrase;
184
185 // Extract the of clause...
186 char *tmp;
187 char *noun;
188 char *prefix, *suffix;
189
190 tmp = strdup(phrase.buffer());
191
192 gram_extractofclause(tmp, prefix, noun, suffix);
193
194 // Now, try to end it...
195 char ending[10];
196 BUF buf;
197 size_t nounlen = strlen(noun);
198
199 // Build the ending...
200 const char *e;
201 int epos = 0;
202 for (e = noun; *e; e++);
203 e--;
204 for (epos = 0; epos < 10; epos++)
205 {
206 if (e < noun)
207 ending[epos] = '\0';
208 else
209 ending[epos] = *e;
210 e--;
211 }
212
213 if (!strncmp(ending, "esuo", 4) &&
214 strncmp(ending, "esuoh", 5))
215 {
216 // *ouse, but not house. Thus,
217 // louse->lice
218 noun[nounlen-4] = '\0';
219 buf.sprintf("%s%s%sice%s%s",
220 (prefix ? prefix : ""), (prefix ? " " : ""),
221 noun,
222 (suffix ? " " : ""), (suffix ? suffix : ""));
223 }
224 else if (!strncmp(ending, "dlihc", 5))
225 {
226 // child -> children
227 buf.sprintf("%s%s%sren%s%s",
228 (prefix ? prefix : ""), (prefix ? " " : ""),
229 noun,
230 (suffix ? " " : ""), (suffix ? suffix : ""));
231 }
232 else if (!strncmp(ending, "nam", 3))
233 {
234 // man -> men
235 noun[nounlen-3] = '\0';
236 buf.sprintf("%s%s%smen%s%s",
237 (prefix ? prefix : ""), (prefix ? " " : ""),
238 noun,
239 (suffix ? " " : ""), (suffix ? suffix : ""));
240
241 }
242 else if (!strncmp(ending, "efi", 3))
243 {
244 // knife -> knives
245 noun[nounlen-2] = '\0';
246 buf.sprintf("%s%s%sves%s%s",
247 (prefix ? prefix : ""), (prefix ? " " : ""),
248 noun,
249 (suffix ? " " : ""), (suffix ? suffix : ""));
250 }
251 else if (!strncmp(ending, "fei", 3))
252 {
253 // thief -> thieves
254 noun[nounlen-1] = '\0';
255 buf.sprintf("%s%s%sves%s%s",
256 (prefix ? prefix : ""), (prefix ? " " : ""),
257 noun,
258 (suffix ? " " : ""), (suffix ? suffix : ""));
259 }
260 else if (!strcmp(noun, "ox"))
261 {
262 // ox -> oxen
263 buf.sprintf("%s%s%sen%s%s",
264 (prefix ? prefix : ""), (prefix ? " " : ""),
265 noun,
266 (suffix ? " " : ""), (suffix ? suffix : ""));
267 }
268 else if (ending[0] == 's' ||
269 !strncmp(ending, "hs", 2) || // bush -> bushes
270 !strncmp(ending, "hc", 2) || // lich -> liches
271 (*ending == 'o') ||
272 (*ending == 'x') ||
273 (*ending == 'z'))
274 {
275 // We pluralize by adding es.
276 buf.sprintf("%s%s%ses%s%s",
277 (prefix ? prefix : ""), (prefix ? " " : ""),
278 noun,
279 (suffix ? " " : ""), (suffix ? suffix : ""));
280 }
281 else if (*ending == 'y')
282 {
283 if (gram_isvowel(ending[1]))
284 {
285 // tray -> trays
286 buf.sprintf("%s%s%ss%s%s",
287 (prefix ? prefix : ""), (prefix ? " " : ""),
288 noun,
289 (suffix ? " " : ""), (suffix ? suffix : ""));
290 }
291 else
292 {
293 // fly -> flies
294 noun[nounlen-1] = '\0';
295 buf.sprintf("%s%s%sies%s%s",
296 (prefix ? prefix : ""), (prefix ? " " : ""),
297 noun,
298 (suffix ? " " : ""), (suffix ? suffix : ""));
299 }
300 }
301 else
302 {
303 // Just add s.
304 buf.sprintf("%s%s%ss%s%s",
305 (prefix ? prefix : ""), (prefix ? " " : ""),
306 noun,
307 (suffix ? " " : ""), (suffix ? suffix : ""));
308 }
309
310 return buf;
311 }
312
313 // We have to parse name-phrase to find the noun, and then
314 // determine if that noun is plural.
315 bool
gram_isnameplural(const char * name)316 gram_isnameplural(const char *name)
317 {
318 char *noun, *tmp, *prefix, *suffix;
319 bool isplural = false;
320
321 tmp = strdup(name);
322
323 gram_extractofclause(tmp, prefix, noun, suffix);
324
325 isplural = gram_isplural(noun);
326
327 free(tmp);
328
329 return isplural;
330 }
331
// Returns true if the single word noun looks plural.
//
//   - Ends in "s" but not "ss": plural.  ("grass" becomes "grasses",
//     so "ss" means not plural.)
//   - Ends in "men", like "lizardmen": plural.
//   - Is exactly "oxen", "children" or "feet": plural.
//   - Anything else is singular.
bool
gram_isplural(const char *noun)
{
    // As most noun decisions are based off the ending, we store the
    // last (up to) ten characters reversed here, padded with '\0'.
    char ending[10];
    size_t len = strlen(noun);

    for (size_t i = 0; i < sizeof(ending); i++)
        ending[i] = (i < len) ? noun[len - 1 - i] : '\0';

    if (ending[0] == 's')
    {
        // Ended with an s?  Plural, unless it is a double s.
        // Except some exception I'll find out later.
        return ending[1] != 's';
    }

    if (!strncmp(ending, "nem", 3))
    {
        // This is "foomen", like "lizardmen", so is plural.
        return true;
    }

    // This is, other than specific cases, singular.
    return !strcmp(noun, "oxen") ||
           !strcmp(noun, "children") ||
           !strcmp(noun, "feet");
}
386
387 const char *
gram_getarticle(const char * noun)388 gram_getarticle(const char *noun)
389 {
390 if (gram_isnameplural(noun))
391 return "";
392
393 if (gram_ispronoun(noun))
394 return "";
395
396 // Check for proper nouns if the noun is capped.
397 // This, I think, is wrong. A proper noun only has a definite article
398 // if we want a definite, and in that case everyone has one, cf: creature
399 // usedefinite clause.
400 // We need to search through our possible noun phrase for any
401 // capped word, as "evil Baezl'bub's black heart" will cause
402 // problems otherwise.
403 // We do not want to search of clauses so "scroll of READ ME" isn't
404 // flagged as a proper noun.
405 // This is still not correct. It should be "the corpse of
406 // Baezl'bug", not "a" or just "corpse".
407 {
408 char *prefix, *propernoun, *suffix;
409 char *tmp;
410 const char *capsearch;
411 int lastspace = true;
412 bool isproper = false;
413 bool isplural;
414
415 tmp = strdup(noun);
416
417 gram_extractofclause(tmp, prefix, propernoun, suffix);
418
419 // We want to search the prefix and proper noun for
420 // caps, not the suffix as we care not for " of " clauses.
421 isplural = gram_isplural(noun);
422
423 for (capsearch = prefix; capsearch && *capsearch; capsearch++)
424 {
425 if (lastspace && isupper(*capsearch))
426 isproper = true;
427 lastspace = isspace(*capsearch);
428 }
429 lastspace = true;
430 for (capsearch = propernoun; capsearch && *capsearch; capsearch++)
431 {
432 if (lastspace && isupper(*capsearch))
433 isproper = true;
434 lastspace = isspace(*capsearch);
435 }
436 free(tmp);
437
438 if (isproper)
439 return "";
440 }
441
442 // Check if first letter is a vowel.
443 if (!gram_isvowel(*noun))
444 {
445 // These are usually pretty straight forward. However,
446 // some words such as "honourable" cause problems. Contrast
447 // with "horse" and "hone".
448 if (*noun == 'h')
449 {
450 // Honour:
451 if (!strncmp(noun, "hono", 4))
452 return "an ";
453
454 // Lots of other cases likely follow...
455 }
456
457 return "a ";
458 }
459 // It is likely "an", however, a eucliedean geometry.
460 // However, "an eulerian proof".
461 if (!strncmp(noun, "euc", 3))
462 return "a ";
463
464 // The entire class of "u" causes problems. Many words, such as
465 // "usually", are pronounced with a "y" prefix, so should use "a ".
466 // Some, such as "urban" remain to cause us unfortunate problems.
467 // The rough rule here is:
468 // Determine if u is hard or soft. If two letters after the
469 // u is a vowel, it is "utility", "usual", or "ubiquitous", so
470 // it is "a ".
471 // If it is two consonents in a row, it is a "urbane" usage, so
472 // should use "an ".
473 if (*noun == 'u')
474 {
475 // The single letter 'u' also use "a ".
476 // Yet, if there is no third character, (Great city of Ur?) it
477 // should be treated as the double consonent case.
478 if (!noun[1] || gram_isvowel(noun[2]))
479 {
480 return "a ";
481 }
482 // We have either a u followed by two consonents or two vowels
483 // in a row. Two vowels in a row we consider to be a "a " case,
484 // though I can't think of any.
485 if (gram_isvowel(noun[1]))
486 return "a ";
487
488 // Chain to default to "an "....
489 }
490
491 // Default to "an "...
492 return "an ";
493 }
494
495 BUF
gram_createcount(const char * basename,int count,bool article)496 gram_createcount(const char *basename, int count, bool article)
497 {
498 BUF buf;
499
500 buf.strcpy(basename);
501 return gram_createcount(buf, count, article);
502 }
503
504 BUF
gram_createcount(BUF basename,int count,bool article)505 gram_createcount(BUF basename, int count, bool article)
506 {
507 BUF result;
508 BUF plural;
509
510 if (count != 1)
511 plural = gram_makeplural(basename);
512
513 if (!count)
514 {
515 result.sprintf("no %s", plural.buffer());
516 }
517 else if (count == 1)
518 {
519 if (article)
520 {
521 result.strcpy(gram_getarticle(basename));
522 result.strcat(basename);
523 }
524 else
525 {
526 return basename;
527 }
528 }
529 else
530 {
531 result.sprintf("%d %s", count, plural.buffer());
532 }
533
534 return result;
535 }
536
537 BUF
gram_createplace(int place)538 gram_createplace(int place)
539 {
540 BUF buf;
541 const char *ext;
542 int rem, upperrem;
543
544 rem = place % 10;
545 // I fucking hate this misfeature of C!
546 if (rem < 0)
547 rem += 10;
548
549 upperrem = (place - rem) / 10;
550 upperrem %= 10;
551 if (upperrem < 0)
552 upperrem += 10;
553
554 if (upperrem == 1)
555 {
556 // Eleventies!
557 rem = 0;
558 }
559
560 switch (rem)
561 {
562 case 1:
563 ext = "st";
564 break;
565 case 2:
566 ext = "nd";
567 break;
568 case 3:
569 ext = "rd";
570 break;
571 default:
572 ext = "th";
573 break;
574 }
575
576 buf.sprintf("%d%s", place, ext);
577
578 return buf;
579 }
580
581 // Static tables of verbs...
582 const char *glb_verbBE[2][NUM_VERBS] =
583 { { "am", "are", "is", "is", "is", "are", "are", "are", "are", "are" },
584 { "was", "were", "was", "was", "was", "were", "were", "were", "were", "were" } };
585
586 const char *glb_pronoun[NUM_VERBS] =
587 { "I", "you", "he", "she", "it", "we", "you", "they", "they", "they" };
588 const char *glb_possessive[NUM_VERBS] =
589 { "my", "your", "his", "her", "its", "our", "your", "their", "their", "their" };
590 const char *glb_ownership[NUM_VERBS] =
591 { "mine", "yours", "his", "hers", "its", "ours", "yours", "theirs", "theirs", "theirs" };
592 const char *glb_reflexive[NUM_VERBS] =
593 { "myself", "yourself", "himself", "herself", "itself", "ourselves", "yourselves", "themselves", "themselves", "themselves" };
594 const char *glb_accusative[NUM_VERBS] =
595 { "me", "you", "him", "her", "it", "us", "you", "them", "them" , "them" };
596
597 const char *
gram_getpronoun(VERB_PERSON person)598 gram_getpronoun(VERB_PERSON person)
599 {
600 return glb_pronoun[person];
601 }
602
603 const char *
gram_getpossessive(VERB_PERSON person)604 gram_getpossessive(VERB_PERSON person)
605 {
606 return glb_possessive[person];
607 }
608
609 const char *
gram_getownership(VERB_PERSON person)610 gram_getownership(VERB_PERSON person)
611 {
612 return glb_ownership[person];
613 }
614
615 const char *
gram_getreflexive(VERB_PERSON person)616 gram_getreflexive(VERB_PERSON person)
617 {
618 return glb_reflexive[person];
619 }
620
621 const char *
gram_getaccusative(VERB_PERSON person)622 gram_getaccusative(VERB_PERSON person)
623 {
624 return glb_accusative[person];
625 }
626
627 BUF
gram_conjugate(const char * verb,VERB_PERSON person,bool past)628 gram_conjugate(const char *verb, VERB_PERSON person, bool past)
629 {
630 // First, we determine if it is a multiword verb. For example,
631 // "spit at" should be conjugated with the preposition isolated.
632 // We take the first full english word, conjugate it, and append
633 // the rest of the initial verb.
634 //
635 // As I write this, I'm sitting in the Maple Leaf lounge relaxing
636 // on a comfortable leather seat.
637 // First class travel - If only I could get used to it!
638 const char *space;
639 BUF buf;
640
641 space = strchr(verb, ' ');
642 if (space)
643 {
644 char *tmp;
645
646 tmp = (char *)malloc(space - verb + 1);
647 memcpy(tmp, verb, space - verb);
648 tmp[space-verb] = '\0';
649
650 buf = gram_conjugate(tmp, person, past);
651 buf.strcat(space);
652 free(tmp);
653 return buf;
654 }
655
656 // As most verb decisions are based off the ending, we store the
657 // string reversed ending here:
658 char ending[10];
659 size_t verblen = strlen(verb);
660
661 // Build the ending...
662 const char *e;
663 int epos = 0;
664 for (e = verb; *e; e++);
665 e--;
666 for (epos = 0; epos < 10; epos++)
667 {
668 if (e < verb)
669 ending[epos] = '\0';
670 else
671 ending[epos] = *e;
672 e--;
673 }
674
675 // Check for crazy verbs...
676 if (!strcmp(verb, "be"))
677 {
678 buf.reference(glb_verbBE[past][person]);
679 return buf;
680 }
681
682 if (!strcmp(verb, "have"))
683 {
684 if (past)
685 buf.reference("had");
686 else if (person == VERB_HE || person == VERB_SHE || person == VERB_IT)
687 buf.reference("has");
688 else
689 buf.reference("have");
690 return buf;
691 }
692
693 // Now, build the verb from the infinitive...
694 if (past)
695 {
696 buf.strcpy(verb);
697 }
698 else
699 {
700 switch (person)
701 {
702 case VERB_I:
703 // hit->hit
704 buf.strcpy(verb);
705 break;
706
707 case VERB_HE:
708 case VERB_SHE:
709 case VERB_IT:
710 // hit->hits
711 // miss->misses
712 // bash->bashes.
713 // fly->flies.
714 // say->says
715 // go->goes
716 // watch->watches
717 // fix->fixes
718 // buzz->buzzes
719 // have->has
720 // catch->catches
721 if (!strcmp(verb, "have"))
722 {
723 buf.reference("has");
724 }
725 else if ((*ending == 's') ||
726 !strncmp(ending, "hs", 2) ||
727 !strncmp(ending, "hc", 2) ||
728 (*ending == 'o') ||
729 (*ending == 'x') ||
730 (*ending == 'z'))
731 {
732 buf.sprintf("%ses", verb);
733 }
734 else if (*ending == 'y')
735 {
736 if (gram_isvowel(ending[1]))
737 {
738 // say -> says
739 buf.sprintf("%ss", verb);
740 }
741 else
742 {
743 // fly -> flies
744 buf.strcat(verb);
745 // Have to manually end it. strncpy
746 // doesn't write the terminating null.
747 buf.evildata()[verblen-1] = '\0';
748 buf.strcat("ies");
749 }
750 }
751 else
752 {
753 buf.sprintf("%ss", verb);
754 }
755 break;
756
757 // These are pretty identical
758 case VERB_YOU:
759 case VERB_YALL:
760 // hit->hit
761 buf.sprintf("%s", verb);
762 break;
763
764 case VERB_WE:
765 // hit->hit
766 buf.sprintf("%s", verb);
767 break;
768
769 case VERB_HES:
770 case VERB_SHES:
771 case VERB_THEY:
772 // hit->hit
773 buf.sprintf("%s", verb);
774 break;
775
776 default:
777 UT_ASSERT(!"Unhandled VERB!");
778 buf.strcpy(verb);
779 break;
780 }
781 }
782
783 return buf;
784 }
785
786 BUF
gram_capitalize(const char * str)787 gram_capitalize(const char *str)
788 {
789 BUF srcbuf;
790
791 // We don't know life time of str so only safe bet is to make
792 // a hard copy :<
793 srcbuf.strcpy(str);
794 return gram_capitalize(srcbuf);
795 }
796
797 BUF
gram_capitalize(BUF buf)798 gram_capitalize(BUF buf)
799 {
800 bool hard = false;
801 char *s;
802 bool docaps = true; // Start of sentence, caps.
803 BUF result;
804 const char *str = buf.buffer();
805
806 result = buf;
807
808 // THis cast to char * is safe as we only assign to s after a harden.
809 for (s = (char *) str; *s; s++)
810 {
811 if (!isspace(*s))
812 {
813 // Note we do not eat up caps if we get a non-alpha, ie:
814 // "foo bar" will become "Foo bar" (ignoring the quote)
815 // Note that numerical keys are ignored, so
816 // "+1 mace" becomes "+1 Mace"
817 if (docaps && isalpha(*s))
818 {
819 if (islower(*s))
820 {
821 if (!hard)
822 {
823 result.uniquify();
824 s = result.evildata() + (s - str);
825 hard = true;
826 }
827 *s = toupper(*s);
828 }
829 // Eat the caps.
830 docaps = false;
831 }
832
833 // Determine if this is end-sentence, if so, the next
834 // char should be capped.
835 if (gram_isendsentence(*s))
836 docaps = true;
837 }
838 }
839
840 return result;
841 }
842
843 //
844 // This turns the given string into a possessive
845 // you goes to your, foo to foo's, and bars to bars'.
846 //
847 BUF
gram_makepossessive(const char * str)848 gram_makepossessive(const char *str)
849 {
850 BUF strbuf;
851
852 strbuf.strcpy(str);
853 return gram_makepossessive(strbuf);
854 }
855
856 BUF
gram_makepossessive(BUF str)857 gram_makepossessive(BUF str)
858 {
859 BUF result;
860
861 // Special cases...
862 if (!str.strcmp("I"))
863 result.reference("my");
864 if (!str.strcmp("you"))
865 result.reference("your");
866 if (!str.strcmp("it"))
867 result.reference("its"); // Better not let this go normal rule :>
868 if (!str.strcmp("he"))
869 result.reference("his");
870 if (!str.strcmp("she"))
871 result.reference("her");
872 if (!str.strcmp("we"))
873 result.reference("our");
874 if (!str.strcmp("they"))
875 result.reference("their");
876
877 if (result.isstring())
878 return result;
879
880 // Now, standard case...
881 result = str;
882 result.uniquify();
883
884 if (result.lastchar() == 's')
885 result.strcat("'");
886 else
887 result.strcat("'s");
888
889 return result;
890 }
891