1-- ********************************
2-- *  Hugo Interactive Fiction    *  v1.1.0        || by Tristano Ajmone:
3-- *  www.generalcoffee.com/hugo  *  2019/11/14    || https://github.com/tajmone
4-- ********************************  public domain || http://unlicense.org
5--------------------------------------------------------------------------------
6Description = "Hugo"  Categories = {"source", "interactive fiction"}
7--------------------------------------------------------------------------------
8-- file extensions:
9--   .hug -- adventure source
10--   .h   -- library source
11--   .g   -- grammar source
12--------------------------------------------------------------------------------
13-- Syntax definition for Hugo language v3.1.03 (2006).
14-- http://www.generalcoffee.com/hugo/gethugo.html
15
16-- The Hugo Interactive Fiction Development System (1995-2006) is a language and
17-- a set of cross-platform tools for creating text-adventures with sound and
18-- graphics, developed by Kent Tessman for The General Coffee Company Film
19-- Productions, released under BSD-2-Clause License.
20--------------------------------------------------------------------------------
21-- Syntax elements:
--   * Comments       -- single line (!) and block (!\ .. \!).
23--   * Strings        -- double quotes (") escapable with (\")
24--   * Escape         -- various + ASCII escapes.
25--   * Interpolation  -- special non-ASCII characters.
26--   * PreProcessor
27--   * Operators
28--   * Digits         -- decimal integers only.
29--   * Keywords 1     -- Hugo reserved keywords.
30--   * Keywords 2     -- Predefined Hugo stored values:
31--                         * Built-in Global Variables.
32--                         * Built-in Properties.
33--                         * Built-in Engine Variables.
34--   * Keywords 3     -- Various elements that didn't fit elsewhere:
35--                         * Chars constants.
36--                         * System Words.
37--                         * Properties Qualifiers.
38--                         * Boolean Constants (true/false).
39--   * Keywords 4     -- Limit Settings.
40--------------------------------------------------------------------------------
-- General settings.
IgnoreCase        = true
EnableIndentation = false

-- Identifier pattern: an optional leading `$` or `~`, then a letter or an
-- underscore, then any number of word characters, then an optional trailing `$`.
Identifiers = [[ [\$\~]?[a-zA-Z_]\w*\$? ]]

Comments = {
  {
    -- Block comments (not nested). The opening delimiter `!\` is only matched
    -- at the start of a line (leading whitespace allowed); the closing
    -- delimiter `\!` is only matched at the end of a line.
    Block     = true,
    Nested    = false,
    Delimiter = {
      [[  ^(?:\s*)!\\  ]],
      [[  \\!$         ]],
    },
  },
  {
    -- Single line comments: a `!` which is neither preceded nor followed by a
    -- backslash (so that it can't be mistaken for a block comment delimiter).
    Block     = false,
    Delimiter = { [[  (?<!\\)!(?!\\)  ]] },
  },
}
Strings = {
  -- A double quote which is not preceded by a backslash.
  Delimiter = [=[  (?<!\\)" ]=],
  AssertEqualLength = true,
--[[----------------------------------------------------------------------------
                                ESCAPE SEQUENCES
--------------------------------------------------------------------------------
Hugo allows various escape sequences inside strings; some of them have been
defined as Interpolation for visual improvement.

Basic escapes:
  \"  quotation marks
  \\  a literal backslash character
  \_  a forced space, overriding left-justification for the rest of the string
  \n  a newline

Formatting sequences for styles:
  \B  boldface on
  \b  boldface off
  \I  italics on
  \i  italics off
  \P  proportional printing on
  \p  proportional printing off
  \U  underlining on
  \u  underlining off

ASCII Escapes:
  \#xxx   any ASCII or Latin-1 character where xxx represents the three-digit
          ASCII number (or Latin-1 code). --]]
  Escape = [=[ (\\(?:["\\_nBbIiPpUu]|#\d{3})) ]=],
--[[----------------------------------------------------------------------------
                                  INTERPOLATION
--------------------------------------------------------------------------------
We define the special char sequences as Interpolation to visually separate them
from the other escape sequences. This makes the code easier to read, since in
real code a prose string might contain many of both kinds, side by side.

Special characters formatting sequences (ISO-8859-1):
  \`  accent grave followed by a letter (e.g. "\`a" -> "à")
  \’  accent acute followed by a letter (e.g. "\’E" -> "É")
  \~  tilde followed by a letter        (e.g. "\~n" -> "ñ")
  \^  circumflex followed by a letter   (e.g. "\^i" -> "î")
  \:  umlaut followed by a letter       (e.g. "\:u" -> "ü")
  \,  cedilla followed by c or C        (e.g. "\,c" -> "ç")
  \< or \> Spanish quotation marks (« »)
  \!  upside-down exclamation point (¡)
  \?  upside-down question mark (¿)
  \ae ae ligature (æ)
  \AE AE ligature (Æ)
  \c  cents symbol (¢)
  \L  British pound (£)
  \Y  Japanese Yen (¥)

NOTE: The RegEx below defines the acute accent (´) char twice because its
      encoding differs depending on whether the source is in ASCII/ISO-8859-1
      or UTF-8 (the former is the expected encoding for Hugo sources, but the
      latter might be encountered in documentation projects).

NOTE(review): the table above shows the acute accent sequence with a
      typographic apostrophe (’), whereas the RegEx below matches the acute
      accent char (´) -- confirm which character Hugo actually expects.     --]]
  Interpolation = [=[ (?x)(\\(?:
    \xC2\xB4[a-zA-Z]  | # Acute accent (´) in UTF-8 docs will be $c2 $b4.
    [`´~\^:][a-zA-Z]  | # Note: acute accent in ASCII format also found here.
    ,[cC]             | # Cedilla.
    [<>!?]            | # Spanish quotation marks « » and upside-down ¡ ¿ marks.
    ae|AE             | # Æ ligatures.
    [cLY]               # Currencies: ¢ £ ¥.
    )) ]=] }
-- The preprocessor prefix is deliberately a RegEx which can never match:
-- the `#...` directives are captured by keywords group 5 instead, and turned
-- into PreProcessor state via OnStateChange().
PreProcessor = {
  Prefix       = [=[ \A(?!x)x ]=],
  Continuation = "\\",
}

-- Operators: & # < > | = / * + - ~
Operators = [[ \&|\#|<|>|\||\=|\/|\*|\+|\-|~ ]]

-- Numbers: one or more decimal digits.
Digits = [[ \d+ ]]
Keywords = {
  ------------------------------------------------------------------------------
  -- GROUP 1: Hugo reserved keywords
  ------------------------------------------------------------------------------
  {
    Id = 1,
    List = {
      -- a .. c
      "addcontext", "alias", "and", "anything", "array", "attribute",
      "break",
      "call", "capital", "case", "child", "children", "class", "cls",
      "color", "colour", "compound", "constant",
      -- d .. l
      "dict", "do",
      "elder", "eldest", "else", "elseif", "enumerate", "event",
      "for",
      "global", "graphics",
      "held", "hex",
      "if", "in", "input", "is",
      "jump",
      "local", "locate",
      -- m .. p
      "move", "multi", "multiheld", "multinotheld", "music",
      "nearby", "newline", "not", "notheld", "number",
      "or",
      "parent", "pause", "picture", "playback", "print", "printchar",
      "property", "punctuation",
      -- q .. r
      "quit",
      "random", "readfile", "readval", "recordoff", "recordon", "removal",
      "remove", "repeat", "replace", "resource", "restart", "restore",
      "return", "routine", "run", "runevents",
      -- s .. y
      "save", "scriptoff", "scripton", "select", "sibling", "sound", "start",
      "step", "string", "synonym", "system",
      "text", "to",
      "undo",
      "verb", "video",
      "while", "window", "word", "writefile", "writeval",
      "xverb",
      "younger", "youngest",
    },
  },
  ------------------------------------------------------------------------------
  -- GROUP 2: Predefined Engine Stored Values
  ------------------------------------------------------------------------------
  {
    Id = 2,
    List = {
      -- Built-in Global Variables:
      "actor", "endflag", "location", "object", "objects", "player",
      "prompt", "self", "system_status", "verbroutine", "words", "xobject",

      -- Built-in Properties:
      -- ("adjectives" and "nouns" are left out on purpose: they are aliases
      -- defined by the Hugo library, not tokens of the engine and compiler.)
      "adjective", "after", "article", "before", "name", "noun",

      -- Built-in Engine Variables:
      "parse$", "serial$",
    },
  },
  ------------------------------------------------------------------------------
  -- GROUP 3: Chars Constants + System Words + Properties Qualifiers + Booleans
  ------------------------------------------------------------------------------
  {
    -- ASCII Chars constants: a single 7-bit char between single quotes.
    Id = 3,
    Regex = [=[  '[\x00-\x7F]'  ]=],
  },
  {
    -- Same group Id as the entry above, but defined as a words list.
    Id = 3,
    List = {
      -- System Words:
      "~all", "~and", "~any", "~except", "~oops",
      -- Properties Qualifiers:
      "$additive", "$complex",
      -- Boolean Constants:
      "true", "false",
    },
  },
  ------------------------------------------------------------------------------
  -- GROUP 4: Limit Settings
  ------------------------------------------------------------------------------
  {
    Id = 4,
    -- `(?x-i)`: extended syntax on, case-insensitivity off.
    Regex = [=[ (?x-i)
      (\$MAX (?: ALIASES | ARRAYS | ATTRIBUTES | CONSTANTS | DICTEXTEND | DICT |
        DIRECTORIES | EVENTS | FLAGS | GLOBALS | LABELS | LOCALS | OBJECTS |
        PROPERTIES | ROUTINES | SPECIALWORDS )) ]=],
    Group = 0,
  },
  ------------------------------------------------------------------------------
  -- GROUP 5: PreProcessor
  ------------------------------------------------------------------------------
  {
    -- These tokens are captured as keywords, and OnStateChange() then returns
    -- them as PreProcessor. Setting '#' as the PreProcessor delimiter instead
    -- would prevent capturing the '#' token used for counting properties.
    Id = 5,
    Regex = [=[ (?x-i)
      (\#(?:
        if(clear | set | defined | undefined) | if | elseif | else | endif |
        clear | set | include | link | message | switches | version
      )\b) ]=],
    Group = 0,
  },
}
201
-- State-change hook: given the state being left (oldState), the state being
-- entered (newState) and the keyword group (kwgroup), returns the state to
-- apply: HL_REJECT, HL_PREPROC, or newState unchanged. The token argument is
-- not used.
function OnStateChange(oldState, newState, token, kwgroup)
  -- Rule 1: Ignore Escape Sequences Outside Strings.
  -- An escape sequence is accepted only when it comes right after a string,
  -- another escape sequence, or an interpolation.
  local cameFromStringContext = oldState == HL_STRING
                             or oldState == HL_ESC_SEQ
                             or oldState == HL_INTERPOLATION
  if newState == HL_ESC_SEQ and not cameFromStringContext then
    return HL_REJECT
  end

  -- Rule 2: Ignore Interpolations Inside Preprocessor Strings.
  local enteringInterpolation = (newState == HL_INTERPOLATION)
  if enteringInterpolation and oldState == HL_PREPROC_STRING then
    return HL_REJECT
  end

  -- Rule 3: Keywords from Group 5 are handed back as PreProcessor.
  local isDirectiveKeyword = (newState == HL_KEYWORD) and (kwgroup == 5)
  if isDirectiveKeyword then
    return HL_PREPROC
  end

  -- Anything else goes through untouched.
  return newState
end
228--[[============================================================================
229                                  KNOWN ISSUES
230================================================================================
231FilePath Strings:
232
233  Escape sequences and interpolations shouldn't show up inside file path strings
234  following keywords like `resource`, `picture` and other similar keywords which
235  expect a file string after them. The syntax should track strings immediately
236  following these keywords and discard escapes/interpolations accordingly. The
237  `resource` keyword is going to be trickier because it allows multiple strings
238  inside a `{..}` block (and strings might be followed by comments).
239================================================================================
240                                  CHANGELOG
241================================================================================
242v1.1.0 (2019/11/14) | Highlight v3.54
243  - Polish source.
244  - List keywords one kwd per line to simplify WIP and tracking changes.
245  - NEW:
246    - Added missing keyword tokens.
247    - Removed keyword "adjectives" and "nouns" (library defined aliases).
248    - Kwd Group 2 now hosts predefined Engine Values:
249      - Built-in Global Variables. (moved here from Group 1)
250      - Built-in Properties.       (moved here from Group 1)
251    - Kwd Groups 3 and 4 are shifted and become 4 and 5.
252    - Kwd Group 3: (was 2) added also, along with Char Constants:
253      - System Words.
254      - Properties Qualifiers.
255      - Boolean Constants (true/false: moved from  Group 1).
256  - FIXES:
257    - Identifiers patterns tweaked to include tokens starting with tilde.
258    - Ignore interpolations inside preprocessor strings.
    - PreProcessor: prevent matching the '#' for properties count as the
      beginning of a preprocessor directive.
261v1.0.0 (2019/05/24) | Highlight v3.51
262  - First release. --]]
263