1 /*
2  *   Copyright (c) 2002 by Michael J. Roberts.  All Rights Reserved.
3  *
4  *   Please see the accompanying license file, LICENSE.TXT, for information
5  *   on using and copying this software.
6  */
7 /*
8 Name
9   vmcore.cpp - T3 VM "core" interpreter - example main entrypoint
10 Function
11   This is an example of how to link the T3 VM "core" into a separate
12   application.  This is just a placeholder file; in a real system, the
13   application itself would provide the main entrypoint, so this file would
14   not be included in the build.
15 
16   In addition to the main entrypoint, we define a skeleton intrinsic
17   function set.  A real application would fill in the definitions of the
18   intrinsic functions so that they expose functionality of the application
19   to the T3 program.  This way, the T3 program can call the host application.
20 
21   The core interpreter does not have any user interface features at all.
22   It is the responsibility of the host application to provide access to user
23   interface features.  In most cases, the whole point of linking the T3 VM
24   into another application is to provide an alternative user interface to T3
25   programs, so the entire extent of the work of integrating T3 and the host
26   application is implementing the intrinsic function set to provide UI
27   features to the T3 program.
28 Notes
29 
30 Modified
31   04/05/02 MJRoberts  - Creation
32 */
33 
34 #include <stdio.h>
35 #include <stdlib.h>
36 
37 #include "t3std.h"
38 #include "vmglob.h"
39 #include "vmtype.h"
40 #include "vmmain.h"
41 #include "vmhost.h"
42 #include "vmcore.h"
43 #include "charmap.h"
44 #include "vmstr.h"
45 
46 
47 /* we need this #include only if we're using CVmHostIfcStdio (see below) */
48 #include "vmhostsi.h"
49 
50 /* ------------------------------------------------------------------------ */
51 /*
52  *   Client Interface implementation.  See the explanation in main() below
53  *   for more details on the purpose of this class.
54  *
55  *   This isn't a very fancy implementation.  Most real applications will
56  *   want to provide implementations tied to their UI's.
57  */
58 class MyClientIfc: public CVmMainClientIfc
59 {
60 public:
61     /*
62      *   Set plain ASCII mode - most GUI-type applications need do nothing
63      *   here.  The purpose of this is to allow the user to set a
64      *   console-mode application to use simple stream output only, for
65      *   purposes such as interoperating with a text-to-speech converter.
66      *   If the application doesn't have a way of switching between
67      *   character mode and graphics mode, there's nothing for this routine
68      *   to do.
69      */
set_plain_mode()70     void set_plain_mode() { }
71 
72     /*
73      *   Create the main system console - we don't want a console, because
74      *   we don't want to make assumptions about the nature of the user
75      *   interface.
76      *
77      *   In a real application, you would only want to create a console if
78      *   you want to implement the stream-oriented output features of the
79      *   standard TADS interpreter UI.  The console subsystem provides text
80      *   formatting features (word wrapping, spacing, bold/highlighting, and
81      *   so on) that are suitable for terminal-style display.
82      */
create_console(struct vm_globals *)83     class CVmConsoleMain *create_console(struct vm_globals *) { return 0; }
84 
85     /* delete the console - we never created one, so there's nothing to do */
delete_console(struct vm_globals *,class CVmConsoleMain *)86     void delete_console(struct vm_globals *, class CVmConsoleMain *) { }
87 
88     /*
89      *   Initialize.  This particular application doesn't request scripting
90      *   files, log files, command log files, or banner strings when calling
91      *   the VM, so there's no need for us to deal with any of those
92      *   possibilities.
93      */
client_init(struct vm_globals *,const char *,const char *,const char *,const char *)94     void client_init(struct vm_globals *,
95                      const char * /*script_file*/,
96                      const char * /*log_file*/,
97                      const char * /*cmd_log_file*/,
98                      const char * /*banner_str*/)
99     {
100         /* do nothing */
101     }
102 
103     /* we don't do anything at initialization, so there's nothing to undo */
client_terminate(struct vm_globals *)104     void client_terminate(struct vm_globals *) { }
105 
106     /* pre-execution notification */
pre_exec(struct vm_globals *)107     void pre_exec(struct vm_globals *) { }
108 
109     /* post-execution notification */
post_exec(struct vm_globals *)110     void post_exec(struct vm_globals *) { }
111 
112     /* post-execution notification with errors */
post_exec_err(struct vm_globals *)113     void post_exec_err(struct vm_globals *) { }
114 
115     /*
116      *   Display an error message - we'll simply show the message on the
117      *   standard output.  Real implementations would normally want to
118      *   integrate this with their own user interface; on a GUI platform,
119      *   for example, we might want to pop up an alert box with the mesage.
120      */
display_error(struct vm_globals *,const char * msg,int add_blank_line)121     void display_error(struct vm_globals *,
122                        const char *msg, int add_blank_line)
123     {
124         /* show the message */
125         printf("%s\n", msg);
126 
127         /* add a blank line after it, if requested */
128         if (add_blank_line)
129             printf("\n");
130     }
131 };
132 
133 /* ------------------------------------------------------------------------ */
134 /*
135  *   Main program entrypoint.  This is meant to be replaced by the actual
136  *   host application.
137  *
138  *   Don't worry if you're using an OS that uses something other than the
139  *   standard Unix-style "main()" as its entrypoint - you're going to remove
140  *   this entire file anyway and replace it with your own, so you can use
141  *   whatever style of OS entrypoint is appropriate.  The only reason this
142  *   is here is to serve as an example of how you invoke the T3 VM to get
143  *   your compiled T3 program running in the first place.
144  *
145  *   Note also that you don't have to call the VM directly from your
146  *   entrypoint function.  We call the VM from main() only because we have
147  *   nothing else useful to do.  You can do as much as you want to set up
148  *   your program or even run interactively for a while before calling the
149  *   VM to run the T3 program.
150  */
main(int argc,char ** argv)151 int main(int argc, char **argv)
152 {
153     int stat;
154     int load_from_exe;
155     const char *image_file_name;
156 
157     /*
158      *   For our purposes, we will assume that our argument vector contains
159      *   only one argument, which is the name of the program to run.  If the
160      *   arguments don't look right, terminate with an error.
161      */
162     if (argc != 2)
163     {
164         printf("usage: t3core <program-name>\n");
165         exit(1);
166     }
167 
168     /*
169      *   The image (.t3) file's name is given by the first argument.
170      *
171      *   Some applications might want to bind the .t3 file directly into the
172      *   application's executable file (the .exe file on Windows, for
173      *   example) rather than loading a separate .t3 file.  Fortunately,
174      *   this is easy.  Two steps are required.
175      *
176      *   1.  After building the application executable and compiling the T3
177      *   program to yield a .t3 file, use the appropriate OS-specific TADS
178      *   tool to bind two together into a single executable file.  On
179      *   DOS/Windows, this tool is 'maketrx32' - simply specify the name of
180      *   your application executable, the name of your .t3 file, and the
181      *   name of a new executable, and the tool will generate a new
182      *   executable that has both files bound together.
183      *
184      *   2.  In our call to vm_run_image(), rather than passing the name of
185      *   a .t3 file to laod, we'd pass the name of the application
186      *   executable file itself (this is simply argv[0] with our unix-style
187      *   main(), but on other systems we might have to obtain this
188      *   information some other way), and we'd pass TRUE for the
189      *   'load_from_exe' argument to vm_run_image().  This will make the VM
190      *   look for the .t3 file bound into the application executable using
191      *   an appropriate OS-specific mechanism.
192      */
193     image_file_name = argv[1];
194     load_from_exe = FALSE;
195 
196     /*
197      *   Create the "host interface."  For our purposes, we use the simple
198      *   "stdio" host interface implementation, which the T3 source code
199      *   provides for convenience.  The host interface lets the main
200      *   application (the "host") communicate certain information about the
201      *   execution environment to the VM, and carries out certain tasks on
202      *   behalf of the VM; the purpose of this is to allow the VM adapt more
203      *   easily to different application environments by deferring certain
204      *   operations to the host application, so that the VM doesn't have to
205      *   make assumptions about the host environment that might not always
206      *   be true.
207      *
208      *   Most real applications will not want to use the simple "stdio" host
209      *   interface implementation, because this standard implementation does
210      *   make lots of assumptions about the host environment that might not
211      *   always be true.  That's why we have to create the object here - the
212      *   VM can't create it, because it can't know what kind of object we'd
213      *   want to use.
214      *
215      *   If you do want to customize the host interface, you'll need to
216      *   implement a C++ class as a subclass of CVmHostIfc - see vmhost.h.
217      *   You'll need to provide an implementation for each method defined in
218      *   CVmHostIfc.
219      *
220      *   Note that we use "new" to allocate this object, rather than
221      *   allocating it on the stack, because of a logistical detail:
222      *   CVmHostIfcStdio actually allocates some memory upon creating an
223      *   instance, which it frees when the CVmHostIfcStdio instance itself
224      *   is destroyed.  If we allocated this instance on the stack, the
225      *   instance wouldn't be destroyed until this function returns.
226      *   However, we want to run a memory leak test (by calling
227      *   t3_list_memory_blocks(), below) before we return from the function.
228      *   If we allocated this object on the stack, it wouldn't be deleted
229      *   until after we return, and so the memory it allocates won't be
230      *   deleted until after we return, so our memory test would show those
231      *   blocks still allocated and warn of a memory leak.  We deal with
232      *   this by explicitly allocating it with 'new' here so that we can
233      *   explicitly destroy it with 'delete' before running the memory
234      *   check.
235      */
236     CVmHostIfc *hostifc = new CVmHostIfcStdio(argv[0]);
237 
238     /*
239      *   Create the "client interface" object.  This is similar in purpose
240      *   to the host interface; this is defined as a separate interface
241      *   because it provides functionality that is somewhat orthogonal to
242      *   the host interface, so in some cases we might want to mix and match
243      *   different pairs of client and host interface implementations.  For
244      *   example, HTML TADS uses its own custom host interface, and most
245      *   character-mode TADS interpreters use the "stdio" host interface,
246      *   but both types of interpreters use the same "console" client
247      *   interface implementation.
248      *
249      *   There's only one standard system client interface implementation,
250      *   which is based on the system "console."  The console is the TADS
251      *   output formatter subsystem.  We do NOT use this standard
252      *   implementation, because we don't want to depend on the console
253      *   layer: the whole point of this core VM configuration is to provide
254      *   a version of the VM without any UI dependencies, and including the
255      *   console formatter introduces all kinds of dependencies.
256      *
257      *   So, we define our own custom implementation of the client
258      *   interface.  See the definition earlier in the file.
259      */
260     MyClientIfc clientifc;
261 
262     /*
263      *   Load and run the image file.  This is how we run the T3 program:
264      *   this loads the program, sets everything up in the VM, and executes
265      *   the program.  This call doesn't return until the program terminates.
266      *
267      *   If your application runs on an event-oriented operating system,
268      *   such as Windows or Macintosh, you might be wondering at this point
269      *   exactly where you're supposed to put your message loop if the T3
270      *   program is going to hog the CPU from now until the program
271      *   terminates.  There are several approaches you can use:
272      *
273      *   1.  You can use a separate thread to run the T3 program, and run
274      *   the UI event loop in the main thread.  To do this, rather than
275      *   calling the VM directly here, you'd intead spawn another thread,
276      *   and let that thread call the VM.  So, the VM would run in that new
277      *   thread, leaving the main thread free to proces UI events.  This
278      *   would require proper synchronization any time one of your intrinsic
279      *   functions needs to access UI features, but otherwise it would be
280      *   pretty simple, because the VM is otherwise fairly self-contained.
281      *
282      *   2.  You can write the event loop in the T3 program itself (i.e., in
283      *   the interpreted code running under the VM).  You would have to
284      *   write an intrinsic function to retrieve and dispatch events.  This
285      *   approach would probably be a lot of work, because it would mean
286      *   that you'd have to provide access to a fair chunk of your OS's GUI
287      *   API in your intrinsic function set or sets.  In all likelihood,
288      *   you've already implemented most or all of your UI in the C++ part
289      *   of your application, and you won't have any desire to provide broad
290      *   access to the low-level OS GUI API directly to the T3 program, so
291      *   this option is probably not suitable in most cases.
292      *
293      *   3.  You can do what HTML TADS does, which is run everything in one
294      *   thread, and run *recursive* event loops in the intrinsic functions.
295      *   In HTML TADS, the T3 program runs happily along, oblivious to the
296      *   UI, until it wants to read some text from the keyboard or display
297      *   some text on the console, at which point it has to call an
298      *   intrinsic function.  The intrinsic makes the appropriate OS-level
299      *   API calls to display the text or whatever.  If the intrinsic's
300      *   purpose is to read some input from the user, HTML TADS runs a
301      *   recursive event loop to allow the user to interact with the
302      *   program.  The recursive event loop monitors a flag, controlled by
303      *   the intrinsic, that indicates when the recursive loop is done.  For
304      *   example, if the intrinsic's purpose is to wait for a keystroke from
305      *   the user, the intrinsic tells the main window that it's waiting for
306      *   a key, then calls the recursive event loop; the event loop simply
307      *   reads and dispatches events as long as the "done" flag is false;
308      *   and the main window, when it receives a keystroke event, notices
309      *   that it's waiting for a key, so it stashes the keystroke for
310      *   retrieval by the intrinsic and sets the "done" flag to true; the
311      *   recursive event loop notices this and returns to the intrinsic,
312      *   which finds the keystroke it wanted stashed away, and returns.
313      */
314     stat = vm_run_image(&clientifc, image_file_name, hostifc, 0, 0,
315                         0, 0, 0, load_from_exe, FALSE, 0, 0, 0);
316 
317     /* we're done with the host interface object, so delete it */
318     delete hostifc;
319 
320     /*
321      *   Show any unfreed memory.  This is purely for debugging purposes
322      *   during development of T3 itself; most real applications that link
323      *   in T3 won't want to bother with this, unless they're suspicious
324      *   that T3 is leaking memory on them and want to check it.
325      */
326     t3_list_memory_blocks(0);
327 
328     /* terminate with the status code from the VM */
329     exit(stat);
330     return stat;
331 }
332 
333 /* ------------------------------------------------------------------------ */
334 /*
335  *   Define the "core-sample" intrinsic function set.  This is an example of
336  *   how to define native C++ functions that can be called from TADS code
337  *   running in the VM.
338  */
339 
340 /*
341  *   displayText(str) - display a text string.
342  */
display_text(VMG_ uint argc)343 void CVmBifSample::display_text(VMG_ uint argc)
344 {
345     const char *strp;
346     size_t len;
347 
348     /*
349      *   Check to make sure we have the right number of arguments.  'argc'
350      *   tells us how many arguments we received from the T3 program.
351      */
352     check_argc(vmg_ argc, 1);
353 
354     /*
355      *   Get the first argument, which is the string to display.
356      *
357      *   This will give us a pointer to a string in internal format, which
358      *   has a two-byte length prefix.  So, once we have the string, we must
359      *   get the length from the string pointer, and then skip the length
360      *   prefix to get to the real text of the string.
361      *
362      *   Arguments from the T3 program to a native function always appear on
363      *   the VM stack, which we can access using the pop_xxx_val() functions.
364      *   The arguments appear on the stack in order such that the first
365      *   pop_xxx_val() gives us the first argument, the second pop_xxx_val()
366      *   gives us the second argument, and so on.  The pop_xxx_val()
367      *   functions REMOVE an argument from the stack - once it's removed, we
368      *   can get to the next one.  We MUST remove EXACTLY the number of
369      *   arguments that we receive before we return.
370      */
371     strp = pop_str_val(vmg0_);
372     len = vmb_get_len(strp);
373     strp += VMB_LEN;
374 
375     /*
376      *   Okay, we have our string, but it's in UTF-8 format, which is an
377      *   encoding format for Unicode.  We don't want to display Unicode; we
378      *   want to display the local character set.  How do we do this?
379      *   Fortunately, T3 provides a handy character mapping subsystem that
380      *   will let us convert the string fairly automatically.  The VM also
381      *   gives us a pre-loaded mapper for this specific kind of conversion,
382      *   in the object G_cmap_to_ui.  G_cmap_to_ui will map characters from
383      *   UTF-8 to the local User Interface character set.
384      *
385      *   To avoid the need to allocate a gigantic string buffer to convert
386      *   the characters, the mapper lets us map in chunks of any size.  So,
387      *   we'll simply map and display chunks until we run out of string.
388      */
389     while (len != 0)
390     {
391         char buf[128];
392         size_t cur_out;
393         size_t cur_in;
394 
395         /*
396          *   Map as much as we can into our buffer.  This will set cur_out to
397          *   the number of bytes in the local character set (the output of
398          *   the conversion), and will set cur_in to the number of bytes we
399          *   used from the Unicode string (the input).
400          */
401         cur_out = G_cmap_to_ui->map_utf8(buf, sizeof(buf),
402                                          strp, len, &cur_in);
403 
404         /*
405          *   Show the local characters.
406          *
407          *   "%.*s" is just like "%s", but the ".*" tells printf to show
408          *   exactly the number of characters in the int argument before the
409          *   string, instead of showing everything until it finds a null byte
410          *   in the string.  This is important because map_utf8 does NOT
411          *   null-terminate the result.
412          */
413         printf("%.*s", (int)cur_out, buf);
414 
415         /*
416          *   skip the characters of input we just translated, so that on the
417          *   next iteration of this loop we'll translate the next bunch of
418          *   characters
419          */
420         strp += cur_in;
421         len -= cur_in;
422     }
423 }
424 
425 /*
426  *   readText() - read text from the keyboard and return it as a string.
427  */
read_text(VMG_ uint argc)428 void CVmBifSample::read_text(VMG_ uint argc)
429 {
430     char buf[128];
431     size_t len;
432     vm_obj_id_t str_id;
433     CVmObjString *str_obj;
434 
435     /* check to make sure we have the right number of arguments */
436     check_argc(vmg_ argc, 0);
437 
438     /*
439      *   Read a string from the keyboard.  Use fgets() rather than plain
440      *   gets(), because fgets() lets us limit the buffer size and thus avoid
441      *   any chance of a buffer overflow.  (Someone should tell the people at
442      *   Microsoft about this - it would probably cut out about eighty
443      *   percent of those emergency internet security alerts that require
444      *   everyone to download an IE patch every couple of weeks. :)
445      */
446     fgets(buf, sizeof(buf), stdin);
447 
448     /*
449      *   One small detail about fgets: if the input ended with a newline,
450      *   there will be a newline in the buffer.  Remove it if it's there.
451      */
452     if ((len = strlen(buf)) != 0 && buf[len - 1] == '\n')
453         buf[len-1] = '\0';
454 
455     /*
456      *   As in display_text(), we have to deal with character set mapping
457      *   before we can send the string back to the TADS program.  This time,
458      *   we want to perform the conversion from the local character set to
459      *   Unicode.  Again, T3 provides a handy conversion object for our
460      *   convenience - this time, it's called G_cmap_from_ui.
461      *
462      *   In order to return a string to the TADS program, we have to allocate
463      *   a new string object.  First, let's see how big a string we need to
464      *   allocate, by calling the character mapper with no buffer space at
465      *   all - the mapper will run through the string and check to see how
466      *   big it will be after conversion, but it won't actually store
467      *   anything.
468      */
469     len = G_cmap_from_ui->map_str(0, 0, buf);
470 
471     /*
472      *   Allocate a new string to contain the return value.  This gives us
473      *   back an "object ID" value, which we can convert into an internal C++
474      *   string object pointer using the vm_objp() formula shown.
475      */
476     str_id = CVmObjString::create(vmg_ FALSE, len);
477     str_obj = (CVmObjString *)vm_objp(vmg_ str_id);
478 
479     /*
480      *   The string object has a buffer of the size we requested, which is
481      *   the size we already know we need to contain the mapped string.  So,
482      *   we can call the mapper again to have it perform the actual mapping
483      *   into our string buffer.
484      */
485     G_cmap_from_ui->map_str(str_obj->cons_get_buf(), len, buf);
486 
487     /*
488      *   One last step: we must return the string object to the caller.  To
489      *   do this, use the retval_obj() function to return the ID of the
490      *   string object.
491      */
492     retval_obj(vmg_ str_id);
493 }
494 
495