1 /* $Header$ */
2 
3 /*
4  *   Copyright (c) 2000, 2002 Michael J. Roberts.  All Rights Reserved.
5  *
6  *   Please see the accompanying license file, LICENSE.TXT, for information
7  *   on using and copying this software.
8  */
9 /*
10 Name
11   vmdict.h - VM 'Dictionary' metaclass
12 Function
13 
14 Notes
15 
16 Modified
17   01/24/00 MJRoberts  - Creation
18 */
19 
20 #ifndef VMDICT_H
21 #define VMDICT_H
22 
23 #include <stdlib.h>
24 #include <string.h>
25 
26 #include "t3std.h"
27 #include "vmtype.h"
28 #include "vmglob.h"
29 #include "vmobj.h"
30 #include "vmundo.h"
31 #include "vmhash.h"
32 
33 
34 /* forward-declare the class */
35 class CVmObjDict;
36 
37 /* ------------------------------------------------------------------------ */
38 /*
39  *   undo action codes
40  */
41 enum dict_undo_action
42 {
43     /* we added this word to the dictionary (undo by deleting it) */
44     DICT_UNDO_ADD,
45 
46     /* we deleted this word from the dictionary (undo by adding it back) */
47     DICT_UNDO_DEL,
48 
49     /* we change the comparator object */
50     DICT_UNDO_COMPARATOR
51 };
52 
53 
54 /* ------------------------------------------------------------------------ */
55 /*
56  *   The image file data block is arranged as follows:
57  *
58  *.  UINT4 comparator_object_id
59  *.  UINT2 load_image_entry_count
60  *.  entry 1
61  *.  entry 2
62  *.  ...
63  *.  entry N
64  *
65  *   Each entry has the following structure:
66  *
67  *.  UCHAR key_string_byte_length
68  *.  key_string (UTF-8 characters, not null terminated, XOR'ed with 0xBD)
69  *.  UINT2 number of sub-entries
70  *.  sub-entry 1
71  *.  sub-entry 2
72  *.  etc
73  *
74  *   Each sub-entry is structured like this:
75  *
76  *   UINT4 associated_object_id
77  *.  UINT2 defining_property_id
78  *
79  *   Note that each byte of the key string is XOR'ed with the arbitrary
80  *   byte value 0xBD.  This is simply to provide a minimal level of
81  *   obfuscation in the image file to prevent casual browsing of the image
82  *   contents.
83  */
84 /*
85  *   Separately, we maintain a hash table and entries in the hash table.
86  *   We don't attempt to keep any of the other data in the object in a
87  *   portable internal format, but rather serialize data to a saved state
88  *   file and rebuild the internal format on demand.  We also do not keep
89  *   the hash table in the variable heap mechanism, but simply use the
90  *   standard system heap.  The internal structure of our hash table would
91  *   be too complicated to keep in the variable heap in portable format,
92  *   and furthermore we do not expect dictionary instances to be numerous
93  *   (hence the cost of serializing should be small, since it won't be
94  *   repeated over many objects) or frequently changed (hence keeping
95  *   these in the standard system heap should not cause excessive heap
96  *   fragmentation).
97  */
98 
99 /* comparator object types */
100 enum vm_dict_comp_type
101 {
102     VMDICT_COMP_NONE,                               /* no comparator object */
103     VMDICT_COMP_STRCOMP,                                /* StringComparator */
104     VMDICT_COMP_GENERIC                        /* generic comparator object */
105 };
106 
107 /*
108  *   Dictionary object extension - we use this memory (in the variable
109  *   heap) to keep track of our image data and our hash table.
110  */
111 struct vm_dict_ext
112 {
113     /* pointer to load image data, if any */
114     const char *image_data_;
115     size_t image_data_size_;
116 
117     /* pointer to our hash table (in the system heap) */
118     class CVmHashTable *hashtab_;
119 
120     /* flag: the table has been changed since image load */
121     ulong modified_;
122 
123     /* our comparator object, if any */
124     vm_obj_id_t comparator_;
125     CVmObject *comparator_obj_;
126 
127     /* type of comparator */
128     vm_dict_comp_type comparator_type_;
129 };
130 
131 /* ------------------------------------------------------------------------ */
132 /*
133  *   Dictionary object interface
134  */
135 class CVmObjDict: public CVmObject
136 {
137     friend class CVmMetaclassDict;
138 
139 public:
140     /* metaclass registration object */
141     static class CVmMetaclass *metaclass_reg_;
get_metaclass_reg()142     class CVmMetaclass *get_metaclass_reg() const { return metaclass_reg_; }
143 
144     /* am I of the given metaclass? */
is_of_metaclass(class CVmMetaclass * meta)145     virtual int is_of_metaclass(class CVmMetaclass *meta) const
146     {
147         /* try my own metaclass and my base class */
148         return (meta == metaclass_reg_
149                 || CVmObject::is_of_metaclass(meta));
150     }
151 
152     /* create dynamically using stack arguments */
153     static vm_obj_id_t create_from_stack(VMG_ const uchar **pc_ptr,
154                                          uint argc);
155 
156     /*
157      *   call a static property - we don't have any of our own, so simply
158      *   "inherit" the base class handling
159      */
call_stat_prop(VMG_ vm_val_t * result,const uchar ** pc_ptr,uint * argc,vm_prop_id_t prop)160     static int call_stat_prop(VMG_ vm_val_t *result,
161                               const uchar **pc_ptr, uint *argc,
162                               vm_prop_id_t prop)
163         { return CVmObject::call_stat_prop(vmg_ result, pc_ptr, argc, prop); }
164 
165     /* determine if an object is a Dictionary object */
is_dictionary_obj(VMG_ vm_obj_id_t obj)166     static int is_dictionary_obj(VMG_ vm_obj_id_t obj)
167         { return vm_objp(vmg_ obj)->is_of_metaclass(metaclass_reg_); }
168 
169     /* notify of deletion */
170     void notify_delete(VMG_ int in_root_set);
171 
172     /* get a property */
173     int get_prop(VMG_ vm_prop_id_t prop, vm_val_t *val,
174                  vm_obj_id_t self, vm_obj_id_t *source_obj, uint *argc);
175 
176     /* set a property */
177     void set_prop(VMG_ class CVmUndo *undo,
178                   vm_obj_id_t self, vm_prop_id_t prop, const vm_val_t *val);
179 
180     /* receive notification of a new undo savepoint */
notify_new_savept()181     void notify_new_savept() { }
182 
183     /* apply undo */
184     void apply_undo(VMG_ struct CVmUndoRecord *rec);
185 
186     /* discard additional information associated with an undo record */
187     void discard_undo(VMG_ struct CVmUndoRecord *rec);
188 
189     /* mark a reference in undo */
190     void mark_undo_ref(VMG_ struct CVmUndoRecord *rec);
191 
192     /* remove stale weak references from an undo record */
193     void remove_stale_undo_weak_ref(VMG_ struct CVmUndoRecord *rec);
194 
195     /* mark references */
196     void mark_refs(VMG_ uint);
197 
198     /* remove weak references */
199     void remove_stale_weak_refs(VMG0_);
200 
201     /* load from an image file */
202     void load_from_image(VMG_ vm_obj_id_t, const char *ptr, size_t siz);
203 
204     /* perform post-load initialization */
205     void post_load_init(VMG_ vm_obj_id_t self);
206 
207     /* restore to image file state */
208     void reset_to_image(VMG_ vm_obj_id_t /*self*/);
209 
210     /* determine if the object has been changed since it was loaded */
211     int is_changed_since_load() const;
212 
213     /* save to a file */
214     void save_to_file(VMG_ class CVmFile *fp);
215 
216     /* restore from a file */
217     void restore_from_file(VMG_ vm_obj_id_t self,
218                            class CVmFile *fp, class CVmObjFixup *);
219 
220     /* rebuild for image file */
221     virtual ulong rebuild_image(VMG_ char *buf, ulong buflen);
222 
223     /* convert to constant data */
224     virtual void convert_to_const_data(VMG_ class CVmConstMapper *mapper,
225                                        vm_obj_id_t self);
226 
227     /* enumerate the properties for which a word is defined */
228     void enum_word_props(VMG_ void (*cb_func)(VMG_ void *, vm_prop_id_t,
229                                               const vm_val_t *),
230                          void *cb_ctx, const vm_val_t *strval,
231                          const char *strp, size_t strl);
232 
233     /* get/set my comparator object */
get_comparator()234     vm_obj_id_t get_comparator() const { return get_ext()->comparator_; }
235     void set_comparator(VMG_ vm_obj_id_t obj);
236 
237     /*
238      *   Match a pair of strings.
239      *
240      *   'valstrval' is the vm_val_t with the string value, if one is
241      *   available; if not, this can be given as null and we'll synthesize a
242      *   new string object from 'valstr' and 'vallen' if one is needed.  We
243      *   don't require the caller to synthesize a string object because we
244      *   might not need one; we'll create one only if one is actually needed.
245      *
246      *   'valstr' and 'vallen' directly give the text of the value string,
247      *   and 'refstr' and 'reflen' likewise directly give the text of the
248      *   reference string.
249      */
250     int match_strings(VMG_ const vm_val_t *valstrval,
251                       const char *valstr, size_t vallen,
252                       const char *refstr, size_t reflen,
253                       vm_val_t *result_val);
254 
255     /*
256      *   Calculate the hash value for a string.  As in match_strings(),
257      *   'valstrval' can be passed as null if no vm_val_t for the string text
258      *   is available, in which case we'll synthesize a new string object
259      *   from 'valstr' and 'vallen' if one is needed.
260      */
261     unsigned int calc_hash(VMG_ const vm_val_t *valstrval,
262                            const char *valstr, size_t vallen);
263 
264 protected:
265     CVmObjDict(VMG0_);
266 
267     /* set the comparator type */
268     void set_comparator_type(VMG_ vm_obj_id_t obj);
269 
270     /* create or re-create the hash table */
271     void create_hash_table(VMG0_);
272 
273     /* fill the hash table with entries from the image data */
274     void build_hash_from_image(VMG0_);
275 
276     /* property evaluation - undefined property */
getp_undef(VMG_ vm_obj_id_t,vm_val_t *,uint *)277     int getp_undef(VMG_ vm_obj_id_t, vm_val_t *, uint *) { return FALSE; }
278 
279     /* enumeration callback for getp_find */
280     static void find_cb(void *ctx, class CVmHashEntry *entry);
281 
282     /* enumeration callback for getp_isdef */
283     static void isdef_cb(void *ctx, class CVmHashEntry *entry);
284 
285     /* enumeration callback for enum_word_props */
286     static void enum_word_props_cb(void *ctx, class CVmHashEntry *entry);
287 
288     /* property evaluation - set the comparator object */
289     int getp_set_comparator(VMG_ vm_obj_id_t, vm_val_t *val, uint *argc);
290 
291     /* property evaluation - findWord */
292     int getp_find(VMG_ vm_obj_id_t, vm_val_t *val, uint *argc);
293 
294     /* property evaluation - addWord */
295     int getp_add(VMG_ vm_obj_id_t self, vm_val_t *val, uint *argc);
296 
297     /* service routine for getp_add - add a single string */
298     void getp_add_string(VMG_ vm_obj_id_t self,
299                          const char *str, vm_obj_id_t obj,
300                          vm_prop_id_t voc_prop);
301 
302     /* property evaluation - delWord */
303     int getp_del(VMG_ vm_obj_id_t self, vm_val_t *val, uint *argc);
304 
305     /* service routine for getp_del - remove a single string */
306     void getp_del_string(VMG_ vm_obj_id_t self,
307                          const char *str, vm_obj_id_t obj,
308                          vm_prop_id_t voc_prop);
309 
310     /* property evaluation - isWordDefined */
311     int getp_is_defined(VMG_ vm_obj_id_t self, vm_val_t *val, uint *argc);
312 
313     /* property evaluation - forEachWord */
314     int getp_for_each_word(VMG_ vm_obj_id_t self, vm_val_t *retval,
315                            uint *argc);
316 
317     /* get my extension, properly cast */
get_ext()318     vm_dict_ext *get_ext() const { return (vm_dict_ext *)ext_; }
319 
320     /*
321      *   add an entry; returns true if successful, false if the entry
322      *   already exists, in which case no new entry is made
323      */
324     int add_hash_entry(VMG_ const char *p, size_t len, int copy,
325                        vm_obj_id_t obj, vm_prop_id_t prop, int from_image);
326 
327     /* delete an entry */
328     int del_hash_entry(VMG_ const char *p, size_t len,
329                        vm_obj_id_t obj, vm_prop_id_t prop);
330 
331     /* callback for hash table enumeration - delete stale weak refs */
332     static void remove_weak_ref_cb(void *ctx, class CVmHashEntry *entry);
333 
334     /* callback for hash table enumeration - save file */
335     static void save_file_cb(void *ctx, class CVmHashEntry *entry);
336 
337     /* callback for hash table enumeration - convert to constant data */
338     static void cvt_const_cb(void *ctx, class CVmHashEntry *entry);
339 
340     /* callback for hash table enumeration - rebuild image, phase 1 */
341     static void rebuild_cb_1(void *ctx, class CVmHashEntry *entry);
342 
343     /* callback for hash table enumeration - rebuild image, phase 2 */
344     static void rebuild_cb_2(void *ctx, class CVmHashEntry *entry);
345 
346     /* allocate an undo record */
347     struct dict_undo_rec *alloc_undo_rec(enum dict_undo_action action,
348                                          const char *txt, size_t len);
349 
350     /* add a record to the global undo stream */
351     void add_undo_rec(VMG_ vm_obj_id_t self, struct dict_undo_rec *rec);
352 
353     /* function table */
354     static int (CVmObjDict::*func_table_[])(VMG_ vm_obj_id_t self,
355                                             vm_val_t *retval, uint *argc);
356 };
357 
358 
359 /* ------------------------------------------------------------------------ */
360 /*
361  *   hash table list entry - each hash entry has a list of associated
362  *   objects; this structure represents an entry in one of these lists
363  */
364 struct vm_dict_entry
365 {
vm_dict_entryvm_dict_entry366     vm_dict_entry(vm_obj_id_t obj, vm_prop_id_t prop, int from_image)
367     {
368         obj_ = obj;
369         prop_ = prop;
370         from_image_ = from_image;
371         nxt_ = 0;
372     }
373 
374     /* object ID of this entry */
375     vm_obj_id_t obj_;
376 
377     /* defining property ID */
378     vm_prop_id_t prop_;
379 
380     /* next entry in the list */
381     vm_dict_entry *nxt_;
382 
383     /* flag: entry is from the image file */
384     int from_image_;
385 };
386 
387 /*
388  *   Dictionary object hash table entry
389  */
390 class CVmHashEntryDict: public CVmHashEntryCS
391 {
392 public:
CVmHashEntryDict(const char * str,size_t len,int copy,int from_image)393     CVmHashEntryDict(const char *str, size_t len, int copy, int from_image)
394         : CVmHashEntryCS(str, len, copy || from_image)
395     {
396         /* nothing in our item list yet */
397         list_ = 0;
398     }
399 
~CVmHashEntryDict()400     ~CVmHashEntryDict()
401     {
402         /* delete all of our entries */
403         while (list_ != 0)
404         {
405             vm_dict_entry *nxt;
406 
407             /* note the next entry */
408             nxt = list_->nxt_;
409 
410             /* delete the current entry */
411             delete list_;
412 
413             /* move on to the next entry */
414             list_ = nxt;
415         }
416     }
417 
418     /* get the first entry in our list */
get_head()419     vm_dict_entry *get_head() const { return list_; }
420 
421     /*
422      *   add an entry to our list - returns true if we added the entry,
423      *   false if an entry with the same object ID was already present, in
424      *   which case we will ignore the addition
425      */
add_entry(vm_obj_id_t obj,vm_prop_id_t prop,int from_image)426     int add_entry(vm_obj_id_t obj, vm_prop_id_t prop, int from_image)
427     {
428         vm_dict_entry *entry;
429         vm_dict_entry *prv;
430 
431         /*
432          *   check to see if this entry is already in our list - if it is,
433          *   don't bother adding the redundant definition
434          */
435         for (prv = 0, entry = list_ ; entry != 0 ; entry = entry->nxt_)
436         {
437             /*
438              *   if this entry matches the object ID and property ID,
439              *   ignore the addition
440              */
441             if (entry->obj_ == obj && entry->prop_ == prop)
442                 return FALSE;
443 
444             /*
445              *   if this entry matches the property, remember it as the
446              *   insertion point, so that we keep all definitions for the
447              *   same word with the same property together in the list
448              */
449             if (entry->prop_ == prop)
450                 prv = entry;
451         }
452 
453         /* create a list entry */
454         entry = new vm_dict_entry(obj, prop, from_image);
455 
456         /*
457          *   link it in after the insertion point, or at the head of our
458          *   list if we didn't find any other insertion point
459          */
460         if (prv != 0)
461         {
462             /* we found an insertion point - link it in */
463             entry->nxt_ = prv->nxt_;
464             prv->nxt_ = entry;
465         }
466         else
467         {
468             /* no insertion point - link it at the head of our list */
469             entry->nxt_ = list_;
470             list_ = entry;
471         }
472 
473         /* we added the entry */
474         return TRUE;
475     }
476 
477     /*
478      *   Delete all entries matching a given object ID from our list.
479      *   Returns true if any entries were deleted, false if not.
480      */
del_entry(CVmHashTable * table,vm_obj_id_t obj,vm_prop_id_t prop)481     int del_entry(CVmHashTable *table, vm_obj_id_t obj, vm_prop_id_t prop)
482     {
483         vm_dict_entry *cur;
484         vm_dict_entry *nxt;
485         vm_dict_entry *prv;
486         int found;
487 
488         /* find the entry in our list */
489         for (found = FALSE, prv = 0, cur = list_ ; cur != 0 ;
490              prv = cur, cur = nxt)
491         {
492             /* remember the next entry */
493             nxt = cur->nxt_;
494 
495             /* if this is our entry, delete it */
496             if (cur->obj_ == obj && cur->prop_ == prop)
497             {
498                 /* unlink this entry */
499                 if (prv != 0)
500                     prv->nxt_ = nxt;
501                 else
502                     list_ = nxt;
503 
504                 /* delete this entry */
505                 delete cur;
506 
507                 /* note that we found at least one entry to delete */
508                 found = TRUE;
509             }
510         }
511 
512         /* if our list is now empty, delete myself from the table */
513         if (list_ == 0)
514         {
515             /* remove myself from the table */
516             table->remove(this);
517 
518             /* delete myself */
519             delete this;
520         }
521 
522         /* tell the caller whether we found anything to delete */
523         return found;
524     }
525 
526 protected:
527     /* list of associated objects */
528     vm_dict_entry *list_;
529 };
530 
531 
532 /* ------------------------------------------------------------------------ */
533 /*
534  *   Registration table object
535  */
536 class CVmMetaclassDict: public CVmMetaclass
537 {
538 public:
539     /* get the global name */
get_meta_name()540     const char *get_meta_name() const { return "dictionary2/030000"; }
541 
542     /* create from image file */
create_for_image_load(VMG_ vm_obj_id_t id)543     void create_for_image_load(VMG_ vm_obj_id_t id)
544         { new (vmg_ id) CVmObjDict(vmg0_); }
545 
546     /* create from restoring from saved state */
create_for_restore(VMG_ vm_obj_id_t id)547     void create_for_restore(VMG_ vm_obj_id_t id)
548         { new (vmg_ id) CVmObjDict(vmg0_); }
549 
550     /* create dynamically using stack arguments */
create_from_stack(VMG_ const uchar ** pc_ptr,uint argc)551     vm_obj_id_t create_from_stack(VMG_ const uchar **pc_ptr, uint argc)
552         { return CVmObjDict::create_from_stack(vmg_ pc_ptr, argc); }
553 
554     /* call a static property */
call_stat_prop(VMG_ vm_val_t * result,const uchar ** pc_ptr,uint * argc,vm_prop_id_t prop)555     int call_stat_prop(VMG_ vm_val_t *result,
556                        const uchar **pc_ptr, uint *argc,
557                        vm_prop_id_t prop)
558     {
559         return CVmObjDict::call_stat_prop(vmg_ result, pc_ptr, argc, prop);
560     }
561 };
562 
563 #endif /* VMDICT_H */
564 
565 /*
566  *   Register the class
567  */
568 VM_REGISTER_METACLASS(CVmObjDict)
569