1 /* $Header: d:/cvsroot/tads/tads3/VMSTR.H,v 1.2 1999/05/17 02:52:28 MJRoberts Exp $ */
2 
3 /*
4  *   Copyright (c) 1998, 2002 Michael J. Roberts.  All Rights Reserved.
5  *
6  *   Please see the accompanying license file, LICENSE.TXT, for information
7  *   on using and copying this software.
8  */
9 /*
10 Name
11   vmstr.h - VM dynamic string implementation
12 Function
13 
14 Notes
15 
16 Modified
17   10/28/98 MJRoberts  - Creation
18 */
19 
20 #ifndef VMSTR_H
21 #define VMSTR_H
22 
23 #include "vmglob.h"
24 #include "vmobj.h"
25 
26 class CVmObjString: public CVmObject
27 {
28     friend class CVmMetaclassString;
29 
30 public:
31     /* metaclass registration object */
32     static class CVmMetaclass *metaclass_reg_;
get_metaclass_reg()33     class CVmMetaclass *get_metaclass_reg() const { return metaclass_reg_; }
34 
35     /* am I of the given metaclass? */
is_of_metaclass(class CVmMetaclass * meta)36     virtual int is_of_metaclass(class CVmMetaclass *meta) const
37     {
38         /* try my own metaclass and my base class */
39         return (meta == metaclass_reg_
40                 || CVmObject::is_of_metaclass(meta));
41     }
42 
43     /* create from stack arguments */
44     static vm_obj_id_t create_from_stack(VMG_ const uchar **pc_ptr,
45                                          uint argc);
46 
47     /* reserve constant data */
48     virtual void reserve_const_data(VMG_ class CVmConstMapper *mapper,
49                                     vm_obj_id_t self);
50 
51     /* convert to constant data */
52     virtual void convert_to_const_data(VMG_ class CVmConstMapper *mapper,
53                                        vm_obj_id_t self);
54 
55     /* get my datatype when converted to constant data */
get_convert_to_const_data_type()56     virtual vm_datatype_t get_convert_to_const_data_type() const
57       { return VM_SSTRING; }
58 
59     /* create a string with no initial contents */
60     static vm_obj_id_t create(VMG_ int in_root_set);
61 
62     /* create a string to hold a string of the given byte length */
63     static vm_obj_id_t create(VMG_ int in_root_set, size_t bytelen);
64 
65     /* create from a constant UTF-8 string */
66     static vm_obj_id_t create(VMG_ int in_root_set,
67                               const char *str, size_t bytelen);
68 
69     /*
70      *   For construction: get a pointer to the string's underlying
71      *   buffer.  Returns a pointer into which the caller can write.  The
72      *   buffer starts after the length prefix.
73      */
cons_get_buf()74     char *cons_get_buf() const { return ext_ + 2; }
75 
76     /*
77      *   For construction: set my length.  This can be used if the string
78      *   stored is smaller than the buffer allocated.  This cannot be used
79      *   to expand the buffer, since this merely writes the length prefix
80      *   and does not reallocate the buffer.
81      */
cons_set_len(size_t len)82     void cons_set_len(size_t len) { vmb_put_len(ext_, len); }
83 
84     /* notify of deletion */
85     void notify_delete(VMG_ int in_root_set);
86 
87     /* set a property */
88     void set_prop(VMG_ class CVmUndo *undo,
89                   vm_obj_id_t self, vm_prop_id_t prop, const vm_val_t *val);
90 
91     /*
92      *   call a static property - we don't have any of our own, so simply
93      *   "inherit" the base class handling
94      */
call_stat_prop(VMG_ vm_val_t * result,const uchar ** pc_ptr,uint * argc,vm_prop_id_t prop)95     static int call_stat_prop(VMG_ vm_val_t *result,
96                               const uchar **pc_ptr, uint *argc,
97                               vm_prop_id_t prop)
98         { return CVmObject::call_stat_prop(vmg_ result, pc_ptr, argc, prop); }
99 
100     /* undo operations - strings are immutable and hence keep no undo */
notify_new_savept()101     void notify_new_savept() { }
apply_undo(VMG_ struct CVmUndoRecord *)102     void apply_undo(VMG_ struct CVmUndoRecord *) { };
mark_undo_ref(VMG_ struct CVmUndoRecord *)103     void mark_undo_ref(VMG_ struct CVmUndoRecord *) { }
remove_stale_undo_weak_ref(VMG_ struct CVmUndoRecord *)104     void remove_stale_undo_weak_ref(VMG_ struct CVmUndoRecord *) { }
105 
106     /* reference operations - strings reference no other objects */
mark_refs(VMG_ uint state)107     void mark_refs(VMG_ uint state) { }
remove_stale_weak_refs(VMG0_)108     void remove_stale_weak_refs(VMG0_) { }
109 
110     /* load from an image file */
load_from_image(VMG_ vm_obj_id_t,const char * ptr,size_t)111     void load_from_image(VMG_ vm_obj_id_t, const char *ptr, size_t)
112         { ext_ = (char *)ptr; }
113 
114     /* rebuild for image file */
115     virtual ulong rebuild_image(VMG_ char *buf, ulong buflen);
116 
117     /* save to a file */
118     void save_to_file(VMG_ class CVmFile *fp);
119 
120     /* restore from a file */
121     void restore_from_file(VMG_ vm_obj_id_t self,
122                            class CVmFile *fp, class CVmObjFixup *fixups);
123 
124     /*
125      *   add a value to the string -- this creates a new string by
126      *   appending the value to this string
127      */
128     void add_val(VMG_ vm_val_t *result,
129                  vm_obj_id_t self, const vm_val_t *val);
130 
131     /*
132      *   Get a string representation of the object.  This is trivial for a
133      *   string object - we simply return our extension, which contains
134      *   the string in the required format.
135      */
cast_to_string(VMG_ vm_obj_id_t self,vm_val_t * new_str)136     const char *cast_to_string(VMG_ vm_obj_id_t self,
137                                vm_val_t *new_str) const
138     {
139         /* we are the string object */
140         new_str->set_obj(self);
141 
142         /* return our extension directly */
143         return ext_;
144     }
145 
146     /* get the underlying string */
get_as_string(VMG0_)147     const char *get_as_string(VMG0_) const { return ext_; }
148 
149     /*
150      *   Static routine to add a value to a string constant.  Creates a
151      *   new string by appending the given value to the given string
152      *   constant.  The string constant must be stored in portable format:
153      *   the first two bytes are the length prefix, in UINT2 format,
154      *   giving the length of the string's contents not counting the
155      *   prefix itself; immediately following the length prefix are the
156      *   bytes of the string's contents.
157      */
158     static void add_to_str(VMG_ vm_val_t *result,
159                            vm_obj_id_t self, const char *strval,
160                            const vm_val_t *val);
161 
162     /*
163      *   Check a value for equality.  We will match any constant string
164      *   that contains the same text as our string, and any other string
165      *   object with the same text.
166      */
167     int equals(VMG_ vm_obj_id_t self, const vm_val_t *val, int depth) const;
168 
169     /*
170      *   Compare the string to another value.  If the other value is a
171      *   constant string or string object, we'll perform a lexical
172      *   comparison of the string; other types are not comparable to
173      *   strings, so we'll throw an error for any other type.
174      */
175     int compare_to(VMG_ vm_obj_id_t self, const vm_val_t *val) const;
176 
177     /* calculate a hash */
178     uint calc_hash(VMG_ vm_obj_id_t self, int depth) const;
179 
180     /*
181      *   Convert a value to a string.  Throws an error if the value is not
182      *   convertible to a string.
183      *
184      *   The result is stored in the given buffer, if possible, in
185      *   portable string format (with a portable UINT2 length prefix
186      *   followed by the string's bytes).  If the buffer is not provided
187      *   or is not large enough to contain the result, we will allocate a
188      *   new string object and return its contents; since the string
189      *   object will never be referenced by anyone, it will be deleted in
190      *   the next garbage collection pass.  In any case, we will return a
191      *   pointer to a buffer containing the result string.
192      *
193      *   We'll fill in *new_obj with the new string object value, or nil
194      *   if we don't create a new string; this allows the caller to
195      *   protect the allocated object from garbage collection if
196      *   necessary.
197      */
198     static const char *cvt_to_str(VMG_ vm_val_t *new_obj,
199                                   char *result_buf, size_t result_buf_size,
200                                   const vm_val_t *val, int radix);
201 
202     /*
203      *   Convert an integer to a string, storing the result in the given
204      *   buffer in portable string format (with length prefix).  The radix
205      *   must be 8, 10, or 16.
206      *
207      *   Decimal numbers are treated as signed, and a leading dash is
208      *   included if the number is negative.  Octal and hex numbers are
209      *   treated as unsigned.
210      *
211      *   For efficiency, we store the number at the end of the buffer
212      *   (this makes it easy to generate the number, since we need to
213      *   generate numerals in reverse order).  We return a pointer to the
214      *   result, which may not start at the beginning of the buffer.
215      */
216     static char *cvt_int_to_str(char *buf, size_t buflen,
217                                 int32 inval, int radix);
218 
219     /*
220      *   Allocate a string buffer large enough to hold a given value.
221      *   We'll use the provided buffer if possible.
222      *
223      *   If the provided buffer is null or is not large enough, we'll
224      *   allocate a new string object with a large enough buffer to hold
225      *   the value, and return the object's extension as the buffer.
226      *
227      *   The buffer size and requested size are in bytes.
228      *
229      *   If we allocate a new object, we'll set new_obj to the object
230      *   value; otherwise we'll set new_obj to nil.
231      */
232     static char *alloc_str_buf(VMG_ vm_val_t *new_obj,
233                                char *buf, size_t buf_size,
234                                size_t required_size);
235 
236     /*
237      *   Constant string equality test routine.  Compares the given
238      *   constant string (in portable format, with leading UINT2 length
239      *   prefix followed by the string's text in UTF8 format) to the other
240      *   value.  Returns true if the values are lexically identical, false
241      *   if not or if the other value is not a string of some kind.
242      */
243     static int const_equals(VMG_ const char *str, const vm_val_t *val);
244 
245     /*
246      *   Constant string hash value calculation
247      */
248     static uint const_calc_hash(const char *str);
249 
250     /*
251      *   Constant string magnitude comparison routine.  Compares the given
252      *   constant string (in portable format) to the other value.  Returns
253      *   a positive value if the constant string is lexically greater than
254      *   the other value, a negative value if the constant string is
255      *   lexically less than the other value, or zero if the two values
256      *   are identical.  Throws an error for any other type of value.
257      */
258     static int const_compare(VMG_ const char *str, const vm_val_t *val);
259 
260     /*
261      *   Find a substring within a string.  Returns a pointer to to the
262      *   start of the substring within the string, or null if the
263      *   substring isn't found.  If 'idxp' is non-null, we'll fill in
264      *   *idxp with the character index, starting at zero for the first
265      *   character, of the substring within the string.
266      *
267      *   Both strings are in standard constant string format, with UINT2
268      *   length prefixes.
269      */
270     static const char *find_substr(VMG_ const char *str, int start_idx,
271                                    const char *substr, size_t *idxp);
272 
273     /*
274      *   Evaluate a property of a constant string value.  Returns true if
275      *   we successfully evaluated the property, false if the property is
276      *   not one of the properties that the string class defines.
277      */
278     static int const_get_prop(VMG_ vm_val_t *retval, const vm_val_t *self_val,
279                               const char *str, vm_prop_id_t prop,
280                               vm_obj_id_t *srcobj, uint *argc);
281 
282     /* evaluate a property */
283     virtual int get_prop(VMG_ vm_prop_id_t prop, vm_val_t *val,
284                          vm_obj_id_t self, vm_obj_id_t *source_obj,
285                          uint *argc);
286 
287     /* property evaluator - undefined property */
getp_undef(VMG_ vm_val_t *,const vm_val_t *,const char *,uint *)288     static int getp_undef(VMG_ vm_val_t *, const vm_val_t *,
289                           const char *, uint *)
290         { return FALSE; }
291 
292     /* property evaluator - get the length */
293     static int getp_len(VMG_ vm_val_t *retval, const vm_val_t *self_val,
294                         const char *str, uint *argc);
295 
296     /* property evaluator - extract a substring */
297     static int getp_substr(VMG_ vm_val_t *retval, const vm_val_t *self_val,
298                            const char *str, uint *argc);
299 
300     /* property evaluator - toUpper */
301     static int getp_upper(VMG_ vm_val_t *retval, const vm_val_t *self_val,
302                           const char *str, uint *argc);
303 
304     /* property evaluator - toLower */
305     static int getp_lower(VMG_ vm_val_t *retval, const vm_val_t *self_val,
306                           const char *str, uint *argc);
307 
308     /* property evaluator - find substring */
309     static int getp_find(VMG_ vm_val_t *retval, const vm_val_t *self_val,
310                          const char *str, uint *argc);
311 
312     /* property evaluator - convert to unicode */
313     static int getp_to_uni(VMG_ vm_val_t *retval, const vm_val_t *self_val,
314                            const char *str, uint *argc);
315 
316     /* property evaluator - htmlify */
317     static int getp_htmlify(VMG_ vm_val_t *retval, const vm_val_t *self_val,
318                             const char *str, uint *argc);
319 
320     /* property evaluator - startsWith */
321     static int getp_starts_with(VMG_ vm_val_t *retval,
322                                 const vm_val_t *self_val,
323                                 const char *str, uint *argc);
324 
325     /* property evaluator - endsWith */
326     static int getp_ends_with(VMG_ vm_val_t *retval,
327                               const vm_val_t *self_val,
328                               const char *str, uint *argc);
329 
330     /* property evaluator - mapToByteArray */
331     static int getp_to_byte_array(VMG_ vm_val_t *retval,
332                                   const vm_val_t *self_val,
333                                   const char *str, uint *argc);
334 
335     /* property evaluator - replace substring */
336     static int getp_replace(VMG_ vm_val_t *retval, const vm_val_t *self_val,
337                             const char *str, uint *argc);
338 
339 protected:
340     /* create a string with no initial contents */
CVmObjString()341     CVmObjString() { ext_ = 0; }
342 
343     /* create with a given buffer size in bytes */
344     CVmObjString(VMG_ size_t bytelen);
345 
346     /* create from a constant UTF-8 string */
347     CVmObjString(VMG_ const char *str, size_t bytelen);
348 
349     /*
350      *   Set the length of the string.  This can be used after a string is
351      *   constructed to set the size of the actual stored string.
352      */
set_length(size_t bytelen)353     void set_length(size_t bytelen) { vmb_put_len(ext_, bytelen); }
354 
355     /* copy bytes into the string buffer */
copy_into_str(const char * str,size_t bytelen)356     void copy_into_str(const char *str, size_t bytelen)
357         { memcpy(ext_ + VMB_LEN, str, bytelen); }
358 
359     /* copy bytes into the string buffer starting at the given byte offset */
copy_into_str(size_t ofs,const char * str,size_t bytelen)360     void copy_into_str(size_t ofs, const char *str, size_t bytelen)
361         { memcpy(ext_ + VMB_LEN + ofs, str, bytelen); }
362 
363     /* property evaluation function table */
364     static int (*func_table_[])(VMG_ vm_val_t *retval,
365                                 const vm_val_t *self_val,
366                                 const char *str, uint *argc);
367 };
368 
369 /* ------------------------------------------------------------------------ */
370 /*
371  *   A constant string is exactly like an ordinary string, except that our
372  *   contents come from the constant pool.  We store a pointer directly to
373  *   our constant pool data rather than making a separate copy.  The only
374  *   thing we have to do differently from an ordinary string is that we don't
375  *   delete our extension when we're deleted, since our extension is really
376  *   just a pointer into the constant pool.
377  */
378 class CVmObjStringConst: public CVmObjString
379 {
380 public:
381     /* notify of deletion */
notify_delete(VMG_ int)382     void notify_delete(VMG_ int /*in_root_set*/)
383     {
384         /*
385          *   do nothing, since our extension is just a pointer into the
386          *   constant pool
387          */
388     }
389 
390     /* create from constant pool data */
391     static vm_obj_id_t create(VMG_ const char *const_ptr);
392 
393 protected:
394     /* construct from constant pool data */
CVmObjStringConst(VMG_ const char * const_ptr)395     CVmObjStringConst(VMG_ const char *const_ptr)
396     {
397         /* point our extension directly to the constant pool data */
398         ext_ = (char *)const_ptr;
399     }
400 };
401 
402 
403 /* ------------------------------------------------------------------------ */
404 /*
405  *   Registration table object
406  */
407 class CVmMetaclassString: public CVmMetaclass
408 {
409 public:
410     /* get the global name */
get_meta_name()411     const char *get_meta_name() const { return "string/030005"; }
412 
413     /* create from image file */
create_for_image_load(VMG_ vm_obj_id_t id)414     void create_for_image_load(VMG_ vm_obj_id_t id)
415     {
416         new (vmg_ id) CVmObjString();
417         G_obj_table->set_obj_gc_characteristics(id, FALSE, FALSE);
418     }
419 
420     /* create from restoring from saved state */
create_for_restore(VMG_ vm_obj_id_t id)421     void create_for_restore(VMG_ vm_obj_id_t id)
422     {
423         new (vmg_ id) CVmObjString();
424         G_obj_table->set_obj_gc_characteristics(id, FALSE, FALSE);
425     }
426 
427     /* create dynamically using stack arguments */
create_from_stack(VMG_ const uchar ** pc_ptr,uint argc)428     vm_obj_id_t create_from_stack(VMG_ const uchar **pc_ptr, uint argc)
429         { return CVmObjString::create_from_stack(vmg_ pc_ptr, argc); }
430 
431     /* call a static property */
call_stat_prop(VMG_ vm_val_t * result,const uchar ** pc_ptr,uint * argc,vm_prop_id_t prop)432     int call_stat_prop(VMG_ vm_val_t *result,
433                        const uchar **pc_ptr, uint *argc,
434                        vm_prop_id_t prop)
435     {
436         return CVmObjString::call_stat_prop(vmg_ result, pc_ptr, argc, prop);
437     }
438 };
439 
440 #endif /* VMSTR_H */
441 
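/*
 *   Register the class.
 *
 *   Note that this invocation comes after the #endif that closes the
 *   VMSTR_H include guard, so it is expanded every time this header is
 *   included, not just the first time.  NOTE(review): presumably this is
 *   deliberate, so that a registration file can include this header
 *   repeatedly with different definitions of VM_REGISTER_METACLASS -
 *   confirm against the macro's definition before moving this line.
 */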
445 VM_REGISTER_METACLASS(CVmObjString)
446 
447