1 /* $Header: d:/cvsroot/tads/tads3/VMSTR.H,v 1.2 1999/05/17 02:52:28 MJRoberts Exp $ */ 2 3 /* 4 * Copyright (c) 1998, 2002 Michael J. Roberts. All Rights Reserved. 5 * 6 * Please see the accompanying license file, LICENSE.TXT, for information 7 * on using and copying this software. 8 */ 9 /* 10 Name 11 vmstr.h - VM dynamic string implementation 12 Function 13 14 Notes 15 16 Modified 17 10/28/98 MJRoberts - Creation 18 */ 19 20 #ifndef VMSTR_H 21 #define VMSTR_H 22 23 #include "vmglob.h" 24 #include "vmobj.h" 25 26 class CVmObjString: public CVmObject 27 { 28 friend class CVmMetaclassString; 29 30 public: 31 /* metaclass registration object */ 32 static class CVmMetaclass *metaclass_reg_; get_metaclass_reg()33 class CVmMetaclass *get_metaclass_reg() const { return metaclass_reg_; } 34 35 /* am I of the given metaclass? */ is_of_metaclass(class CVmMetaclass * meta)36 virtual int is_of_metaclass(class CVmMetaclass *meta) const 37 { 38 /* try my own metaclass and my base class */ 39 return (meta == metaclass_reg_ 40 || CVmObject::is_of_metaclass(meta)); 41 } 42 43 /* create from stack arguments */ 44 static vm_obj_id_t create_from_stack(VMG_ const uchar **pc_ptr, 45 uint argc); 46 47 /* reserve constant data */ 48 virtual void reserve_const_data(VMG_ class CVmConstMapper *mapper, 49 vm_obj_id_t self); 50 51 /* convert to constant data */ 52 virtual void convert_to_const_data(VMG_ class CVmConstMapper *mapper, 53 vm_obj_id_t self); 54 55 /* get my datatype when converted to constant data */ get_convert_to_const_data_type()56 virtual vm_datatype_t get_convert_to_const_data_type() const 57 { return VM_SSTRING; } 58 59 /* create a string with no initial contents */ 60 static vm_obj_id_t create(VMG_ int in_root_set); 61 62 /* create a string to hold a string of the given byte length */ 63 static vm_obj_id_t create(VMG_ int in_root_set, size_t bytelen); 64 65 /* create from a constant UTF-8 string */ 66 static vm_obj_id_t create(VMG_ int in_root_set, 67 const char *str, size_t bytelen); 68 69 /* 70 * For construction: get a pointer to the string's underlying 71 * buffer. Returns a pointer into which the caller can write. The 72 * buffer starts after the length prefix. 73 */ cons_get_buf()74 char *cons_get_buf() const { return ext_ + 2; } 75 76 /* 77 * For construction: set my length. This can be used if the string 78 * stored is smaller than the buffer allocated. This cannot be used 79 * to expand the buffer, since this merely writes the length prefix 80 * and does not reallocate the buffer. 81 */ cons_set_len(size_t len)82 void cons_set_len(size_t len) { vmb_put_len(ext_, len); } 83 84 /* notify of deletion */ 85 void notify_delete(VMG_ int in_root_set); 86 87 /* set a property */ 88 void set_prop(VMG_ class CVmUndo *undo, 89 vm_obj_id_t self, vm_prop_id_t prop, const vm_val_t *val); 90 91 /* 92 * call a static property - we don't have any of our own, so simply 93 * "inherit" the base class handling 94 */ call_stat_prop(VMG_ vm_val_t * result,const uchar ** pc_ptr,uint * argc,vm_prop_id_t prop)95 static int call_stat_prop(VMG_ vm_val_t *result, 96 const uchar **pc_ptr, uint *argc, 97 vm_prop_id_t prop) 98 { return CVmObject::call_stat_prop(vmg_ result, pc_ptr, argc, prop); } 99 100 /* undo operations - strings are immutable and hence keep no undo */ notify_new_savept()101 void notify_new_savept() { } apply_undo(VMG_ struct CVmUndoRecord *)102 void apply_undo(VMG_ struct CVmUndoRecord *) { }; mark_undo_ref(VMG_ struct CVmUndoRecord *)103 void mark_undo_ref(VMG_ struct CVmUndoRecord *) { } remove_stale_undo_weak_ref(VMG_ struct CVmUndoRecord *)104 void remove_stale_undo_weak_ref(VMG_ struct CVmUndoRecord *) { } 105 106 /* reference operations - strings reference no other objects */ mark_refs(VMG_ uint state)107 void mark_refs(VMG_ uint state) { } remove_stale_weak_refs(VMG0_)108 void remove_stale_weak_refs(VMG0_) { } 109 110 /* load from an image file */ load_from_image(VMG_ vm_obj_id_t,const char * ptr,size_t)111 void load_from_image(VMG_ vm_obj_id_t, const char *ptr, size_t) 112 { ext_ = (char *)ptr; } 113 114 /* rebuild for image file */ 115 virtual ulong rebuild_image(VMG_ char *buf, ulong buflen); 116 117 /* save to a file */ 118 void save_to_file(VMG_ class CVmFile *fp); 119 120 /* restore from a file */ 121 void restore_from_file(VMG_ vm_obj_id_t self, 122 class CVmFile *fp, class CVmObjFixup *fixups); 123 124 /* 125 * add a value to the string -- this creates a new string by 126 * appending the value to this string 127 */ 128 void add_val(VMG_ vm_val_t *result, 129 vm_obj_id_t self, const vm_val_t *val); 130 131 /* 132 * Get a string representation of the object. This is trivial for a 133 * string object - we simply return our extension, which contains 134 * the string in the required format. 135 */ cast_to_string(VMG_ vm_obj_id_t self,vm_val_t * new_str)136 const char *cast_to_string(VMG_ vm_obj_id_t self, 137 vm_val_t *new_str) const 138 { 139 /* we are the string object */ 140 new_str->set_obj(self); 141 142 /* return our extension directly */ 143 return ext_; 144 } 145 146 /* get the underlying string */ get_as_string(VMG0_)147 const char *get_as_string(VMG0_) const { return ext_; } 148 149 /* 150 * Static routine to add a value to a string constant. Creates a 151 * new string by appending the given value to the given string 152 * constant. The string constant must be stored in portable format: 153 * the first two bytes are the length prefix, in UINT2 format, 154 * giving the length of the string's contents not counting the 155 * prefix itself; immediately following the length prefix are the 156 * bytes of the string's contents. 157 */ 158 static void add_to_str(VMG_ vm_val_t *result, 159 vm_obj_id_t self, const char *strval, 160 const vm_val_t *val); 161 162 /* 163 * Check a value for equality. We will match any constant string 164 * that contains the same text as our string, and any other string 165 * object with the same text. 166 */ 167 int equals(VMG_ vm_obj_id_t self, const vm_val_t *val, int depth) const; 168 169 /* 170 * Compare the string to another value. If the other value is a 171 * constant string or string object, we'll perform a lexical 172 * comparison of the string; other types are not comparable to 173 * strings, so we'll throw an error for any other type. 174 */ 175 int compare_to(VMG_ vm_obj_id_t self, const vm_val_t *val) const; 176 177 /* calculate a hash */ 178 uint calc_hash(VMG_ vm_obj_id_t self, int depth) const; 179 180 /* 181 * Convert a value to a string. Throws an error if the value is not 182 * convertible to a string. 183 * 184 * The result is stored in the given buffer, if possible, in 185 * portable string format (with a portable UINT2 length prefix 186 * followed by the string's bytes). If the buffer is not provided 187 * or is not large enough to contain the result, we will allocate a 188 * new string object and return its contents; since the string 189 * object will never be referenced by anyone, it will be deleted in 190 * the next garbage collection pass. In any case, we will return a 191 * pointer to a buffer containing the result string. 192 * 193 * We'll fill in *new_obj with the new string object value, or nil 194 * if we don't create a new string; this allows the caller to 195 * protect the allocated object from garbage collection if 196 * necessary. 197 */ 198 static const char *cvt_to_str(VMG_ vm_val_t *new_obj, 199 char *result_buf, size_t result_buf_size, 200 const vm_val_t *val, int radix); 201 202 /* 203 * Convert an integer to a string, storing the result in the given 204 * buffer in portable string format (with length prefix). The radix 205 * must be 8, 10, or 16. 206 * 207 * Decimal numbers are treated as signed, and a leading dash is 208 * included if the number is negative. Octal and hex numbers are 209 * treated as unsigned. 210 * 211 * For efficiency, we store the number at the end of the buffer 212 * (this makes it easy to generate the number, since we need to 213 * generate numerals in reverse order). We return a pointer to the 214 * result, which may not start at the beginning of the buffer. 215 */ 216 static char *cvt_int_to_str(char *buf, size_t buflen, 217 int32 inval, int radix); 218 219 /* 220 * Allocate a string buffer large enough to hold a given value. 221 * We'll use the provided buffer if possible. 222 * 223 * If the provided buffer is null or is not large enough, we'll 224 * allocate a new string object with a large enough buffer to hold 225 * the value, and return the object's extension as the buffer. 226 * 227 * The buffer size and requested size are in bytes. 228 * 229 * If we allocate a new object, we'll set new_obj to the object 230 * value; otherwise we'll set new_obj to nil. 231 */ 232 static char *alloc_str_buf(VMG_ vm_val_t *new_obj, 233 char *buf, size_t buf_size, 234 size_t required_size); 235 236 /* 237 * Constant string equality test routine. Compares the given 238 * constant string (in portable format, with leading UINT2 length 239 * prefix followed by the string's text in UTF8 format) to the other 240 * value. Returns true if the values are lexically identical, false 241 * if not or if the other value is not a string of some kind. 242 */ 243 static int const_equals(VMG_ const char *str, const vm_val_t *val); 244 245 /* 246 * Constant string hash value calculation 247 */ 248 static uint const_calc_hash(const char *str); 249 250 /* 251 * Constant string magnitude comparison routine. Compares the given 252 * constant string (in portable format) to the other value. Returns 253 * a positive value if the constant string is lexically greater than 254 * the other value, a negative value if the constant string is 255 * lexically less than the other value, or zero if the two values 256 * are identical. Throws an error for any other type of value. 257 */ 258 static int const_compare(VMG_ const char *str, const vm_val_t *val); 259 260 /* 261 * Find a substring within a string. Returns a pointer to to the 262 * start of the substring within the string, or null if the 263 * substring isn't found. If 'idxp' is non-null, we'll fill in 264 * *idxp with the character index, starting at zero for the first 265 * character, of the substring within the string. 266 * 267 * Both strings are in standard constant string format, with UINT2 268 * length prefixes. 269 */ 270 static const char *find_substr(VMG_ const char *str, int start_idx, 271 const char *substr, size_t *idxp); 272 273 /* 274 * Evaluate a property of a constant string value. Returns true if 275 * we successfully evaluated the property, false if the property is 276 * not one of the properties that the string class defines. 277 */ 278 static int const_get_prop(VMG_ vm_val_t *retval, const vm_val_t *self_val, 279 const char *str, vm_prop_id_t prop, 280 vm_obj_id_t *srcobj, uint *argc); 281 282 /* evaluate a property */ 283 virtual int get_prop(VMG_ vm_prop_id_t prop, vm_val_t *val, 284 vm_obj_id_t self, vm_obj_id_t *source_obj, 285 uint *argc); 286 287 /* property evaluator - undefined property */ getp_undef(VMG_ vm_val_t *,const vm_val_t *,const char *,uint *)288 static int getp_undef(VMG_ vm_val_t *, const vm_val_t *, 289 const char *, uint *) 290 { return FALSE; } 291 292 /* property evaluator - get the length */ 293 static int getp_len(VMG_ vm_val_t *retval, const vm_val_t *self_val, 294 const char *str, uint *argc); 295 296 /* property evaluator - extract a substring */ 297 static int getp_substr(VMG_ vm_val_t *retval, const vm_val_t *self_val, 298 const char *str, uint *argc); 299 300 /* property evaluator - toUpper */ 301 static int getp_upper(VMG_ vm_val_t *retval, const vm_val_t *self_val, 302 const char *str, uint *argc); 303 304 /* property evaluator - toLower */ 305 static int getp_lower(VMG_ vm_val_t *retval, const vm_val_t *self_val, 306 const char *str, uint *argc); 307 308 /* property evaluator - find substring */ 309 static int getp_find(VMG_ vm_val_t *retval, const vm_val_t *self_val, 310 const char *str, uint *argc); 311 312 /* property evaluator - convert to unicode */ 313 static int getp_to_uni(VMG_ vm_val_t *retval, const vm_val_t *self_val, 314 const char *str, uint *argc); 315 316 /* property evaluator - htmlify */ 317 static int getp_htmlify(VMG_ vm_val_t *retval, const vm_val_t *self_val, 318 const char *str, uint *argc); 319 320 /* property evaluator - startsWith */ 321 static int getp_starts_with(VMG_ vm_val_t *retval, 322 const vm_val_t *self_val, 323 const char *str, uint *argc); 324 325 /* property evaluator - endsWith */ 326 static int getp_ends_with(VMG_ vm_val_t *retval, 327 const vm_val_t *self_val, 328 const char *str, uint *argc); 329 330 /* property evaluator - mapToByteArray */ 331 static int getp_to_byte_array(VMG_ vm_val_t *retval, 332 const vm_val_t *self_val, 333 const char *str, uint *argc); 334 335 /* property evaluator - replace substring */ 336 static int getp_replace(VMG_ vm_val_t *retval, const vm_val_t *self_val, 337 const char *str, uint *argc); 338 339 protected: 340 /* create a string with no initial contents */ CVmObjString()341 CVmObjString() { ext_ = 0; } 342 343 /* create with a given buffer size in bytes */ 344 CVmObjString(VMG_ size_t bytelen); 345 346 /* create from a constant UTF-8 string */ 347 CVmObjString(VMG_ const char *str, size_t bytelen); 348 349 /* 350 * Set the length of the string. This can be used after a string is 351 * constructed to set the size of the actual stored string. 352 */ set_length(size_t bytelen)353 void set_length(size_t bytelen) { vmb_put_len(ext_, bytelen); } 354 355 /* copy bytes into the string buffer */ copy_into_str(const char * str,size_t bytelen)356 void copy_into_str(const char *str, size_t bytelen) 357 { memcpy(ext_ + VMB_LEN, str, bytelen); } 358 359 /* copy bytes into the string buffer starting at the given byte offset */ copy_into_str(size_t ofs,const char * str,size_t bytelen)360 void copy_into_str(size_t ofs, const char *str, size_t bytelen) 361 { memcpy(ext_ + VMB_LEN + ofs, str, bytelen); } 362 363 /* property evaluation function table */ 364 static int (*func_table_[])(VMG_ vm_val_t *retval, 365 const vm_val_t *self_val, 366 const char *str, uint *argc); 367 }; 368 369 /* ------------------------------------------------------------------------ */ 370 /* 371 * A constant string is exactly like an ordinary string, except that our 372 * contents come from the constant pool. We store a pointer directly to 373 * our constant pool data rather than making a separate copy. The only 374 * thing we have to do differently from an ordinary string is that we don't 375 * delete our extension when we're deleted, since our extension is really 376 * just a pointer into the constant pool. 377 */ 378 class CVmObjStringConst: public CVmObjString 379 { 380 public: 381 /* notify of deletion */ notify_delete(VMG_ int)382 void notify_delete(VMG_ int /*in_root_set*/) 383 { 384 /* 385 * do nothing, since our extension is just a pointer into the 386 * constant pool 387 */ 388 } 389 390 /* create from constant pool data */ 391 static vm_obj_id_t create(VMG_ const char *const_ptr); 392 393 protected: 394 /* construct from constant pool data */ CVmObjStringConst(VMG_ const char * const_ptr)395 CVmObjStringConst(VMG_ const char *const_ptr) 396 { 397 /* point our extension directly to the constant pool data */ 398 ext_ = (char *)const_ptr; 399 } 400 }; 401 402 403 /* ------------------------------------------------------------------------ */ 404 /* 405 * Registration table object 406 */ 407 class CVmMetaclassString: public CVmMetaclass 408 { 409 public: 410 /* get the global name */ get_meta_name()411 const char *get_meta_name() const { return "string/030005"; } 412 413 /* create from image file */ create_for_image_load(VMG_ vm_obj_id_t id)414 void create_for_image_load(VMG_ vm_obj_id_t id) 415 { 416 new (vmg_ id) CVmObjString(); 417 G_obj_table->set_obj_gc_characteristics(id, FALSE, FALSE); 418 } 419 420 /* create from restoring from saved state */ create_for_restore(VMG_ vm_obj_id_t id)421 void create_for_restore(VMG_ vm_obj_id_t id) 422 { 423 new (vmg_ id) CVmObjString(); 424 G_obj_table->set_obj_gc_characteristics(id, FALSE, FALSE); 425 } 426 427 /* create dynamically using stack arguments */ create_from_stack(VMG_ const uchar ** pc_ptr,uint argc)428 vm_obj_id_t create_from_stack(VMG_ const uchar **pc_ptr, uint argc) 429 { return CVmObjString::create_from_stack(vmg_ pc_ptr, argc); } 430 431 /* call a static property */ call_stat_prop(VMG_ vm_val_t * result,const uchar ** pc_ptr,uint * argc,vm_prop_id_t prop)432 int call_stat_prop(VMG_ vm_val_t *result, 433 const uchar **pc_ptr, uint *argc, 434 vm_prop_id_t prop) 435 { 436 return CVmObjString::call_stat_prop(vmg_ result, pc_ptr, argc, prop); 437 } 438 }; 439 440 #endif /* VMSTR_H */ 441 442 /* 443 * Register the class 444 */ 445 VM_REGISTER_METACLASS(CVmObjString) 446 447