1 #ifdef RCSID
2 static char RCSid[] =
3 "$Header: d:/cvsroot/tads/tads3/VMSTR.CPP,v 1.3 1999/05/17 02:52:28 MJRoberts Exp $";
4 #endif
5
6 /*
7 * Copyright (c) 1998, 2002 Michael J. Roberts. All Rights Reserved.
8 *
9 * Please see the accompanying license file, LICENSE.TXT, for information
10 * on using and copying this software.
11 */
12 /*
13 Name
14 vmstr.cpp - VM string metaclass implementation
15 Function
16
17 Notes
18
19 Modified
20 10/28/98 MJRoberts - Creation
21 */
22
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26
27 #include "t3std.h"
28 #include "vmmcreg.h"
29 #include "vmobj.h"
30 #include "vmstr.h"
31 #include "utf8.h"
32 #include "vmerr.h"
33 #include "vmerrnum.h"
34 #include "vmfile.h"
35 #include "vmstack.h"
36 #include "vmpool.h"
37 #include "vmmeta.h"
38 #include "vmrun.h"
39 #include "vmbif.h"
40 #include "vmpredef.h"
41 #include "vmlst.h"
42 #include "vmuni.h"
43 #include "vmcset.h"
44 #include "vmbytarr.h"
45 #include "charmap.h"
46
47
48 /* ------------------------------------------------------------------------ */
49 /*
50 * statics
51 */
52
53 /* metaclass registration object */
54 static CVmMetaclassString metaclass_reg_obj;
55 CVmMetaclass *CVmObjString::metaclass_reg_ = &metaclass_reg_obj;
56
57 /* function table */
58 int (*CVmObjString::func_table_[])(VMG_ vm_val_t *retval,
59 const vm_val_t *self_val,
60 const char *str, uint *argc) =
61 {
62 &CVmObjString::getp_undef,
63 &CVmObjString::getp_len,
64 &CVmObjString::getp_substr,
65 &CVmObjString::getp_upper,
66 &CVmObjString::getp_lower,
67 &CVmObjString::getp_find,
68 &CVmObjString::getp_to_uni,
69 &CVmObjString::getp_htmlify,
70 &CVmObjString::getp_starts_with,
71 &CVmObjString::getp_ends_with,
72 &CVmObjString::getp_to_byte_array,
73 &CVmObjString::getp_replace
74 };
75
76 /* ------------------------------------------------------------------------ */
77 /*
78 * Static creation methods
79 */
80
81
82 /* create dynamically using stack arguments */
create_from_stack(VMG_ const uchar **,uint)83 vm_obj_id_t CVmObjString::create_from_stack(VMG_ const uchar **, uint)
84 {
85 /* dynamic string construction is not currently supported */
86 err_throw(VMERR_BAD_DYNAMIC_NEW);
87
88 /* the compiler doesn't know we won't make it here */
89 return VM_INVALID_OBJ;
90 }
91
92 /* create a string with no initial contents */
create(VMG_ int in_root_set)93 vm_obj_id_t CVmObjString::create(VMG_ int in_root_set)
94 {
95 vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
96 new (vmg_ id) CVmObjString();
97 return id;
98 }
99
100 /* create with a given buffer size */
create(VMG_ int in_root_set,size_t byte_size)101 vm_obj_id_t CVmObjString::create(VMG_ int in_root_set, size_t byte_size)
102 {
103 vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
104 new (vmg_ id) CVmObjString(vmg_ byte_size);
105 return id;
106 }
107
108 /* create from a constant UTF-8 string */
create(VMG_ int in_root_set,const char * str,size_t bytelen)109 vm_obj_id_t CVmObjString::create(VMG_ int in_root_set,
110 const char *str, size_t bytelen)
111 {
112 vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
113 new (vmg_ id) CVmObjString(vmg_ str, bytelen);
114 return id;
115 }
116
117 /* ------------------------------------------------------------------------ */
118 /*
119 * Constructors
120 */
121
122 /*
123 * create a string object with a given buffer size
124 */
CVmObjString(VMG_ size_t len)125 CVmObjString::CVmObjString(VMG_ size_t len)
126 {
127 /*
128 * the length is limited to an unsigned 16-bit value (NB: it really is
129 * 65535 on ALL PLATFORMS - this is a portable limit imposed by the
130 * portable storage format, not a local platform limit)
131 */
132 if (len > 65535)
133 {
134 ext_ = 0;
135 err_throw(VMERR_STR_TOO_LONG);
136 }
137
138 /*
139 * allocate space for the buffer plus the length prefix in the
140 * variable heap
141 */
142 ext_ = (char *)G_mem->get_var_heap()->alloc_mem(len + VMB_LEN, this);
143
144 /* set the length */
145 vmb_put_len(ext_, len);
146 }
147
148 /*
149 * create a string object from a given UTF8 string constant
150 */
CVmObjString(VMG_ const char * str,size_t len)151 CVmObjString::CVmObjString(VMG_ const char *str, size_t len)
152 {
153 /* check for the length limit */
154 if (len > 65535)
155 {
156 ext_ = 0;
157 err_throw(VMERR_STR_TOO_LONG);
158 }
159
160 /*
161 * allocate space for the string plus the length prefix in the
162 * variable heap
163 */
164 ext_ = (char *)G_mem->get_var_heap()->alloc_mem(len + VMB_LEN, this);
165
166 /*
167 * store the length prefix in portable format (so that we can easily
168 * write our contents to a saved state file)
169 */
170 vmb_put_len(ext_, len);
171
172 /* copy the string's bytes */
173 memcpy(ext_ + VMB_LEN, str, len);
174 }
175
176 /* ------------------------------------------------------------------------ */
177 /*
178 * receive notification of deletion
179 */
notify_delete(VMG_ int in_root_set)180 void CVmObjString::notify_delete(VMG_ int in_root_set)
181 {
182 /* free our extension */
183 if (ext_ != 0 && !in_root_set)
184 G_mem->get_var_heap()->free_mem(ext_);
185 }
186
187 /* ------------------------------------------------------------------------ */
188 /*
189 * Set a property. Strings have no settable properties, so simply
190 * signal an error indicating that the set-prop call is invalid.
191 */
set_prop(VMG_ CVmUndo *,vm_obj_id_t,vm_prop_id_t,const vm_val_t *)192 void CVmObjString::set_prop(VMG_ CVmUndo *, vm_obj_id_t,
193 vm_prop_id_t, const vm_val_t *)
194 {
195 err_throw(VMERR_INVALID_SETPROP);
196 }
197
198 /* ------------------------------------------------------------------------ */
199 /*
200 * Save the object to a file
201 */
save_to_file(VMG_ CVmFile * fp)202 void CVmObjString::save_to_file(VMG_ CVmFile *fp)
203 {
204 size_t len;
205
206 /* get our length */
207 len = vmb_get_len(ext_);
208
209 /* write the length prefix and the string */
210 fp->write_bytes(ext_, len + VMB_LEN);
211 }
212
213 /*
214 * Restore the object from a file
215 */
restore_from_file(VMG_ vm_obj_id_t,CVmFile * fp,CVmObjFixup *)216 void CVmObjString::restore_from_file(VMG_ vm_obj_id_t,
217 CVmFile *fp, CVmObjFixup *)
218 {
219 size_t len;
220
221 /* read the length prefix */
222 len = fp->read_uint2();
223
224 /* free any existing extension */
225 if (ext_ != 0)
226 {
227 G_mem->get_var_heap()->free_mem(ext_);
228 ext_ = 0;
229 }
230
231 /*
232 * allocate our extension - make room for the length prefix plus the
233 * bytes of the string
234 */
235 ext_ = (char *)G_mem->get_var_heap()->alloc_mem(len + VMB_LEN, this);
236
237 /* store our length prefix */
238 vmb_put_len(ext_, len);
239
240 /* read the string */
241 fp->read_bytes(ext_ + VMB_LEN, len);
242 }
243
244 /* ------------------------------------------------------------------------ */
245 /*
246 * Add a value to this string
247 */
add_val(VMG_ vm_val_t * result,vm_obj_id_t self,const vm_val_t * val)248 void CVmObjString::add_val(VMG_ vm_val_t *result,
249 vm_obj_id_t self, const vm_val_t *val)
250 {
251 /*
252 * Use the generic string adder, using my extension as the constant
253 * string. We store our extension in the general string format
254 * required by the static adder.
255 */
256 add_to_str(vmg_ result, self, ext_, val);
257 }
258
259 /*
260 * Static string adder. This creates a new string object that results
261 * from appending the given value to the given string constant. This is
262 * defined statically so that this same code can be shared for adding to
263 * constant pool strings and adding to CVmObjString objects.
264 *
265 * 'strval' must point to a constant string. The first two bytes of the
266 * string are stored in portable UINT2 format and give the length in
267 * bytes of the string, not including the length prefix; immediately
268 * following the length prefix are the bytes of the string.
269 *
270 * Note that we *always* create a new object to hold the result, even if
271 * the new string is identical to the first, so that we consistently
272 * return a distinct reference from the original.
273 */
add_to_str(VMG_ vm_val_t * result,vm_obj_id_t self,const char * strval1,const vm_val_t * val)274 void CVmObjString::add_to_str(VMG_ vm_val_t *result,
275 vm_obj_id_t self, const char *strval1,
276 const vm_val_t *val)
277 {
278 const char *strval2;
279 char buf[128];
280 vm_obj_id_t obj;
281 size_t len1, len2;
282 CVmObjString *objptr;
283 vm_val_t new_obj2;
284
285 /* convert the value to be appended to a string */
286 strval2 = cvt_to_str(vmg_ &new_obj2, buf, sizeof(buf), val, 10);
287
288 /*
289 * push the new string (if any) and self, to protect the two strings
290 * from garbage collection
291 */
292 G_stk->push()->set_obj(self);
293 G_stk->push(&new_obj2);
294
295 /* get the lengths of the two strings */
296 len1 = vmb_get_len(strval1);
297 len2 = vmb_get_len(strval2);
298
299 /* create a new string object to hold the result */
300 obj = create(vmg_ FALSE, len1 + len2);
301 objptr = (CVmObjString *)vm_objp(vmg_ obj);
302
303 /* copy the two strings into the new object's string buffer */
304 objptr->copy_into_str(0, strval1 + VMB_LEN, len1);
305 objptr->copy_into_str(len1, strval2 + VMB_LEN, len2);
306
307 /* we're done with the garbage collection protection */
308 G_stk->discard(2);
309
310 /* return the new object in the result */
311 result->set_obj(obj);
312 }
313
314
315 /* ------------------------------------------------------------------------ */
316 /*
317 * Allocate a string buffer large enough to hold a given value. We'll
318 * use the provided buffer if possible.
319 *
320 * If the provided buffer is null or is not large enough, we'll allocate
321 * a new string object with a large enough buffer to hold the value, and
322 * return the object's extension as the buffer. This object will never
323 * be referenced by anyone, so it will be deleted at the next garbage
324 * collection.
325 *
326 * The buffer size and requested size are in bytes.
327 */
alloc_str_buf(VMG_ vm_val_t * new_obj,char * buf,size_t buf_size,size_t required_size)328 char *CVmObjString::alloc_str_buf(VMG_ vm_val_t *new_obj,
329 char *buf, size_t buf_size,
330 size_t required_size)
331 {
332 vm_obj_id_t obj;
333
334 /* if the provided buffer is large enough, use it */
335 if (buf != 0 && buf_size >= required_size)
336 {
337 /* there's no new object */
338 new_obj->set_nil();
339
340 /* return the buffer */
341 return buf;
342 }
343
344 /* allocate a new string object */
345 obj = create(vmg_ FALSE, required_size);
346
347 /* return the new object's string buffer */
348 return (char *)vm_objp(vmg_ obj)->cast_to_string(vmg_ obj, new_obj);
349 }
350
351 /* ------------------------------------------------------------------------ */
352 /*
353 * Convert a value to a string
354 */
cvt_to_str(VMG_ vm_val_t * new_str,char * result_buf,size_t result_buf_size,const vm_val_t * val,int radix)355 const char *CVmObjString::cvt_to_str(VMG_ vm_val_t *new_str,
356 char *result_buf,
357 size_t result_buf_size,
358 const vm_val_t *val, int radix)
359 {
360 /* presume we won't need to create a new string object */
361 new_str->set_nil();
362
363 /* check the type of the value */
364 switch(val->typ)
365 {
366 case VM_SSTRING:
367 /* it's a string constant - no conversion is necessary */
368 return G_const_pool->get_ptr(val->val.ofs);
369
370 case VM_OBJ:
371 /* it's an object - ask it for its string representation */
372 return vm_objp(vmg_ val->val.obj)
373 ->cast_to_string(vmg_ val->val.obj, new_str);
374 break;
375
376 case VM_INT:
377 /*
378 * It's a number - convert it to a string. Use the provided
379 * result buffer if possible, but make sure we have room for the
380 * number. The unicode values we're storing are in the ascii
381 * range, so we only need one byte per character.
382 */
383 result_buf = alloc_str_buf(vmg_ new_str,
384 result_buf, result_buf_size, 20);
385
386 /* generate the string */
387 return cvt_int_to_str(result_buf, 20, val->val.intval, radix);
388
389 case VM_NIL:
390 /* nil - use the literal string "nil" */
391 return "\003\000nil";
392 break;
393
394 case VM_TRUE:
395 /* true - use the literal string "true" */
396 return "\004\000true";
397 break;
398
399 default:
400 /* other types cannot be added to a string */
401 err_throw(VMERR_NO_STR_CONV);
402
403 /* we never really get here, but the compiler doesn't know that */
404 return 0;
405 }
406 }
407
408 /* ------------------------------------------------------------------------ */
409 /*
410 * Convert an integer to a string, storing the result in the given
411 * buffer in portable string format (with length prefix). The radix
412 * must be 8, 10, or 16.
413 *
414 * Decimal numbers are treated as signed, and a leading dash is included
415 * if the number is negative. Octal and hex numbers are treated as
416 * unsigned.
417 *
418 * For efficiency, we store the number at the end of the buffer (this
419 * makes it easy to generate the number, since we need to generate
420 * numerals in reverse order). We return a pointer to the result, which
421 * may not start at the beginning of the buffer.
422 */
cvt_int_to_str(char * buf,size_t buflen,int32 inval,int radix)423 char *CVmObjString::cvt_int_to_str(char *buf, size_t buflen,
424 int32 inval, int radix)
425 {
426 int neg;
427 uint32 val;
428 char *p;
429 size_t len;
430
431 /* start at the end of the buffer */
432 p = buf + buflen;
433
434 /*
435 * if it's negative, and we're converting to decimal representation,
436 * treat the value as signed and use a leading minus sign;
437 * otherwise, treat the value as unsigned
438 */
439 if (radix == 10 && inval < 0)
440 {
441 /* note that we need a minus sign */
442 neg = TRUE;
443
444 /* use the positive value for the conversion */
445 val = (uint32)(-inval);
446 }
447 else
448 {
449 /* the value is positive (or at least unsigned) */
450 neg = FALSE;
451
452 /* use the value as-is */
453 val = (uint32)inval;
454 }
455
456 /* store numerals in reverse order */
457 do
458 {
459 char c;
460
461 /* if we have no more room, throw an error */
462 if (p == buf)
463 err_throw(VMERR_CONV_BUF_OVF);
464
465 /* move on to the next available character in the buffer */
466 --p;
467
468 /* figure the character representation of this numeral */
469 c = (char)(val % radix);
470 if (c < 10)
471 c += '0';
472 else
473 c += 'A' - 10;
474
475 /* store the numeral at the current location */
476 *p = c;
477
478 /* divide the remaining number by the radix */
479 val /= radix;
480 } while (val != 0);
481
482 /* store the leading minus sign if necessary */
483 if (neg)
484 {
485 /* if we don't have room, throw an error */
486 if (p == buf)
487 err_throw(VMERR_CONV_BUF_OVF);
488
489 /* move to the next byte */
490 --p;
491
492 /* store the minus sign */
493 *p = '-';
494 }
495
496 /* calculate the length */
497 len = buflen - (p - buf);
498
499 /* make sure we have room for the length prefix */
500 if (p < buf + 2)
501 err_throw(VMERR_CONV_BUF_OVF);
502
503 /* store the length prefix */
504 p -= 2;
505 vmb_put_len(p, len);
506
507 /* return the pointer to the start of the number */
508 return p;
509 }
510
511 /* ------------------------------------------------------------------------ */
512 /*
513 * Check a value for equality
514 */
equals(VMG_ vm_obj_id_t self,const vm_val_t * val,int) const515 int CVmObjString::equals(VMG_ vm_obj_id_t self,
516 const vm_val_t *val, int /*depth*/) const
517 {
518 /* if the other value is a reference to myself, we certainly match */
519 if (val->typ == VM_OBJ && val->val.obj == self)
520 return TRUE;
521
522 /*
523 * use the constant string comparison routine, using our underlying
524 * string as the constant string data
525 */
526 return const_equals(vmg_ ext_, val);
527 }
528
529 /*
530 * Constant string equality test
531 */
const_equals(VMG_ const char * str,const vm_val_t * val)532 int CVmObjString::const_equals(VMG_ const char *str, const vm_val_t *val)
533 {
534 const char *str2;
535 size_t len;
536
537 /* get the other value as a string */
538 str2 = val->get_as_string(vmg0_);
539
540 /* if the object doesn't have an underlying string, we don't match */
541 if (str2 == 0)
542 return FALSE;
543
544 /*
545 * if their lengths match, and the bytes match exactly, we have a
546 * match; otherwise, they're not equal
547 */
548 len = vmb_get_len(str);
549 return (len == vmb_get_len(str2)
550 && memcmp(str + VMB_LEN, str2 + VMB_LEN, len) == 0);
551 }
552
553 /* ------------------------------------------------------------------------ */
554 /*
555 * Hash value
556 */
calc_hash(VMG_ vm_obj_id_t self,int) const557 uint CVmObjString::calc_hash(VMG_ vm_obj_id_t self, int /*depth*/) const
558 {
559 return const_calc_hash(ext_);
560 }
561
562 /*
563 * Hash value calculation
564 */
const_calc_hash(const char * str)565 uint CVmObjString::const_calc_hash(const char *str)
566 {
567 size_t len;
568 uint hash;
569 utf8_ptr p;
570
571 /* get and skip the length prefix */
572 len = vmb_get_len(str);
573 str += VMB_LEN;
574
575 /* scan the string and calculate the hash */
576 for (p.set((char *)str), hash = 0 ; len != 0 ; p.inc(&len))
577 hash += p.getch();
578
579 /* return the result */
580 return hash;
581 }
582
583
584 /* ------------------------------------------------------------------------ */
585 /*
586 * Compare this string to another value
587 */
compare_to(VMG_ vm_obj_id_t,const vm_val_t * val) const588 int CVmObjString::compare_to(VMG_ vm_obj_id_t /*self*/,
589 const vm_val_t *val) const
590 {
591 /* use the static string magnitude comparison routine */
592 return const_compare(vmg_ ext_, val);
593 }
594
595 /*
596 * Compare a constant string value to another value. Returns a positive
597 * number if the constant string is lexically greater than the other
598 * value, a negative number if the constant string is lexically less
599 * than the other value, or zero if the constant string is lexically
600 * identical to the other value.
601 *
602 * The other value must be a string constant or an object with an
603 * underlying string value. We'll throw an error for any other type of
604 * value.
605 */
const_compare(VMG_ const char * str1,const vm_val_t * val)606 int CVmObjString::const_compare(VMG_ const char *str1, const vm_val_t *val)
607 {
608 const char *str2;
609 size_t len1, len2;
610
611 /* get the other value as a string */
612 str2 = val->get_as_string(vmg0_);
613
614 /* if it's not a string, we can't compare it */
615 if (str2 == 0)
616 err_throw(VMERR_INVALID_COMPARISON);
617
618 /* get the lengths of the two strings */
619 len1 = vmb_get_len(str1);
620 len2 = vmb_get_len(str2);
621
622 /* perform a lexical comparison and return the result */
623 return utf8_ptr::s_compare_to(str1 + VMB_LEN, len1, str2 + VMB_LEN, len2);
624 }
625
626 /* ------------------------------------------------------------------------ */
627 /*
628 * Find a substring within a string
629 */
find_substr(VMG_ const char * str,int start_idx,const char * substr,size_t * idxp)630 const char *CVmObjString::find_substr(VMG_ const char *str, int start_idx,
631 const char *substr, size_t *idxp)
632 {
633 utf8_ptr p;
634 size_t rem;
635 size_t sublen;
636 size_t char_ofs;
637 int i;
638
639 /* get the lengths */
640 rem = vmb_get_len(str);
641 sublen = vmb_get_len(substr);
642
643 /* set up utf8 pointer into the string */
644 p.set((char *)str + 2);
645
646 /* skip to the starting index */
647 for (i = start_idx ; i > 0 && rem >= sublen ; --i, p.inc(&rem)) ;
648
649 /* scan for the substring */
650 for (char_ofs = 0 ; rem != 0 && rem >= sublen ; ++char_ofs, p.inc(&rem))
651 {
652 /* check for a match */
653 if (memcmp(p.getptr(), substr + VMB_LEN, sublen) == 0)
654 {
655 /* it's a match - set the return index if they are interested */
656 if (idxp != 0)
657 *idxp = char_ofs + start_idx;
658
659 /* return the current pointer */
660 return p.getptr();
661 }
662 }
663
664 /* we didn't find it - so indicate by returning null */
665 return 0;
666 }
667
668 /* ------------------------------------------------------------------------ */
669 /*
670 * Evaluate a property
671 */
get_prop(VMG_ vm_prop_id_t prop,vm_val_t * retval,vm_obj_id_t self,vm_obj_id_t * source_obj,uint * argc)672 int CVmObjString::get_prop(VMG_ vm_prop_id_t prop, vm_val_t *retval,
673 vm_obj_id_t self, vm_obj_id_t *source_obj,
674 uint *argc)
675 {
676 vm_val_t self_val;
677
678 /* use the constant evaluator */
679 self_val.set_obj(self);
680 if (const_get_prop(vmg_ retval, &self_val, ext_, prop, source_obj, argc))
681 {
682 *source_obj = metaclass_reg_->get_class_obj(vmg0_);
683 return TRUE;
684 }
685
686 /* inherit default handling from the base object class */
687 return CVmObject::get_prop(vmg_ prop, retval, self, source_obj, argc);
688 }
689
690 /* ------------------------------------------------------------------------ */
691 /*
692 * Evaluate a property of a constant string value
693 */
const_get_prop(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,vm_prop_id_t prop,vm_obj_id_t * src_obj,uint * argc)694 int CVmObjString::const_get_prop(VMG_ vm_val_t *retval,
695 const vm_val_t *self_val, const char *str,
696 vm_prop_id_t prop, vm_obj_id_t *src_obj,
697 uint *argc)
698 {
699 ushort func_idx;
700
701 /* presume no source object */
702 *src_obj = VM_INVALID_OBJ;
703
704 /* translate the property index to an index into our function table */
705 func_idx = G_meta_table
706 ->prop_to_vector_idx(metaclass_reg_->get_reg_idx(), prop);
707
708 /* call the appropriate function */
709 if ((*func_table_[func_idx])(vmg_ retval, self_val, str, argc))
710 return TRUE;
711
712 /*
713 * If this is a constant string (which is indicated by an invalid
714 * 'self' object ID), try inheriting the default object
715 * interpretation, passing the constant string placeholder object
716 * for its type information.
717 */
718 if (self_val->typ != VM_OBJ)
719 {
720 /* try going to CVmObject directly */
721 if (vm_objp(vmg_ G_predef->const_str_obj)
722 ->CVmObject::get_prop(vmg_ prop, retval, G_predef->const_str_obj,
723 src_obj, argc))
724 return TRUE;
725 }
726
727 /* not handled */
728 return FALSE;
729 }
730
731 /* ------------------------------------------------------------------------ */
732 /*
733 * property evaluator - get the length
734 */
getp_len(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)735 int CVmObjString::getp_len(VMG_ vm_val_t *retval, const vm_val_t *self_val,
736 const char *str, uint *argc)
737 {
738 utf8_ptr p;
739 static CVmNativeCodeDesc desc(0);
740
741 /* check arguments */
742 if (get_prop_check_argc(retval, argc, &desc))
743 return TRUE;
744
745 /* set up a utf-8 pointer to the string's contents */
746 p.set((char *)str + VMB_LEN);
747
748 /* return the character length of the string */
749 retval->set_int(p.len(vmb_get_len(str)));
750
751 /* handled */
752 return TRUE;
753 }
754
755 /* ------------------------------------------------------------------------ */
756 /*
757 * property evaluator - extract a substring
758 */
getp_substr(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * in_argc)759 int CVmObjString::getp_substr(VMG_ vm_val_t *retval, const vm_val_t *self_val,
760 const char *str, uint *in_argc)
761 {
762 long start;
763 ulong len;
764 size_t rem;
765 utf8_ptr p;
766 utf8_ptr start_p;
767 size_t start_rem;
768 size_t new_len;
769 vm_obj_id_t obj;
770 uint argc = (in_argc == 0 ? 0 : *in_argc);
771 static CVmNativeCodeDesc desc(1, 1);
772
773 /* check arguments */
774 if (get_prop_check_argc(retval, in_argc, &desc))
775 return TRUE;
776
777 /* pop the starting index */
778 start = CVmBif::pop_long_val(vmg0_);
779
780 /* pop the length, if present */
781 if (argc >= 2)
782 len = CVmBif::pop_long_val(vmg0_);
783
784 /* push a self-reference to protect against GC */
785 G_stk->push(self_val);
786
787 /* set up a utf8 pointer to traverse the string */
788 p.set((char *)str + VMB_LEN);
789
790 /* get the byte length of the string */
791 rem = vmb_get_len(str);
792
793 /*
794 * Skip ahead to the starting index. If the index is positive, it's
795 * an index from the start of the string; if it's negative, it's an
796 * offset from the end of the string.
797 */
798 if (start > 0)
799 {
800 /*
801 * it's an index from the start - skip ahead by start-1 characters
802 * (since a start value of 1 tells us to start at the first
803 * character)
804 */
805 for ( ; start > 1 && rem != 0 ; --start)
806 p.inc(&rem);
807 }
808 else if (start < 0)
809 {
810 /*
811 * It's an index from the end of the string: -1 tells us to start
812 * at the last character, -2 at the second to last, and so on.
813 * Move to the first byte past the end of the string, and work
814 * backwards by the given number of characters.
815 */
816 for (p.set((char *)str + VMB_LEN + rem), rem = 0 ;
817 start < 0 && p.getptr() != (char *)str + VMB_LEN ; ++start)
818 {
819 /* move back one character */
820 p.dec(&rem);
821 }
822 }
823
824 /* this is the starting position */
825 start_p = p;
826 start_rem = rem;
827
828 /*
829 * if a length was specified, calculate the number of bytes in the
830 * given length; otherwise, use the entire remainder of the string
831 */
832 if (argc >= 2)
833 {
834 /* keep skipping ahead by the desired length */
835 for ( ; len > 0 && rem != 0 ; --len)
836 p.inc(&rem);
837
838 /* use the difference in lengths from the starting point to here */
839 new_len = start_rem - rem;
840 }
841 else
842 {
843 /* use the entire remainder of the string */
844 new_len = start_rem;
845 }
846
847 /* create the new string */
848 obj = CVmObjString::create(vmg_ FALSE, start_p.getptr(), new_len);
849
850 /* return the new object */
851 retval->set_obj(obj);
852
853 /* discard the GC protection references */
854 G_stk->discard();
855
856 /* handled */
857 return TRUE;
858 }
859
860 /* ------------------------------------------------------------------------ */
861 /*
862 * property evaluator - toUpper
863 */
getp_upper(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)864 int CVmObjString::getp_upper(VMG_ vm_val_t *retval, const vm_val_t *self_val,
865 const char *str, uint *argc)
866 {
867 size_t srclen;
868 size_t dstlen;
869 size_t rem;
870 utf8_ptr srcp;
871 utf8_ptr dstp;
872 vm_obj_id_t result_obj;
873 static CVmNativeCodeDesc desc(0);
874
875 /* check arguments */
876 if (get_prop_check_argc(retval, argc, &desc))
877 return TRUE;
878
879 /* get my length */
880 srclen = vmb_get_len(str);
881
882 /* leave the string on the stack as GC protection */
883 G_stk->push(self_val);
884
885 /*
886 * Scan the string to determine how long the result will be. The
887 * result won't necessarily be the same length as the original,
888 * because a two-byte character in the original could turn into a
889 * three-byte character in the result, and vice versa. (We could
890 * allocate a result buffer three times the length of the original,
891 * but this seems more wasteful of space than scanning the string
892 * twice is wasteful of time. It's a trade-off, though.)
893 */
894 for (dstlen = 0, srcp.set((char *)str + VMB_LEN), rem = srclen ;
895 rem != 0 ; srcp.inc(&rem))
896 {
897 /* get the size of the mapping for this character */
898 dstlen += utf8_ptr::s_wchar_size(t3_to_upper(srcp.getch()));
899 }
900
901 /* allocate the result string */
902 result_obj = CVmObjString::create(vmg_ FALSE, dstlen);
903
904 /* get a pointer to the result buffer */
905 dstp.set(((CVmObjString *)vm_objp(vmg_ result_obj))->cons_get_buf());
906
907 /* write the string */
908 for (srcp.set((char *)str + VMB_LEN), rem = srclen ;
909 rem != 0 ; srcp.inc(&rem))
910 {
911 /* write the next character */
912 dstp.setch(t3_to_upper(srcp.getch()));
913 }
914
915 /* return the value */
916 retval->set_obj(result_obj);
917
918 /* discard GC protection */
919 G_stk->discard();
920
921 /* handled */
922 return TRUE;
923 }
924
925 /* ------------------------------------------------------------------------ */
926 /*
927 * property evaluator - toLower
928 */
getp_lower(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)929 int CVmObjString::getp_lower(VMG_ vm_val_t *retval, const vm_val_t *self_val,
930 const char *str, uint *argc)
931 {
932 size_t srclen;
933 size_t dstlen;
934 size_t rem;
935 utf8_ptr srcp;
936 utf8_ptr dstp;
937 vm_obj_id_t result_obj;
938 static CVmNativeCodeDesc desc(0);
939
940 /* check arguments */
941 if (get_prop_check_argc(retval, argc, &desc))
942 return TRUE;
943
944 /* get my length */
945 srclen = vmb_get_len(str);
946
947 /* leave the string on the stack as GC protection */
948 G_stk->push(self_val);
949
950 /*
951 * Scan the string to determine how long the result will be. The
952 * result won't necessarily be the same length as the original,
953 * because a two-byte character in the original could turn into a
954 * three-byte character in the result, and vice versa. (We could
955 * allocate a result buffer three times the length of the original,
956 * but this seems more wasteful of space than scanning the string
957 * twice is wasteful of time. It's a trade-off, though.)
958 */
959 for (dstlen = 0, srcp.set((char *)str + VMB_LEN), rem = srclen ;
960 rem != 0 ; srcp.inc(&rem))
961 {
962 /* get the size of the mapping for this character */
963 dstlen += utf8_ptr::s_wchar_size(t3_to_lower(srcp.getch()));
964 }
965
966 /* allocate the result string */
967 result_obj = CVmObjString::create(vmg_ FALSE, dstlen);
968
969 /* get a pointer to the result buffer */
970 dstp.set(((CVmObjString *)vm_objp(vmg_ result_obj))->cons_get_buf());
971
972 /* write the string */
973 for (srcp.set((char *)str + VMB_LEN), rem = srclen ;
974 rem != 0 ; srcp.inc(&rem))
975 {
976 /* write the next character */
977 dstp.setch(t3_to_lower(srcp.getch()));
978 }
979
980 /* return the value */
981 retval->set_obj(result_obj);
982
983 /* discard GC protection */
984 G_stk->discard();
985
986 /* handled */
987 return TRUE;
988 }
989
990 /* ------------------------------------------------------------------------ */
991 /*
992 * property evaluator - find
993 */
getp_find(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)994 int CVmObjString::getp_find(VMG_ vm_val_t *retval, const vm_val_t *self_val,
995 const char *str, uint *argc)
996 {
997 const char *str2;
998 size_t idx;
999 uint orig_argc = (argc != 0 ? *argc : 0);
1000 static CVmNativeCodeDesc desc(1, 1);
1001 int start_idx;
1002
1003 /* check arguments */
1004 if (get_prop_check_argc(retval, argc, &desc))
1005 return TRUE;
1006
1007 /* retrieve the string to find */
1008 str2 = CVmBif::pop_str_val(vmg0_);
1009
1010 /* if there's a starting index, retrieve it */
1011 start_idx = (orig_argc >= 2 ? CVmBif::pop_int_val(vmg0_) - 1 : 0);
1012
1013 /* find the substring */
1014 if (find_substr(vmg_ str, start_idx, str2, &idx) != 0)
1015 {
1016 /* we found it - adjust to a 1-based value for return */
1017 retval->set_int(idx + 1);
1018 }
1019 else
1020 {
1021 /* didn't find it - return nil */
1022 retval->set_nil();
1023 }
1024
1025 /* handled */
1026 return TRUE;
1027 }
1028
1029 /* ------------------------------------------------------------------------ */
1030 /*
1031 * replace flags
1032 */
1033 #define GETP_RPL_ALL 0x0001
1034
1035 /*
1036 * property evaluator - replace
1037 */
getp_replace(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)1038 int CVmObjString::getp_replace(VMG_ vm_val_t *retval,
1039 const vm_val_t *self_val,
1040 const char *str, uint *argc)
1041 {
1042 vm_val_t arg1;
1043 vm_val_t arg2;
1044 const char *substr;
1045 const char *rplstr;
1046 size_t sublen;
1047 size_t rpllen;
1048 uint orig_argc = (argc != 0 ? *argc : 0);
1049 static CVmNativeCodeDesc desc(3, 1);
1050 int flags;
1051 utf8_ptr p;
1052 size_t rem;
1053 size_t new_len;
1054 int found;
1055 int start_idx;
1056 const char *rpl_start;
1057
1058 /* check arguments */
1059 if (get_prop_check_argc(retval, argc, &desc))
1060 return TRUE;
1061
1062 /*
1063 * make copies of the string references, so we can put them back on the
1064 * stack as gc protection while we're working
1065 */
1066 arg1 = *G_stk->get(0);
1067 arg2 = *G_stk->get(1);
1068
1069 /* retrieve the search and replacement substrings */
1070 substr = CVmBif::pop_str_val(vmg0_);
1071 rplstr = CVmBif::pop_str_val(vmg0_);
1072
1073 /* note the string lengths */
1074 sublen = vmb_get_len(substr);
1075 rpllen = vmb_get_len(rplstr);
1076
1077 /* get the flags */
1078 flags = CVmBif::pop_int_val(vmg0_);
1079
1080 /* if there's a starting index, retrieve it */
1081 start_idx = (orig_argc >= 4 ? CVmBif::pop_int_val(vmg0_) - 1 : 0);
1082
1083 /* put the string references back on the stack for gc protection */
1084 G_stk->push(&arg1);
1085 G_stk->push(&arg2);
1086
1087 /* start at the beginning of the string to search */
1088 rem = new_len = vmb_get_len(str);
1089 p.set((char *)str + 2);
1090
1091 /* skip ahead to the starting index */
1092 for ( ; start_idx > 0 && rem >= sublen ; --start_idx, p.inc(&rem)) ;
1093
1094 /*
1095 * note the starting index for replacements - we don't want to replace
1096 * anything before this point
1097 */
1098 rpl_start = p.getptr();
1099
1100 /*
1101 * Scan for instances of the substring, so we can figure out how big
1102 * the result string will be. Don't actually do any replacements yet;
1103 * we'll scan again once we know how the result size.
1104 */
1105 for (found = FALSE ; rem >= sublen ; )
1106 {
1107 /* if this is a match for the substring, note it */
1108 if (memcmp(p.getptr(), substr + VMB_LEN, vmb_get_len(substr)) == 0)
1109 {
1110 /* note the find */
1111 found = TRUE;
1112
1113 /* it's a match - adjust the result length for the replacement */
1114 new_len += rpllen - sublen;
1115
1116 /* if we're replacing one instance only, look no further */
1117 if ((flags & GETP_RPL_ALL) == 0)
1118 break;
1119
1120 /* skip the entire substring in the source */
1121 p.set(p.getptr() + sublen);
1122 rem -= sublen;
1123 }
1124 else
1125 {
1126 /* skip one character */
1127 p.inc(&rem);
1128 }
1129 }
1130
1131 /*
1132 * if we found no instances of the search substring, the result is
1133 * simply the source string; otherwise, we must create a new string
1134 * with the substitution(s)
1135 */
1136 if (found)
1137 {
1138 utf8_ptr dst;
1139
1140 /* allocate the new string */
1141 retval->set_obj(create(vmg_ FALSE, new_len));
1142
1143 /* get a pointer to the buffer */
1144 dst.set(((CVmObjString *)vm_objp(vmg_ retval->val.obj))
1145 ->cons_get_buf());
1146
1147 /* scan the string for replacements */
1148 for (p.set((char *)str + 2), rem = vmb_get_len(str) ;
1149 rem >= sublen ; )
1150 {
1151 /*
1152 * If this is a match for the substring, and we've reached the
1153 * starting point for replacements, replace the substring.
1154 */
1155 if (p.getptr() >= rpl_start
1156 && memcmp(p.getptr(), substr + VMB_LEN, sublen) == 0)
1157 {
1158 /* it's a match - copy the replacement into the result */
1159 memcpy(dst.getptr(), rplstr + VMB_LEN, rpllen);
1160
1161 /* move past the replacement in the result */
1162 dst.set(dst.getptr() + rpllen);
1163
1164 /* move past the search substring in the source */
1165 p.set(p.getptr() + sublen);
1166 rem -= sublen;
1167
1168 /* if we're replacing one instance only, look no further */
1169 if ((flags & GETP_RPL_ALL) == 0)
1170 break;
1171 }
1172 else
1173 {
1174 /* copy the current character to the result */
1175 dst.setch(p.getch());
1176
1177 /* skip the current character of input */
1178 p.inc(&rem);
1179 }
1180 }
1181
1182 /* copy the remaining source into the result */
1183 if (rem != 0)
1184 memcpy(dst.getptr(), p.getptr(), rem);
1185 }
1186 else
1187 {
1188 /* we didn't find it - the result is simply the original string */
1189 *retval = *self_val;
1190 }
1191
1192 /* discard the gc protection */
1193 G_stk->discard(2);
1194
1195 /* handled */
1196 return TRUE;
1197 }
1198
1199 /* ------------------------------------------------------------------------ */
1200 /*
1201 * property evaluator - convert to unicode
1202 */
getp_to_uni(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * in_argc)1203 int CVmObjString::getp_to_uni(VMG_ vm_val_t *retval,
1204 const vm_val_t *self_val,
1205 const char *str, uint *in_argc)
1206 {
1207 uint argc = (in_argc != 0 ? *in_argc : 0);
1208 size_t bytelen;
1209 ulong idx;
1210 utf8_ptr p;
1211 static CVmNativeCodeDesc desc(0, 1);
1212
1213 /* check arguments */
1214 if (get_prop_check_argc(retval, in_argc, &desc))
1215 return TRUE;
1216
1217 /* retrieve the index argument if present */
1218 if (argc >= 1)
1219 idx = CVmBif::pop_long_val(vmg0_);
1220
1221 /* push a self-reference as GC protection */
1222 G_stk->push(self_val);
1223
1224 /* get and skip the string's length prefix */
1225 bytelen = vmb_get_len(str);
1226 str += VMB_LEN;
1227
1228 /* set up a utf8 pointer to the string */
1229 p.set((char *)str);
1230
1231 /* check for an index argument */
1232 if (argc >= 1)
1233 {
1234 /* skip through the string until we get to the desired index */
1235 for ( ; idx > 1 && bytelen != 0 ; --idx, p.inc(&bytelen)) ;
1236
1237 /* check to see if we have a character available */
1238 if (idx == 1 && bytelen != 0)
1239 {
1240 /* the index is valid - return the character here */
1241 retval->set_int((long)p.getch());
1242 }
1243 else
1244 {
1245 /*
1246 * the index is past the end of the string or is less than 1
1247 * - return nil to indicate that there's no character here
1248 */
1249 retval->set_nil();
1250 }
1251 }
1252 else
1253 {
1254 size_t charlen;
1255 vm_obj_id_t lst_obj;
1256 CVmObjList *lst;
1257 size_t i;
1258
1259 /*
1260 * There's no index argument - they want a list of all of the
1261 * code points in the string. First, get the number of
1262 * characters in the string.
1263 */
1264 charlen = p.len(bytelen);
1265
1266 /* create a list to hold the results */
1267 lst_obj = CVmObjList::create(vmg_ FALSE, charlen);
1268 lst = (CVmObjList *)vm_objp(vmg_ lst_obj);
1269
1270 /* set the list's elements to the unicode characters values */
1271 for (i = 0 ; i < charlen ; ++i, p.inc())
1272 {
1273 wchar_t ch;
1274 vm_val_t ele_val;
1275
1276 /* get this character */
1277 ch = p.getch();
1278
1279 /* set this list element */
1280 ele_val.set_int((long)ch);
1281 lst->cons_set_element(i, &ele_val);
1282 }
1283
1284 /* return the list object */
1285 retval->set_obj(lst_obj);
1286 }
1287
1288 /* discard the GC protection */
1289 G_stk->discard();
1290
1291 /* handled */
1292 return TRUE;
1293 }
1294
1295 /* ------------------------------------------------------------------------ */
1296 /*
1297 * property evaluator - htmlify
1298 */
1299
1300 /*
1301 * htmlify flags
1302 */
1303
1304 /* preserve spaces */
1305 #define VMSTR_HTMLIFY_KEEP_SPACES 0x0001
1306
1307 /* preserve newlines */
1308 #define VMSTR_HTMLIFY_KEEP_NEWLINES 0x0002
1309
1310 /* preserve tabs */
1311 #define VMSTR_HTMLIFY_KEEP_TABS 0x0004
1312
1313 /*
1314 * htmlify implementation
1315 */
getp_htmlify(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * in_argc)1316 int CVmObjString::getp_htmlify(VMG_ vm_val_t *retval,
1317 const vm_val_t *self_val,
1318 const char *str, uint *in_argc)
1319 {
1320 uint argc = (in_argc != 0 ? *in_argc : 0);
1321 size_t bytelen;
1322 utf8_ptr p;
1323 utf8_ptr dstp;
1324 size_t rem;
1325 size_t extra;
1326 long flags;
1327 vm_obj_id_t result_obj;
1328 int prv_was_sp;
1329 static CVmNativeCodeDesc desc(0, 1);
1330
1331 /* check arguments */
1332 if (get_prop_check_argc(retval, in_argc, &desc))
1333 return TRUE;
1334
1335 /* if they specified flags, pop them */
1336 if (argc >= 1)
1337 {
1338 /* retrieve the flags */
1339 flags = CVmBif::pop_long_val(vmg0_);
1340 }
1341 else
1342 {
1343 /* no flags */
1344 flags = 0;
1345 }
1346
1347 /* push a self-reference as GC protection */
1348 G_stk->push(self_val);
1349
1350 /* get and skip the string's length prefix */
1351 bytelen = vmb_get_len(str);
1352 str += VMB_LEN;
1353
1354 /*
1355 * scan the string to determine how much space we'll have to add to
1356 * generate the htmlified version
1357 */
1358 for (prv_was_sp = FALSE, extra = 0, p.set((char *)str), rem = bytelen ;
1359 rem != 0 ; p.inc(&rem))
1360 {
1361 int this_is_sp;
1362
1363 /* presume it's not a space */
1364 this_is_sp = FALSE;
1365
1366 /* check what we have */
1367 switch(p.getch())
1368 {
1369 case '&':
1370 /* we must replace '&' with '&' - this adds four bytes */
1371 extra += 4;
1372 break;
1373
1374 case '<':
1375 /* we must replace '<' with '<' - this adds three bytes */
1376 extra += 3;
1377 break;
1378
1379 case ' ':
1380 /*
1381 * If we're in preserve-spaces mode, and the previous space
1382 * was some kind of whitespace character, change this to
1383 * ' ' - this adds five bytes
1384 */
1385 if (prv_was_sp && (flags & VMSTR_HTMLIFY_KEEP_SPACES) != 0)
1386 extra += 5;
1387
1388 /* note that this was a whitespace character */
1389 this_is_sp = TRUE;
1390 break;
1391
1392 case '\t':
1393 /* if we're in preserve-tabs mode, change this to '<tab>' */
1394 if ((flags & VMSTR_HTMLIFY_KEEP_TABS) != 0)
1395 extra += 4;
1396
1397 /* note that this was a whitespace character */
1398 this_is_sp = TRUE;
1399 break;
1400
1401 case '\n':
1402 case 0x2028:
1403 /* if we're in preserve-newlines mode, change this to '<br>' */
1404 if ((flags & VMSTR_HTMLIFY_KEEP_NEWLINES) != 0)
1405 extra += 3;
1406
1407 /* note that this was a whitespace character */
1408 this_is_sp = TRUE;
1409 break;
1410 }
1411
1412 /* for next time, remember whether this is a space */
1413 prv_was_sp = this_is_sp;
1414 }
1415
1416 /* allocate space for the new string */
1417 result_obj = create(vmg_ FALSE, bytelen + extra);
1418
1419 /* get a pointer to the result buffer */
1420 dstp.set(((CVmObjString *)vm_objp(vmg_ result_obj))->cons_get_buf());
1421
1422 /* translate the string and write the result */
1423 for (prv_was_sp = FALSE, p.set((char *)str), rem = bytelen ;
1424 rem != 0 ; p.inc(&rem))
1425 {
1426 wchar_t ch;
1427 int this_is_sp;
1428
1429 /* get this character */
1430 ch = p.getch();
1431
1432 /* presume it's not a space */
1433 this_is_sp = FALSE;
1434
1435 /* check what we have */
1436 switch(ch)
1437 {
1438 case '&':
1439 /* replace '&' with '&' */
1440 dstp.setch_str("&");
1441 break;
1442
1443 case '<':
1444 /* we must replace '<' with '<' - this adds three bytes */
1445 dstp.setch_str("<");
1446 break;
1447
1448 case ' ':
1449 /* note that this was a whitespace character */
1450 this_is_sp = TRUE;
1451
1452 /*
1453 * ignore it if not in preserve-spaces mode, or if the
1454 * previous character wasn't whitespace of some kind
1455 */
1456 if (!prv_was_sp || (flags & VMSTR_HTMLIFY_KEEP_SPACES) == 0)
1457 goto do_default;
1458
1459 /* add the nbsp */
1460 dstp.setch_str(" ");
1461 break;
1462
1463 case '\t':
1464 /* note that this was a whitespace character */
1465 this_is_sp = TRUE;
1466
1467 /* ignore if not in preserve-tabs mode */
1468 if ((flags & VMSTR_HTMLIFY_KEEP_TABS) == 0)
1469 goto do_default;
1470
1471 /* add the <tab> */
1472 dstp.setch_str("<tab>");
1473 break;
1474
1475 case '\n':
1476 case 0x2028:
1477 /* note that this was a whitespace character */
1478 this_is_sp = TRUE;
1479
1480 /* if we're not in preserve-newlines mode, ignore it */
1481 if ((flags & VMSTR_HTMLIFY_KEEP_NEWLINES) == 0)
1482 goto do_default;
1483
1484 /* add the <br> */
1485 dstp.setch_str("<br>");
1486 break;
1487
1488 default:
1489 do_default:
1490 /* copy this character unchanged */
1491 dstp.setch(ch);
1492 break;
1493 }
1494
1495 /* for next time, remember whether this is a space */
1496 prv_was_sp = this_is_sp;
1497 }
1498
1499 /* return the new string */
1500 retval->set_obj(result_obj);
1501
1502 /* discard the GC protection */
1503 G_stk->discard();
1504
1505 /* handled */
1506 return TRUE;
1507 }
1508
1509 /* ------------------------------------------------------------------------ */
1510 /*
1511 * property evaluator - startsWith
1512 */
getp_starts_with(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)1513 int CVmObjString::getp_starts_with(VMG_ vm_val_t *retval,
1514 const vm_val_t *self_val,
1515 const char *str, uint *argc)
1516 {
1517 static CVmNativeCodeDesc desc(1);
1518 const char *str2;
1519 size_t len;
1520 size_t len2;
1521
1522 /* check arguments */
1523 if (get_prop_check_argc(retval, argc, &desc))
1524 return TRUE;
1525
1526 /* retrieve the other string */
1527 str2 = CVmBif::pop_str_val(vmg0_);
1528
1529 /* get the lengths of the two strings */
1530 len = vmb_get_len(str);
1531 len2 = vmb_get_len(str2);
1532
1533 /* move to the contents of each string */
1534 str += VMB_LEN;
1535 str2 += VMB_LEN;
1536
1537 /*
1538 * if the other string is no longer than our string, and the other
1539 * string matches our string exactly for the other string's entire
1540 * length, we start with the other string
1541 */
1542 retval->set_logical(len2 <= len && memcmp(str, str2, len2) == 0);
1543
1544 /* handled */
1545 return TRUE;
1546 }
1547
1548 /*
1549 * property evaluator - endsWith
1550 */
getp_ends_with(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)1551 int CVmObjString::getp_ends_with(VMG_ vm_val_t *retval,
1552 const vm_val_t *self_val,
1553 const char *str, uint *argc)
1554 {
1555 static CVmNativeCodeDesc desc(1);
1556 const char *str2;
1557 size_t len;
1558 size_t len2;
1559
1560 /* check arguments */
1561 if (get_prop_check_argc(retval, argc, &desc))
1562 return TRUE;
1563
1564 /* retrieve the other string */
1565 str2 = CVmBif::pop_str_val(vmg0_);
1566
1567 /* get the lengths of the two strings */
1568 len = vmb_get_len(str);
1569 len2 = vmb_get_len(str2);
1570
1571 /* move to the contents of each string */
1572 str += VMB_LEN;
1573 str2 += VMB_LEN;
1574
1575 /*
1576 * If the other string is no longer than our string, and the other
1577 * string matches our string at the end exactly for the other string's
1578 * entire length, we start with the other string. Note we don't need
1579 * to worry about finding a valid character index in our string for
1580 * the ending offset, because all we care about is whether or not we
1581 * have an exact byte match between our suffix and the other string.
1582 */
1583 retval->set_logical(len2 <= len
1584 && memcmp(str + len - len2, str2, len2) == 0);
1585
1586 /* handled */
1587 return TRUE;
1588 }
1589
1590 /* ------------------------------------------------------------------------ */
1591 /*
1592 * property evaluator - mapToByteArray
1593 */
getp_to_byte_array(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)1594 int CVmObjString::getp_to_byte_array(VMG_ vm_val_t *retval,
1595 const vm_val_t *self_val,
1596 const char *str, uint *argc)
1597 {
1598 static CVmNativeCodeDesc desc(1);
1599 size_t len;
1600 CCharmapToLocal *mapper;
1601 vm_val_t *arg;
1602 size_t byte_len;
1603 size_t src_bytes_used;
1604 size_t out_idx;
1605 CVmObjByteArray *arr;
1606
1607 /* check arguments */
1608 if (get_prop_check_argc(retval, argc, &desc))
1609 return TRUE;
1610
1611 /* retrieve the CharacterSet object and make sure it's valid */
1612 arg = G_stk->get(0);
1613 if (arg->typ != VM_OBJ || !CVmObjCharSet::is_charset(vmg_ arg->val.obj))
1614 err_throw(VMERR_BAD_TYPE_BIF);
1615
1616 /* get the to-local mapping from the character set */
1617 mapper = ((CVmObjCharSet *)vm_objp(vmg_ arg->val.obj))
1618 ->get_to_local(vmg0_);
1619
1620 /* get my length and skip the length prefix */
1621 len = vmb_get_len(str);
1622 str += VMB_LEN;
1623
1624 /*
1625 * first, do a mapping with a null output buffer to determine how many
1626 * bytes we need for the mapping
1627 */
1628 byte_len = mapper->map_utf8(0, 0, str, len, &src_bytes_used);
1629
1630 /* allocate a new ByteArray with the required number of bytes */
1631 retval->set_obj(CVmObjByteArray::create(vmg_ FALSE, byte_len));
1632 arr = (CVmObjByteArray *)vm_objp(vmg_ retval->val.obj);
1633
1634 /* convert it again, this time storing the bytes */
1635 for (out_idx = 1 ; len != 0 ; )
1636 {
1637 char buf[128];
1638
1639 /* convert a buffer-full */
1640 byte_len = mapper->map_utf8(buf, sizeof(buf), str, len,
1641 &src_bytes_used);
1642
1643 /* store the bytes in the byte array */
1644 arr->cons_copy_from_buf((unsigned char *)buf, out_idx, byte_len);
1645
1646 /* advance past the output bytes we used */
1647 out_idx += byte_len;
1648
1649 /* advance past the source bytes we used */
1650 str += src_bytes_used;
1651 len -= src_bytes_used;
1652 }
1653
1654 /* discard arguments */
1655 G_stk->discard();
1656
1657 /* handled */
1658 return TRUE;
1659 }
1660
1661 /* ------------------------------------------------------------------------ */
1662 /*
1663 * Constant-pool string object
1664 */
1665
1666 /*
1667 * create
1668 */
create(VMG_ const char * const_ptr)1669 vm_obj_id_t CVmObjStringConst::create(VMG_ const char *const_ptr)
1670 {
1671 /* create our new ID */
1672 vm_obj_id_t id = vm_new_id(vmg_ FALSE, FALSE, FALSE);
1673
1674 /* create our string object, pointing directly to the constant pool */
1675 new (vmg_ id) CVmObjStringConst(vmg_ const_ptr);
1676
1677 /* return the new ID */
1678 return id;
1679 }
1680