1 #ifdef RCSID
2 static char RCSid[] =
3 "$Header: d:/cvsroot/tads/tads3/VMSTR.CPP,v 1.3 1999/05/17 02:52:28 MJRoberts Exp $";
4 #endif
5 
6 /*
7  *   Copyright (c) 1998, 2002 Michael J. Roberts.  All Rights Reserved.
8  *
9  *   Please see the accompanying license file, LICENSE.TXT, for information
10  *   on using and copying this software.
11  */
12 /*
13 Name
14   vmstr.cpp - VM string metaclass implementation
15 Function
16 
17 Notes
18 
19 Modified
20   10/28/98 MJRoberts  - Creation
21 */
22 
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26 
27 #include "t3std.h"
28 #include "vmmcreg.h"
29 #include "vmobj.h"
30 #include "vmstr.h"
31 #include "utf8.h"
32 #include "vmerr.h"
33 #include "vmerrnum.h"
34 #include "vmfile.h"
35 #include "vmstack.h"
36 #include "vmpool.h"
37 #include "vmmeta.h"
38 #include "vmrun.h"
39 #include "vmbif.h"
40 #include "vmpredef.h"
41 #include "vmlst.h"
42 #include "vmuni.h"
43 #include "vmcset.h"
44 #include "vmbytarr.h"
45 #include "charmap.h"
46 
47 
48 /* ------------------------------------------------------------------------ */
49 /*
50  *   statics
51  */
52 
53 /* metaclass registration object */
54 static CVmMetaclassString metaclass_reg_obj;
55 CVmMetaclass *CVmObjString::metaclass_reg_ = &metaclass_reg_obj;
56 
57 /* function table */
58 int (*CVmObjString::func_table_[])(VMG_ vm_val_t *retval,
59                                    const vm_val_t *self_val,
60                                    const char *str, uint *argc) =
61 {
62     &CVmObjString::getp_undef,
63     &CVmObjString::getp_len,
64     &CVmObjString::getp_substr,
65     &CVmObjString::getp_upper,
66     &CVmObjString::getp_lower,
67     &CVmObjString::getp_find,
68     &CVmObjString::getp_to_uni,
69     &CVmObjString::getp_htmlify,
70     &CVmObjString::getp_starts_with,
71     &CVmObjString::getp_ends_with,
72     &CVmObjString::getp_to_byte_array,
73     &CVmObjString::getp_replace
74 };
75 
76 /* ------------------------------------------------------------------------ */
77 /*
78  *   Static creation methods
79  */
80 
81 
82 /* create dynamically using stack arguments */
create_from_stack(VMG_ const uchar **,uint)83 vm_obj_id_t CVmObjString::create_from_stack(VMG_ const uchar **, uint)
84 {
85     /* dynamic string construction is not currently supported */
86     err_throw(VMERR_BAD_DYNAMIC_NEW);
87 
88     /* the compiler doesn't know we won't make it here */
89     return VM_INVALID_OBJ;
90 }
91 
92 /* create a string with no initial contents */
create(VMG_ int in_root_set)93 vm_obj_id_t CVmObjString::create(VMG_ int in_root_set)
94 {
95     vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
96     new (vmg_ id) CVmObjString();
97     return id;
98 }
99 
100 /* create with a given buffer size */
create(VMG_ int in_root_set,size_t byte_size)101 vm_obj_id_t CVmObjString::create(VMG_ int in_root_set, size_t byte_size)
102 {
103     vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
104     new (vmg_ id) CVmObjString(vmg_ byte_size);
105     return id;
106 }
107 
108 /* create from a constant UTF-8 string */
create(VMG_ int in_root_set,const char * str,size_t bytelen)109 vm_obj_id_t CVmObjString::create(VMG_ int in_root_set,
110                                  const char *str, size_t bytelen)
111 {
112     vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
113     new (vmg_ id) CVmObjString(vmg_ str, bytelen);
114     return id;
115 }
116 
117 /* ------------------------------------------------------------------------ */
118 /*
119  *   Constructors
120  */
121 
122 /*
123  *   create a string object with a given buffer size
124  */
CVmObjString(VMG_ size_t len)125 CVmObjString::CVmObjString(VMG_ size_t len)
126 {
127     /*
128      *   the length is limited to an unsigned 16-bit value (NB: it really is
129      *   65535 on ALL PLATFORMS - this is a portable limit imposed by the
130      *   portable storage format, not a local platform limit)
131      */
132     if (len > 65535)
133     {
134         ext_ = 0;
135         err_throw(VMERR_STR_TOO_LONG);
136     }
137 
138     /*
139      *   allocate space for the buffer plus the length prefix in the
140      *   variable heap
141      */
142     ext_ = (char *)G_mem->get_var_heap()->alloc_mem(len + VMB_LEN, this);
143 
144     /* set the length */
145     vmb_put_len(ext_, len);
146 }
147 
148 /*
149  *   create a string object from a given UTF8 string constant
150  */
CVmObjString(VMG_ const char * str,size_t len)151 CVmObjString::CVmObjString(VMG_ const char *str, size_t len)
152 {
153     /* check for the length limit */
154     if (len > 65535)
155     {
156         ext_ = 0;
157         err_throw(VMERR_STR_TOO_LONG);
158     }
159 
160     /*
161      *   allocate space for the string plus the length prefix in the
162      *   variable heap
163      */
164     ext_ = (char *)G_mem->get_var_heap()->alloc_mem(len + VMB_LEN, this);
165 
166     /*
167      *   store the length prefix in portable format (so that we can easily
168      *   write our contents to a saved state file)
169      */
170     vmb_put_len(ext_, len);
171 
172     /* copy the string's bytes */
173     memcpy(ext_ + VMB_LEN, str, len);
174 }
175 
176 /* ------------------------------------------------------------------------ */
177 /*
178  *   receive notification of deletion
179  */
notify_delete(VMG_ int in_root_set)180 void CVmObjString::notify_delete(VMG_ int in_root_set)
181 {
182     /* free our extension */
183     if (ext_ != 0 && !in_root_set)
184         G_mem->get_var_heap()->free_mem(ext_);
185 }
186 
187 /* ------------------------------------------------------------------------ */
188 /*
189  *   Set a property.  Strings have no settable properties, so simply
190  *   signal an error indicating that the set-prop call is invalid.
191  */
set_prop(VMG_ CVmUndo *,vm_obj_id_t,vm_prop_id_t,const vm_val_t *)192 void CVmObjString::set_prop(VMG_ CVmUndo *, vm_obj_id_t,
193                             vm_prop_id_t, const vm_val_t *)
194 {
195     err_throw(VMERR_INVALID_SETPROP);
196 }
197 
198 /* ------------------------------------------------------------------------ */
199 /*
200  *   Save the object to a file
201  */
save_to_file(VMG_ CVmFile * fp)202 void CVmObjString::save_to_file(VMG_ CVmFile *fp)
203 {
204     size_t len;
205 
206     /* get our length */
207     len = vmb_get_len(ext_);
208 
209     /* write the length prefix and the string */
210     fp->write_bytes(ext_, len + VMB_LEN);
211 }
212 
213 /*
214  *   Restore the object from a file
215  */
restore_from_file(VMG_ vm_obj_id_t,CVmFile * fp,CVmObjFixup *)216 void CVmObjString::restore_from_file(VMG_ vm_obj_id_t,
217                                      CVmFile *fp, CVmObjFixup *)
218 {
219     size_t len;
220 
221     /* read the length prefix */
222     len = fp->read_uint2();
223 
224     /* free any existing extension */
225     if (ext_ != 0)
226     {
227         G_mem->get_var_heap()->free_mem(ext_);
228         ext_ = 0;
229     }
230 
231     /*
232      *   allocate our extension - make room for the length prefix plus the
233      *   bytes of the string
234      */
235     ext_ = (char *)G_mem->get_var_heap()->alloc_mem(len + VMB_LEN, this);
236 
237     /* store our length prefix */
238     vmb_put_len(ext_, len);
239 
240     /* read the string */
241     fp->read_bytes(ext_ + VMB_LEN, len);
242 }
243 
244 /* ------------------------------------------------------------------------ */
245 /*
246  *   Add a value to this string
247  */
add_val(VMG_ vm_val_t * result,vm_obj_id_t self,const vm_val_t * val)248 void CVmObjString::add_val(VMG_ vm_val_t *result,
249                            vm_obj_id_t self, const vm_val_t *val)
250 {
251     /*
252      *   Use the generic string adder, using my extension as the constant
253      *   string.  We store our extension in the general string format
254      *   required by the static adder.
255      */
256     add_to_str(vmg_ result, self, ext_, val);
257 }
258 
259 /*
260  *   Static string adder.  This creates a new string object that results
261  *   from appending the given value to the given string constant.  This is
262  *   defined statically so that this same code can be shared for adding to
263  *   constant pool strings and adding to CVmObjString objects.
264  *
265  *   'strval' must point to a constant string.  The first two bytes of the
266  *   string are stored in portable UINT2 format and give the length in
267  *   bytes of the string, not including the length prefix; immediately
268  *   following the length prefix are the bytes of the string.
269  *
270  *   Note that we *always* create a new object to hold the result, even if
271  *   the new string is identical to the first, so that we consistently
272  *   return a distinct reference from the original.
273  */
add_to_str(VMG_ vm_val_t * result,vm_obj_id_t self,const char * strval1,const vm_val_t * val)274 void CVmObjString::add_to_str(VMG_ vm_val_t *result,
275                               vm_obj_id_t self, const char *strval1,
276                               const vm_val_t *val)
277 {
278     const char *strval2;
279     char buf[128];
280     vm_obj_id_t obj;
281     size_t len1, len2;
282     CVmObjString *objptr;
283     vm_val_t new_obj2;
284 
285     /* convert the value to be appended to a string */
286     strval2 = cvt_to_str(vmg_ &new_obj2, buf, sizeof(buf), val, 10);
287 
288     /*
289      *   push the new string (if any) and self, to protect the two strings
290      *   from garbage collection
291      */
292     G_stk->push()->set_obj(self);
293     G_stk->push(&new_obj2);
294 
295     /* get the lengths of the two strings */
296     len1 = vmb_get_len(strval1);
297     len2 = vmb_get_len(strval2);
298 
299     /* create a new string object to hold the result */
300     obj = create(vmg_ FALSE, len1 + len2);
301     objptr = (CVmObjString *)vm_objp(vmg_ obj);
302 
303     /* copy the two strings into the new object's string buffer */
304     objptr->copy_into_str(0, strval1 + VMB_LEN, len1);
305     objptr->copy_into_str(len1, strval2 + VMB_LEN, len2);
306 
307     /* we're done with the garbage collection protection */
308     G_stk->discard(2);
309 
310     /* return the new object in the result */
311     result->set_obj(obj);
312 }
313 
314 
315 /* ------------------------------------------------------------------------ */
316 /*
317  *   Allocate a string buffer large enough to hold a given value.  We'll
318  *   use the provided buffer if possible.
319  *
320  *   If the provided buffer is null or is not large enough, we'll allocate
321  *   a new string object with a large enough buffer to hold the value, and
322  *   return the object's extension as the buffer.  This object will never
323  *   be referenced by anyone, so it will be deleted at the next garbage
324  *   collection.
325  *
326  *   The buffer size and requested size are in bytes.
327  */
alloc_str_buf(VMG_ vm_val_t * new_obj,char * buf,size_t buf_size,size_t required_size)328 char *CVmObjString::alloc_str_buf(VMG_ vm_val_t *new_obj,
329                                   char *buf, size_t buf_size,
330                                   size_t required_size)
331 {
332     vm_obj_id_t obj;
333 
334     /* if the provided buffer is large enough, use it */
335     if (buf != 0 && buf_size >= required_size)
336     {
337         /* there's no new object */
338         new_obj->set_nil();
339 
340         /* return the buffer */
341         return buf;
342     }
343 
344     /* allocate a new string object */
345     obj = create(vmg_ FALSE, required_size);
346 
347     /* return the new object's string buffer */
348     return (char *)vm_objp(vmg_ obj)->cast_to_string(vmg_ obj, new_obj);
349 }
350 
351 /* ------------------------------------------------------------------------ */
352 /*
353  *   Convert a value to a string
354  */
cvt_to_str(VMG_ vm_val_t * new_str,char * result_buf,size_t result_buf_size,const vm_val_t * val,int radix)355 const char *CVmObjString::cvt_to_str(VMG_ vm_val_t *new_str,
356                                      char *result_buf,
357                                      size_t result_buf_size,
358                                      const vm_val_t *val, int radix)
359 {
360     /* presume we won't need to create a new string object */
361     new_str->set_nil();
362 
363     /* check the type of the value */
364     switch(val->typ)
365     {
366     case VM_SSTRING:
367         /* it's a string constant - no conversion is necessary */
368         return G_const_pool->get_ptr(val->val.ofs);
369 
370     case VM_OBJ:
371         /* it's an object - ask it for its string representation */
372         return vm_objp(vmg_ val->val.obj)
373             ->cast_to_string(vmg_ val->val.obj, new_str);
374         break;
375 
376     case VM_INT:
377         /*
378          *   It's a number - convert it to a string.  Use the provided
379          *   result buffer if possible, but make sure we have room for the
380          *   number.  The unicode values we're storing are in the ascii
381          *   range, so we only need one byte per character.
382          */
383         result_buf = alloc_str_buf(vmg_ new_str,
384                                    result_buf, result_buf_size, 20);
385 
386         /* generate the string */
387         return cvt_int_to_str(result_buf, 20, val->val.intval, radix);
388 
389     case VM_NIL:
390         /* nil - use the literal string "nil" */
391         return "\003\000nil";
392         break;
393 
394     case VM_TRUE:
395         /* true - use the literal string "true" */
396         return "\004\000true";
397         break;
398 
399     default:
400         /* other types cannot be added to a string */
401         err_throw(VMERR_NO_STR_CONV);
402 
403         /* we never really get here, but the compiler doesn't know that */
404         return 0;
405     }
406 }
407 
408 /* ------------------------------------------------------------------------ */
409 /*
410  *   Convert an integer to a string, storing the result in the given
411  *   buffer in portable string format (with length prefix).  The radix
412  *   must be 8, 10, or 16.
413  *
414  *   Decimal numbers are treated as signed, and a leading dash is included
415  *   if the number is negative.  Octal and hex numbers are treated as
416  *   unsigned.
417  *
418  *   For efficiency, we store the number at the end of the buffer (this
419  *   makes it easy to generate the number, since we need to generate
420  *   numerals in reverse order).  We return a pointer to the result, which
421  *   may not start at the beginning of the buffer.
422  */
cvt_int_to_str(char * buf,size_t buflen,int32 inval,int radix)423 char *CVmObjString::cvt_int_to_str(char *buf, size_t buflen,
424                                    int32 inval, int radix)
425 {
426     int neg;
427     uint32 val;
428     char *p;
429     size_t len;
430 
431     /* start at the end of the buffer */
432     p = buf + buflen;
433 
434     /*
435      *   if it's negative, and we're converting to decimal representation,
436      *   treat the value as signed and use a leading minus sign;
437      *   otherwise, treat the value as unsigned
438      */
439     if (radix == 10 && inval < 0)
440     {
441         /* note that we need a minus sign */
442         neg = TRUE;
443 
444         /* use the positive value for the conversion */
445         val = (uint32)(-inval);
446     }
447     else
448     {
449         /* the value is positive (or at least unsigned) */
450         neg = FALSE;
451 
452         /* use the value as-is */
453         val = (uint32)inval;
454     }
455 
456     /* store numerals in reverse order */
457     do
458     {
459         char c;
460 
461         /* if we have no more room, throw an error */
462         if (p == buf)
463             err_throw(VMERR_CONV_BUF_OVF);
464 
465         /* move on to the next available character in the buffer */
466         --p;
467 
468         /* figure the character representation of this numeral */
469         c = (char)(val % radix);
470         if (c < 10)
471             c += '0';
472         else
473             c += 'A' - 10;
474 
475         /* store the numeral at the current location */
476         *p = c;
477 
478         /* divide the remaining number by the radix */
479         val /= radix;
480     } while (val != 0);
481 
482     /* store the leading minus sign if necessary */
483     if (neg)
484     {
485         /* if we don't have room, throw an error */
486         if (p == buf)
487             err_throw(VMERR_CONV_BUF_OVF);
488 
489         /* move to the next byte */
490         --p;
491 
492         /* store the minus sign */
493         *p = '-';
494     }
495 
496     /* calculate the length */
497     len = buflen - (p - buf);
498 
499     /* make sure we have room for the length prefix */
500     if (p < buf + 2)
501         err_throw(VMERR_CONV_BUF_OVF);
502 
503     /* store the length prefix */
504     p -= 2;
505     vmb_put_len(p, len);
506 
507     /* return the pointer to the start of the number */
508     return p;
509 }
510 
511 /* ------------------------------------------------------------------------ */
512 /*
513  *   Check a value for equality
514  */
equals(VMG_ vm_obj_id_t self,const vm_val_t * val,int) const515 int CVmObjString::equals(VMG_ vm_obj_id_t self,
516                          const vm_val_t *val, int /*depth*/) const
517 {
518     /* if the other value is a reference to myself, we certainly match */
519     if (val->typ == VM_OBJ && val->val.obj == self)
520         return TRUE;
521 
522     /*
523      *   use the constant string comparison routine, using our underlying
524      *   string as the constant string data
525      */
526     return const_equals(vmg_ ext_, val);
527 }
528 
529 /*
530  *   Constant string equality test
531  */
const_equals(VMG_ const char * str,const vm_val_t * val)532 int CVmObjString::const_equals(VMG_ const char *str, const vm_val_t *val)
533 {
534     const char *str2;
535     size_t len;
536 
537     /* get the other value as a string */
538     str2 = val->get_as_string(vmg0_);
539 
540     /* if the object doesn't have an underlying string, we don't match */
541     if (str2 == 0)
542         return FALSE;
543 
544     /*
545      *   if their lengths match, and the bytes match exactly, we have a
546      *   match; otherwise, they're not equal
547      */
548     len = vmb_get_len(str);
549     return (len == vmb_get_len(str2)
550             && memcmp(str + VMB_LEN, str2 + VMB_LEN, len) == 0);
551 }
552 
553 /* ------------------------------------------------------------------------ */
554 /*
555  *   Hash value
556  */
calc_hash(VMG_ vm_obj_id_t self,int) const557 uint CVmObjString::calc_hash(VMG_ vm_obj_id_t self, int /*depth*/) const
558 {
559     return const_calc_hash(ext_);
560 }
561 
562 /*
563  *   Hash value calculation
564  */
const_calc_hash(const char * str)565 uint CVmObjString::const_calc_hash(const char *str)
566 {
567     size_t len;
568     uint hash;
569     utf8_ptr p;
570 
571     /* get and skip the length prefix */
572     len = vmb_get_len(str);
573     str += VMB_LEN;
574 
575     /* scan the string and calculate the hash */
576     for (p.set((char *)str), hash = 0 ; len != 0 ; p.inc(&len))
577         hash += p.getch();
578 
579     /* return the result */
580     return hash;
581 }
582 
583 
584 /* ------------------------------------------------------------------------ */
585 /*
586  *   Compare this string to another value
587  */
compare_to(VMG_ vm_obj_id_t,const vm_val_t * val) const588 int CVmObjString::compare_to(VMG_ vm_obj_id_t /*self*/,
589                              const vm_val_t *val) const
590 {
591     /* use the static string magnitude comparison routine */
592     return const_compare(vmg_ ext_, val);
593 }
594 
595 /*
596  *   Compare a constant string value to another value.  Returns a positive
597  *   number if the constant string is lexically greater than the other
598  *   value, a negative number if the constant string is lexically less
599  *   than the other value, or zero if the constant string is lexically
600  *   identical to the other value.
601  *
602  *   The other value must be a string constant or an object with an
603  *   underlying string value.  We'll throw an error for any other type of
604  *   value.
605  */
const_compare(VMG_ const char * str1,const vm_val_t * val)606 int CVmObjString::const_compare(VMG_ const char *str1, const vm_val_t *val)
607 {
608     const char *str2;
609     size_t len1, len2;
610 
611     /* get the other value as a string */
612     str2 = val->get_as_string(vmg0_);
613 
614     /* if it's not a string, we can't compare it */
615     if (str2 == 0)
616         err_throw(VMERR_INVALID_COMPARISON);
617 
618     /* get the lengths of the two strings */
619     len1 = vmb_get_len(str1);
620     len2 = vmb_get_len(str2);
621 
622     /* perform a lexical comparison and return the result */
623     return utf8_ptr::s_compare_to(str1 + VMB_LEN, len1, str2 + VMB_LEN, len2);
624 }
625 
626 /* ------------------------------------------------------------------------ */
627 /*
628  *   Find a substring within a string
629  */
find_substr(VMG_ const char * str,int start_idx,const char * substr,size_t * idxp)630 const char *CVmObjString::find_substr(VMG_ const char *str, int start_idx,
631                                       const char *substr, size_t *idxp)
632 {
633     utf8_ptr p;
634     size_t rem;
635     size_t sublen;
636     size_t char_ofs;
637     int i;
638 
639     /* get the lengths */
640     rem = vmb_get_len(str);
641     sublen = vmb_get_len(substr);
642 
643     /* set up utf8 pointer into the string */
644     p.set((char *)str + 2);
645 
646     /* skip to the starting index */
647     for (i = start_idx ; i > 0 && rem >= sublen ; --i, p.inc(&rem)) ;
648 
649     /* scan for the substring */
650     for (char_ofs = 0 ; rem != 0 && rem >= sublen ; ++char_ofs, p.inc(&rem))
651     {
652         /* check for a match */
653         if (memcmp(p.getptr(), substr + VMB_LEN, sublen) == 0)
654         {
655             /* it's a match - set the return index if they are interested */
656             if (idxp != 0)
657                 *idxp = char_ofs + start_idx;
658 
659             /* return the current pointer */
660             return p.getptr();
661         }
662     }
663 
664     /* we didn't find it - so indicate by returning null */
665     return 0;
666 }
667 
668 /* ------------------------------------------------------------------------ */
669 /*
670  *   Evaluate a property
671  */
get_prop(VMG_ vm_prop_id_t prop,vm_val_t * retval,vm_obj_id_t self,vm_obj_id_t * source_obj,uint * argc)672 int CVmObjString::get_prop(VMG_ vm_prop_id_t prop, vm_val_t *retval,
673                            vm_obj_id_t self, vm_obj_id_t *source_obj,
674                            uint *argc)
675 {
676     vm_val_t self_val;
677 
678     /* use the constant evaluator */
679     self_val.set_obj(self);
680     if (const_get_prop(vmg_ retval, &self_val, ext_, prop, source_obj, argc))
681     {
682         *source_obj = metaclass_reg_->get_class_obj(vmg0_);
683         return TRUE;
684     }
685 
686     /* inherit default handling from the base object class */
687     return CVmObject::get_prop(vmg_ prop, retval, self, source_obj, argc);
688 }
689 
690 /* ------------------------------------------------------------------------ */
691 /*
692  *   Evaluate a property of a constant string value
693  */
const_get_prop(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,vm_prop_id_t prop,vm_obj_id_t * src_obj,uint * argc)694 int CVmObjString::const_get_prop(VMG_ vm_val_t *retval,
695                                  const vm_val_t *self_val, const char *str,
696                                  vm_prop_id_t prop, vm_obj_id_t *src_obj,
697                                  uint *argc)
698 {
699     ushort func_idx;
700 
701     /* presume no source object */
702     *src_obj = VM_INVALID_OBJ;
703 
704     /* translate the property index to an index into our function table */
705     func_idx = G_meta_table
706                ->prop_to_vector_idx(metaclass_reg_->get_reg_idx(), prop);
707 
708     /* call the appropriate function */
709     if ((*func_table_[func_idx])(vmg_ retval, self_val, str, argc))
710         return TRUE;
711 
712     /*
713      *   If this is a constant string (which is indicated by an invalid
714      *   'self' object ID), try inheriting the default object
715      *   interpretation, passing the constant string placeholder object
716      *   for its type information.
717      */
718     if (self_val->typ != VM_OBJ)
719     {
720         /* try going to CVmObject directly */
721         if (vm_objp(vmg_ G_predef->const_str_obj)
722             ->CVmObject::get_prop(vmg_ prop, retval, G_predef->const_str_obj,
723                                   src_obj, argc))
724             return TRUE;
725     }
726 
727     /* not handled */
728     return FALSE;
729 }
730 
731 /* ------------------------------------------------------------------------ */
732 /*
733  *   property evaluator - get the length
734  */
getp_len(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)735 int CVmObjString::getp_len(VMG_ vm_val_t *retval, const vm_val_t *self_val,
736                            const char *str, uint *argc)
737 {
738     utf8_ptr p;
739     static CVmNativeCodeDesc desc(0);
740 
741     /* check arguments */
742     if (get_prop_check_argc(retval, argc, &desc))
743         return TRUE;
744 
745     /* set up a utf-8 pointer to the string's contents */
746     p.set((char *)str + VMB_LEN);
747 
748     /* return the character length of the string */
749     retval->set_int(p.len(vmb_get_len(str)));
750 
751     /* handled */
752     return TRUE;
753 }
754 
755 /* ------------------------------------------------------------------------ */
756 /*
757  *   property evaluator - extract a substring
758  */
getp_substr(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * in_argc)759 int CVmObjString::getp_substr(VMG_ vm_val_t *retval, const vm_val_t *self_val,
760                               const char *str, uint *in_argc)
761 {
762     long start;
763     ulong len;
764     size_t rem;
765     utf8_ptr p;
766     utf8_ptr start_p;
767     size_t start_rem;
768     size_t new_len;
769     vm_obj_id_t obj;
770     uint argc = (in_argc == 0 ? 0 : *in_argc);
771     static CVmNativeCodeDesc desc(1, 1);
772 
773     /* check arguments */
774     if (get_prop_check_argc(retval, in_argc, &desc))
775         return TRUE;
776 
777     /* pop the starting index */
778     start = CVmBif::pop_long_val(vmg0_);
779 
780     /* pop the length, if present */
781     if (argc >= 2)
782         len = CVmBif::pop_long_val(vmg0_);
783 
784     /* push a self-reference to protect against GC */
785     G_stk->push(self_val);
786 
787     /* set up a utf8 pointer to traverse the string */
788     p.set((char *)str + VMB_LEN);
789 
790     /* get the byte length of the string */
791     rem = vmb_get_len(str);
792 
793     /*
794      *   Skip ahead to the starting index.  If the index is positive, it's
795      *   an index from the start of the string; if it's negative, it's an
796      *   offset from the end of the string.
797      */
798     if (start > 0)
799     {
800         /*
801          *   it's an index from the start - skip ahead by start-1 characters
802          *   (since a start value of 1 tells us to start at the first
803          *   character)
804          */
805         for ( ; start > 1 && rem != 0 ; --start)
806             p.inc(&rem);
807     }
808     else if (start < 0)
809     {
810         /*
811          *   It's an index from the end of the string: -1 tells us to start
812          *   at the last character, -2 at the second to last, and so on.
813          *   Move to the first byte past the end of the string, and work
814          *   backwards by the given number of characters.
815          */
816         for (p.set((char *)str + VMB_LEN + rem), rem = 0 ;
817              start < 0 && p.getptr() != (char *)str + VMB_LEN ; ++start)
818         {
819             /* move back one character */
820             p.dec(&rem);
821         }
822     }
823 
824     /* this is the starting position */
825     start_p = p;
826     start_rem = rem;
827 
828     /*
829      *   if a length was specified, calculate the number of bytes in the
830      *   given length; otherwise, use the entire remainder of the string
831      */
832     if (argc >= 2)
833     {
834         /* keep skipping ahead by the desired length */
835         for ( ; len > 0 && rem != 0 ; --len)
836             p.inc(&rem);
837 
838         /* use the difference in lengths from the starting point to here */
839         new_len = start_rem - rem;
840     }
841     else
842     {
843         /* use the entire remainder of the string */
844         new_len = start_rem;
845     }
846 
847     /* create the new string */
848     obj = CVmObjString::create(vmg_ FALSE, start_p.getptr(), new_len);
849 
850     /* return the new object */
851     retval->set_obj(obj);
852 
853     /* discard the GC protection references */
854     G_stk->discard();
855 
856     /* handled */
857     return TRUE;
858 }
859 
860 /* ------------------------------------------------------------------------ */
861 /*
862  *   property evaluator - toUpper
863  */
getp_upper(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)864 int CVmObjString::getp_upper(VMG_ vm_val_t *retval, const vm_val_t *self_val,
865                              const char *str, uint *argc)
866 {
867     size_t srclen;
868     size_t dstlen;
869     size_t rem;
870     utf8_ptr srcp;
871     utf8_ptr dstp;
872     vm_obj_id_t result_obj;
873     static CVmNativeCodeDesc desc(0);
874 
875     /* check arguments */
876     if (get_prop_check_argc(retval, argc, &desc))
877         return TRUE;
878 
879     /* get my length */
880     srclen = vmb_get_len(str);
881 
882     /* leave the string on the stack as GC protection */
883     G_stk->push(self_val);
884 
885     /*
886      *   Scan the string to determine how long the result will be.  The
887      *   result won't necessarily be the same length as the original,
888      *   because a two-byte character in the original could turn into a
889      *   three-byte character in the result, and vice versa.  (We could
890      *   allocate a result buffer three times the length of the original,
891      *   but this seems more wasteful of space than scanning the string
892      *   twice is wasteful of time.  It's a trade-off, though.)
893      */
894     for (dstlen = 0, srcp.set((char *)str + VMB_LEN), rem = srclen ;
895          rem != 0 ; srcp.inc(&rem))
896     {
897         /* get the size of the mapping for this character */
898         dstlen += utf8_ptr::s_wchar_size(t3_to_upper(srcp.getch()));
899     }
900 
901     /* allocate the result string */
902     result_obj = CVmObjString::create(vmg_ FALSE, dstlen);
903 
904     /* get a pointer to the result buffer */
905     dstp.set(((CVmObjString *)vm_objp(vmg_ result_obj))->cons_get_buf());
906 
907     /* write the string */
908     for (srcp.set((char *)str + VMB_LEN), rem = srclen ;
909          rem != 0 ; srcp.inc(&rem))
910     {
911         /* write the next character */
912         dstp.setch(t3_to_upper(srcp.getch()));
913     }
914 
915     /* return the value */
916     retval->set_obj(result_obj);
917 
918     /* discard GC protection */
919     G_stk->discard();
920 
921     /* handled */
922     return TRUE;
923 }
924 
925 /* ------------------------------------------------------------------------ */
926 /*
927  *   property evaluator - toLower
928  */
getp_lower(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)929 int CVmObjString::getp_lower(VMG_ vm_val_t *retval, const vm_val_t *self_val,
930                              const char *str, uint *argc)
931 {
932     size_t srclen;
933     size_t dstlen;
934     size_t rem;
935     utf8_ptr srcp;
936     utf8_ptr dstp;
937     vm_obj_id_t result_obj;
938     static CVmNativeCodeDesc desc(0);
939 
940     /* check arguments */
941     if (get_prop_check_argc(retval, argc, &desc))
942         return TRUE;
943 
944     /* get my length */
945     srclen = vmb_get_len(str);
946 
947     /* leave the string on the stack as GC protection */
948     G_stk->push(self_val);
949 
950     /*
951      *   Scan the string to determine how long the result will be.  The
952      *   result won't necessarily be the same length as the original,
953      *   because a two-byte character in the original could turn into a
954      *   three-byte character in the result, and vice versa.  (We could
955      *   allocate a result buffer three times the length of the original,
956      *   but this seems more wasteful of space than scanning the string
957      *   twice is wasteful of time.  It's a trade-off, though.)
958      */
959     for (dstlen = 0, srcp.set((char *)str + VMB_LEN), rem = srclen ;
960          rem != 0 ; srcp.inc(&rem))
961     {
962         /* get the size of the mapping for this character */
963         dstlen += utf8_ptr::s_wchar_size(t3_to_lower(srcp.getch()));
964     }
965 
966     /* allocate the result string */
967     result_obj = CVmObjString::create(vmg_ FALSE, dstlen);
968 
969     /* get a pointer to the result buffer */
970     dstp.set(((CVmObjString *)vm_objp(vmg_ result_obj))->cons_get_buf());
971 
972     /* write the string */
973     for (srcp.set((char *)str + VMB_LEN), rem = srclen ;
974          rem != 0 ; srcp.inc(&rem))
975     {
976         /* write the next character */
977         dstp.setch(t3_to_lower(srcp.getch()));
978     }
979 
980     /* return the value */
981     retval->set_obj(result_obj);
982 
983     /* discard GC protection */
984     G_stk->discard();
985 
986     /* handled */
987     return TRUE;
988 }
989 
990 /* ------------------------------------------------------------------------ */
991 /*
992  *   property evaluator - find
993  */
getp_find(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)994 int CVmObjString::getp_find(VMG_ vm_val_t *retval, const vm_val_t *self_val,
995                             const char *str, uint *argc)
996 {
997     const char *str2;
998     size_t idx;
999     uint orig_argc = (argc != 0 ? *argc : 0);
1000     static CVmNativeCodeDesc desc(1, 1);
1001     int start_idx;
1002 
1003     /* check arguments */
1004     if (get_prop_check_argc(retval, argc, &desc))
1005         return TRUE;
1006 
1007     /* retrieve the string to find */
1008     str2 = CVmBif::pop_str_val(vmg0_);
1009 
1010     /* if there's a starting index, retrieve it */
1011     start_idx = (orig_argc >= 2 ? CVmBif::pop_int_val(vmg0_) - 1 : 0);
1012 
1013     /* find the substring */
1014     if (find_substr(vmg_ str, start_idx, str2, &idx) != 0)
1015     {
1016         /* we found it - adjust to a 1-based value for return */
1017         retval->set_int(idx + 1);
1018     }
1019     else
1020     {
1021         /* didn't find it - return nil */
1022         retval->set_nil();
1023     }
1024 
1025     /* handled */
1026     return TRUE;
1027 }
1028 
1029 /* ------------------------------------------------------------------------ */
1030 /*
1031  *   replace flags
1032  */
1033 #define GETP_RPL_ALL    0x0001
1034 
1035 /*
1036  *   property evaluator - replace
1037  */
getp_replace(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)1038 int CVmObjString::getp_replace(VMG_ vm_val_t *retval,
1039                                const vm_val_t *self_val,
1040                                const char *str, uint *argc)
1041 {
1042     vm_val_t arg1;
1043     vm_val_t arg2;
1044     const char *substr;
1045     const char *rplstr;
1046     size_t sublen;
1047     size_t rpllen;
1048     uint orig_argc = (argc != 0 ? *argc : 0);
1049     static CVmNativeCodeDesc desc(3, 1);
1050     int flags;
1051     utf8_ptr p;
1052     size_t rem;
1053     size_t new_len;
1054     int found;
1055     int start_idx;
1056     const char *rpl_start;
1057 
1058     /* check arguments */
1059     if (get_prop_check_argc(retval, argc, &desc))
1060         return TRUE;
1061 
1062     /*
1063      *   make copies of the string references, so we can put them back on the
1064      *   stack as gc protection while we're working
1065      */
1066     arg1 = *G_stk->get(0);
1067     arg2 = *G_stk->get(1);
1068 
1069     /* retrieve the search and replacement substrings */
1070     substr = CVmBif::pop_str_val(vmg0_);
1071     rplstr = CVmBif::pop_str_val(vmg0_);
1072 
1073     /* note the string lengths */
1074     sublen = vmb_get_len(substr);
1075     rpllen = vmb_get_len(rplstr);
1076 
1077     /* get the flags */
1078     flags = CVmBif::pop_int_val(vmg0_);
1079 
1080     /* if there's a starting index, retrieve it */
1081     start_idx = (orig_argc >= 4 ? CVmBif::pop_int_val(vmg0_) - 1 : 0);
1082 
1083     /* put the string references back on the stack for gc protection */
1084     G_stk->push(&arg1);
1085     G_stk->push(&arg2);
1086 
1087     /* start at the beginning of the string to search */
1088     rem = new_len = vmb_get_len(str);
1089     p.set((char *)str + 2);
1090 
1091     /* skip ahead to the starting index */
1092     for ( ; start_idx > 0 && rem >= sublen ; --start_idx, p.inc(&rem)) ;
1093 
1094     /*
1095      *   note the starting index for replacements - we don't want to replace
1096      *   anything before this point
1097      */
1098     rpl_start = p.getptr();
1099 
1100     /*
1101      *   Scan for instances of the substring, so we can figure out how big
1102      *   the result string will be.  Don't actually do any replacements yet;
1103      *   we'll scan again once we know how the result size.
1104      */
1105     for (found = FALSE ; rem >= sublen ; )
1106     {
1107         /* if this is a match for the substring, note it */
1108         if (memcmp(p.getptr(), substr + VMB_LEN, vmb_get_len(substr)) == 0)
1109         {
1110             /* note the find */
1111             found = TRUE;
1112 
1113             /* it's a match - adjust the result length for the replacement */
1114             new_len += rpllen - sublen;
1115 
1116             /* if we're replacing one instance only, look no further */
1117             if ((flags & GETP_RPL_ALL) == 0)
1118                 break;
1119 
1120             /* skip the entire substring in the source */
1121             p.set(p.getptr() + sublen);
1122             rem -= sublen;
1123         }
1124         else
1125         {
1126             /* skip one character */
1127             p.inc(&rem);
1128         }
1129     }
1130 
1131     /*
1132      *   if we found no instances of the search substring, the result is
1133      *   simply the source string; otherwise, we must create a new string
1134      *   with the substitution(s)
1135      */
1136     if (found)
1137     {
1138         utf8_ptr dst;
1139 
1140         /* allocate the new string */
1141         retval->set_obj(create(vmg_ FALSE, new_len));
1142 
1143         /* get a pointer to the buffer */
1144         dst.set(((CVmObjString *)vm_objp(vmg_ retval->val.obj))
1145                 ->cons_get_buf());
1146 
1147         /* scan the string for replacements */
1148         for (p.set((char *)str + 2), rem = vmb_get_len(str) ;
1149              rem >= sublen ; )
1150         {
1151             /*
1152              *   If this is a match for the substring, and we've reached the
1153              *   starting point for replacements, replace the substring.
1154              */
1155             if (p.getptr() >= rpl_start
1156                 && memcmp(p.getptr(), substr + VMB_LEN, sublen) == 0)
1157             {
1158                 /* it's a match - copy the replacement into the result */
1159                 memcpy(dst.getptr(), rplstr + VMB_LEN, rpllen);
1160 
1161                 /* move past the replacement in the result */
1162                 dst.set(dst.getptr() + rpllen);
1163 
1164                 /* move past the search substring in the source */
1165                 p.set(p.getptr() + sublen);
1166                 rem -= sublen;
1167 
1168                 /* if we're replacing one instance only, look no further */
1169                 if ((flags & GETP_RPL_ALL) == 0)
1170                     break;
1171             }
1172             else
1173             {
1174                 /* copy the current character to the result */
1175                 dst.setch(p.getch());
1176 
1177                 /* skip the current character of input */
1178                 p.inc(&rem);
1179             }
1180         }
1181 
1182         /* copy the remaining source into the result */
1183         if (rem != 0)
1184             memcpy(dst.getptr(), p.getptr(), rem);
1185     }
1186     else
1187     {
1188         /* we didn't find it - the result is simply the original string */
1189         *retval = *self_val;
1190     }
1191 
1192     /* discard the gc protection */
1193     G_stk->discard(2);
1194 
1195     /* handled */
1196     return TRUE;
1197 }
1198 
1199 /* ------------------------------------------------------------------------ */
1200 /*
1201  *   property evaluator - convert to unicode
1202  */
getp_to_uni(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * in_argc)1203 int CVmObjString::getp_to_uni(VMG_ vm_val_t *retval,
1204                               const vm_val_t *self_val,
1205                               const char *str, uint *in_argc)
1206 {
1207     uint argc = (in_argc != 0 ? *in_argc : 0);
1208     size_t bytelen;
1209     ulong idx;
1210     utf8_ptr p;
1211     static CVmNativeCodeDesc desc(0, 1);
1212 
1213     /* check arguments */
1214     if (get_prop_check_argc(retval, in_argc, &desc))
1215         return TRUE;
1216 
1217     /* retrieve the index argument if present */
1218     if (argc >= 1)
1219         idx = CVmBif::pop_long_val(vmg0_);
1220 
1221     /* push a self-reference as GC protection */
1222     G_stk->push(self_val);
1223 
1224     /* get and skip the string's length prefix */
1225     bytelen = vmb_get_len(str);
1226     str += VMB_LEN;
1227 
1228     /* set up a utf8 pointer to the string */
1229     p.set((char *)str);
1230 
1231     /* check for an index argument */
1232     if (argc >= 1)
1233     {
1234         /* skip through the string until we get to the desired index */
1235         for ( ; idx > 1 && bytelen != 0 ; --idx, p.inc(&bytelen)) ;
1236 
1237         /* check to see if we have a character available */
1238         if (idx == 1 && bytelen != 0)
1239         {
1240             /* the index is valid - return the character here */
1241             retval->set_int((long)p.getch());
1242         }
1243         else
1244         {
1245             /*
1246              *   the index is past the end of the string or is less than 1
1247              *   - return nil to indicate that there's no character here
1248              */
1249             retval->set_nil();
1250         }
1251     }
1252     else
1253     {
1254         size_t charlen;
1255         vm_obj_id_t lst_obj;
1256         CVmObjList *lst;
1257         size_t i;
1258 
1259         /*
1260          *   There's no index argument - they want a list of all of the
1261          *   code points in the string.  First, get the number of
1262          *   characters in the string.
1263          */
1264         charlen = p.len(bytelen);
1265 
1266         /* create a list to hold the results */
1267         lst_obj = CVmObjList::create(vmg_ FALSE, charlen);
1268         lst = (CVmObjList *)vm_objp(vmg_ lst_obj);
1269 
1270         /* set the list's elements to the unicode characters values */
1271         for (i = 0 ; i < charlen ; ++i, p.inc())
1272         {
1273             wchar_t ch;
1274             vm_val_t ele_val;
1275 
1276             /* get this character */
1277             ch = p.getch();
1278 
1279             /* set this list element */
1280             ele_val.set_int((long)ch);
1281             lst->cons_set_element(i, &ele_val);
1282         }
1283 
1284         /* return the list object */
1285         retval->set_obj(lst_obj);
1286     }
1287 
1288     /* discard the GC protection */
1289     G_stk->discard();
1290 
1291     /* handled */
1292     return TRUE;
1293 }
1294 
1295 /* ------------------------------------------------------------------------ */
1296 /*
1297  *   property evaluator - htmlify
1298  */
1299 
1300 /*
1301  *   htmlify flags
1302  */
1303 
1304 /* preserve spaces */
1305 #define VMSTR_HTMLIFY_KEEP_SPACES   0x0001
1306 
1307 /* preserve newlines */
1308 #define VMSTR_HTMLIFY_KEEP_NEWLINES 0x0002
1309 
1310 /* preserve tabs */
1311 #define VMSTR_HTMLIFY_KEEP_TABS     0x0004
1312 
1313 /*
1314  *   htmlify implementation
1315  */
getp_htmlify(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * in_argc)1316 int CVmObjString::getp_htmlify(VMG_ vm_val_t *retval,
1317                                const vm_val_t *self_val,
1318                                const char *str, uint *in_argc)
1319 {
1320     uint argc = (in_argc != 0 ? *in_argc : 0);
1321     size_t bytelen;
1322     utf8_ptr p;
1323     utf8_ptr dstp;
1324     size_t rem;
1325     size_t extra;
1326     long flags;
1327     vm_obj_id_t result_obj;
1328     int prv_was_sp;
1329     static CVmNativeCodeDesc desc(0, 1);
1330 
1331     /* check arguments */
1332     if (get_prop_check_argc(retval, in_argc, &desc))
1333         return TRUE;
1334 
1335     /* if they specified flags, pop them */
1336     if (argc >= 1)
1337     {
1338         /* retrieve the flags */
1339         flags = CVmBif::pop_long_val(vmg0_);
1340     }
1341     else
1342     {
1343         /* no flags */
1344         flags = 0;
1345     }
1346 
1347     /* push a self-reference as GC protection */
1348     G_stk->push(self_val);
1349 
1350     /* get and skip the string's length prefix */
1351     bytelen = vmb_get_len(str);
1352     str += VMB_LEN;
1353 
1354     /*
1355      *   scan the string to determine how much space we'll have to add to
1356      *   generate the htmlified version
1357      */
1358     for (prv_was_sp = FALSE, extra = 0, p.set((char *)str), rem = bytelen ;
1359          rem != 0 ; p.inc(&rem))
1360     {
1361         int this_is_sp;
1362 
1363         /* presume it's not a space */
1364         this_is_sp = FALSE;
1365 
1366         /* check what we have */
1367         switch(p.getch())
1368         {
1369         case '&':
1370             /* we must replace '&' with '&amp;' - this adds four bytes */
1371             extra += 4;
1372             break;
1373 
1374         case '<':
1375             /* we must replace '<' with '&lt;' - this adds three bytes */
1376             extra += 3;
1377             break;
1378 
1379         case ' ':
1380             /*
1381              *   If we're in preserve-spaces mode, and the previous space
1382              *   was some kind of whitespace character, change this to
1383              *   '&nbsp;' - this adds five bytes
1384              */
1385             if (prv_was_sp && (flags & VMSTR_HTMLIFY_KEEP_SPACES) != 0)
1386                 extra += 5;
1387 
1388             /* note that this was a whitespace character */
1389             this_is_sp = TRUE;
1390             break;
1391 
1392         case '\t':
1393             /* if we're in preserve-tabs mode, change this to '<tab>' */
1394             if ((flags & VMSTR_HTMLIFY_KEEP_TABS) != 0)
1395                 extra += 4;
1396 
1397             /* note that this was a whitespace character */
1398             this_is_sp = TRUE;
1399             break;
1400 
1401         case '\n':
1402         case 0x2028:
1403             /* if we're in preserve-newlines mode, change this to '<br>' */
1404             if ((flags & VMSTR_HTMLIFY_KEEP_NEWLINES) != 0)
1405                 extra += 3;
1406 
1407             /* note that this was a whitespace character */
1408             this_is_sp = TRUE;
1409             break;
1410         }
1411 
1412         /* for next time, remember whether this is a space */
1413         prv_was_sp = this_is_sp;
1414     }
1415 
1416     /* allocate space for the new string */
1417     result_obj = create(vmg_ FALSE, bytelen + extra);
1418 
1419     /* get a pointer to the result buffer */
1420     dstp.set(((CVmObjString *)vm_objp(vmg_ result_obj))->cons_get_buf());
1421 
1422     /* translate the string and write the result */
1423     for (prv_was_sp = FALSE, p.set((char *)str), rem = bytelen ;
1424          rem != 0 ; p.inc(&rem))
1425     {
1426         wchar_t ch;
1427         int this_is_sp;
1428 
1429         /* get this character */
1430         ch = p.getch();
1431 
1432         /* presume it's not a space */
1433         this_is_sp = FALSE;
1434 
1435         /* check what we have */
1436         switch(ch)
1437         {
1438         case '&':
1439             /* replace '&' with '&amp;' */
1440             dstp.setch_str("&amp;");
1441             break;
1442 
1443         case '<':
1444             /* we must replace '<' with '&lt;' - this adds three bytes */
1445             dstp.setch_str("&lt;");
1446             break;
1447 
1448         case ' ':
1449             /* note that this was a whitespace character */
1450             this_is_sp = TRUE;
1451 
1452             /*
1453              *   ignore it if not in preserve-spaces mode, or if the
1454              *   previous character wasn't whitespace of some kind
1455              */
1456             if (!prv_was_sp || (flags & VMSTR_HTMLIFY_KEEP_SPACES) == 0)
1457                 goto do_default;
1458 
1459             /* add the nbsp */
1460             dstp.setch_str("&nbsp;");
1461             break;
1462 
1463         case '\t':
1464             /* note that this was a whitespace character */
1465             this_is_sp = TRUE;
1466 
1467             /* ignore if not in preserve-tabs mode */
1468             if ((flags & VMSTR_HTMLIFY_KEEP_TABS) == 0)
1469                 goto do_default;
1470 
1471             /* add the <tab> */
1472             dstp.setch_str("<tab>");
1473             break;
1474 
1475         case '\n':
1476         case 0x2028:
1477             /* note that this was a whitespace character */
1478             this_is_sp = TRUE;
1479 
1480             /* if we're not in preserve-newlines mode, ignore it */
1481             if ((flags & VMSTR_HTMLIFY_KEEP_NEWLINES) == 0)
1482                 goto do_default;
1483 
1484             /* add the <br> */
1485             dstp.setch_str("<br>");
1486             break;
1487 
1488         default:
1489         do_default:
1490             /* copy this character unchanged */
1491             dstp.setch(ch);
1492             break;
1493         }
1494 
1495         /* for next time, remember whether this is a space */
1496         prv_was_sp = this_is_sp;
1497     }
1498 
1499     /* return the new string */
1500     retval->set_obj(result_obj);
1501 
1502     /* discard the GC protection */
1503     G_stk->discard();
1504 
1505     /* handled */
1506     return TRUE;
1507 }
1508 
1509 /* ------------------------------------------------------------------------ */
1510 /*
1511  *   property evaluator - startsWith
1512  */
getp_starts_with(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)1513 int CVmObjString::getp_starts_with(VMG_ vm_val_t *retval,
1514                                    const vm_val_t *self_val,
1515                                    const char *str, uint *argc)
1516 {
1517     static CVmNativeCodeDesc desc(1);
1518     const char *str2;
1519     size_t len;
1520     size_t len2;
1521 
1522     /* check arguments */
1523     if (get_prop_check_argc(retval, argc, &desc))
1524         return TRUE;
1525 
1526     /* retrieve the other string */
1527     str2 = CVmBif::pop_str_val(vmg0_);
1528 
1529     /* get the lengths of the two strings */
1530     len = vmb_get_len(str);
1531     len2 = vmb_get_len(str2);
1532 
1533     /* move to the contents of each string */
1534     str += VMB_LEN;
1535     str2 += VMB_LEN;
1536 
1537     /*
1538      *   if the other string is no longer than our string, and the other
1539      *   string matches our string exactly for the other string's entire
1540      *   length, we start with the other string
1541      */
1542     retval->set_logical(len2 <= len && memcmp(str, str2, len2) == 0);
1543 
1544     /* handled */
1545     return TRUE;
1546 }
1547 
1548 /*
1549  *   property evaluator - endsWith
1550  */
getp_ends_with(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)1551 int CVmObjString::getp_ends_with(VMG_ vm_val_t *retval,
1552                                  const vm_val_t *self_val,
1553                                  const char *str, uint *argc)
1554 {
1555     static CVmNativeCodeDesc desc(1);
1556     const char *str2;
1557     size_t len;
1558     size_t len2;
1559 
1560     /* check arguments */
1561     if (get_prop_check_argc(retval, argc, &desc))
1562         return TRUE;
1563 
1564     /* retrieve the other string */
1565     str2 = CVmBif::pop_str_val(vmg0_);
1566 
1567     /* get the lengths of the two strings */
1568     len = vmb_get_len(str);
1569     len2 = vmb_get_len(str2);
1570 
1571     /* move to the contents of each string */
1572     str += VMB_LEN;
1573     str2 += VMB_LEN;
1574 
1575     /*
1576      *   If the other string is no longer than our string, and the other
1577      *   string matches our string at the end exactly for the other string's
1578      *   entire length, we start with the other string.  Note we don't need
1579      *   to worry about finding a valid character index in our string for
1580      *   the ending offset, because all we care about is whether or not we
1581      *   have an exact byte match between our suffix and the other string.
1582      */
1583     retval->set_logical(len2 <= len
1584                         && memcmp(str + len - len2, str2, len2) == 0);
1585 
1586     /* handled */
1587     return TRUE;
1588 }
1589 
1590 /* ------------------------------------------------------------------------ */
1591 /*
1592  *   property evaluator - mapToByteArray
1593  */
getp_to_byte_array(VMG_ vm_val_t * retval,const vm_val_t * self_val,const char * str,uint * argc)1594 int CVmObjString::getp_to_byte_array(VMG_ vm_val_t *retval,
1595                                      const vm_val_t *self_val,
1596                                      const char *str, uint *argc)
1597 {
1598     static CVmNativeCodeDesc desc(1);
1599     size_t len;
1600     CCharmapToLocal *mapper;
1601     vm_val_t *arg;
1602     size_t byte_len;
1603     size_t src_bytes_used;
1604     size_t out_idx;
1605     CVmObjByteArray *arr;
1606 
1607     /* check arguments */
1608     if (get_prop_check_argc(retval, argc, &desc))
1609         return TRUE;
1610 
1611     /* retrieve the CharacterSet object and make sure it's valid */
1612     arg = G_stk->get(0);
1613     if (arg->typ != VM_OBJ || !CVmObjCharSet::is_charset(vmg_ arg->val.obj))
1614         err_throw(VMERR_BAD_TYPE_BIF);
1615 
1616     /* get the to-local mapping from the character set */
1617     mapper = ((CVmObjCharSet *)vm_objp(vmg_ arg->val.obj))
1618              ->get_to_local(vmg0_);
1619 
1620     /* get my length and skip the length prefix */
1621     len = vmb_get_len(str);
1622     str += VMB_LEN;
1623 
1624     /*
1625      *   first, do a mapping with a null output buffer to determine how many
1626      *   bytes we need for the mapping
1627      */
1628     byte_len = mapper->map_utf8(0, 0, str, len, &src_bytes_used);
1629 
1630     /* allocate a new ByteArray with the required number of bytes */
1631     retval->set_obj(CVmObjByteArray::create(vmg_ FALSE, byte_len));
1632     arr = (CVmObjByteArray *)vm_objp(vmg_ retval->val.obj);
1633 
1634     /* convert it again, this time storing the bytes */
1635     for (out_idx = 1 ; len != 0 ; )
1636     {
1637         char buf[128];
1638 
1639         /* convert a buffer-full */
1640         byte_len = mapper->map_utf8(buf, sizeof(buf), str, len,
1641                                     &src_bytes_used);
1642 
1643         /* store the bytes in the byte array */
1644         arr->cons_copy_from_buf((unsigned char *)buf, out_idx, byte_len);
1645 
1646         /* advance past the output bytes we used */
1647         out_idx += byte_len;
1648 
1649         /* advance past the source bytes we used */
1650         str += src_bytes_used;
1651         len -= src_bytes_used;
1652     }
1653 
1654     /* discard arguments */
1655     G_stk->discard();
1656 
1657     /* handled */
1658     return TRUE;
1659 }
1660 
1661 /* ------------------------------------------------------------------------ */
1662 /*
1663  *   Constant-pool string object
1664  */
1665 
1666 /*
1667  *   create
1668  */
create(VMG_ const char * const_ptr)1669 vm_obj_id_t CVmObjStringConst::create(VMG_ const char *const_ptr)
1670 {
1671     /* create our new ID */
1672     vm_obj_id_t id = vm_new_id(vmg_ FALSE, FALSE, FALSE);
1673 
1674     /* create our string object, pointing directly to the constant pool */
1675     new (vmg_ id) CVmObjStringConst(vmg_ const_ptr);
1676 
1677     /* return the new ID */
1678     return id;
1679 }
1680