1 /*
2  *   Copyright (c) 2001, 2002 Michael J. Roberts.  All Rights Reserved.
3  *
4  *   Please see the accompanying license file, LICENSE.TXT, for information
5  *   on using and copying this software.
6  */
7 /*
8 Name
9   vmcset.cpp - T3 CharacterSet metaclass
10 Function
11 
12 Notes
13 
14 Modified
15   06/06/01 MJRoberts  - Creation
16 */
17 
#include <ctype.h>
#include <stdlib.h>
#include "vmtype.h"
#include "vmobj.h"
#include "vmglob.h"
#include "vmcset.h"
#include "vmbif.h"
#include "vmfile.h"
#include "vmerrnum.h"
#include "vmerr.h"
#include "vmstack.h"
#include "vmmeta.h"
#include "vmrun.h"
#include "charmap.h"
#include "vmstr.h"
#include "vmpredef.h"
#include "vmrun.h"
#include "vmhost.h"
35 
36 
37 /* ------------------------------------------------------------------------ */
38 /*
39  *   statics
40  */
41 
/*
 *   Metaclass registration object.  The class-level metaclass_reg_ pointer
 *   refers to this single file-scope instance; get_prop() uses it to find
 *   our registration index and our class object.
 */
static CVmMetaclassCharSet metaclass_reg_obj;
CVmMetaclass *CVmObjCharSet::metaclass_reg_ = &metaclass_reg_obj;

/*
 *   Function table: pointers to the property evaluator methods.  get_prop()
 *   indexes this array with the value returned by prop_to_vector_idx(), so
 *   the order of the entries is significant.
 *
 *   NOTE(review): index 0 (getp_undef) is presumably the slot that an
 *   unrecognized property maps to - confirm against the metaclass table.
 */
int (CVmObjCharSet::
     *CVmObjCharSet::func_table_[])(VMG_ vm_obj_id_t self,
                                    vm_val_t *retval, uint *argc) =
{
    &CVmObjCharSet::getp_undef,
    &CVmObjCharSet::getp_get_name,
    &CVmObjCharSet::getp_is_known,
    &CVmObjCharSet::getp_is_mappable,
    &CVmObjCharSet::getp_is_rt_mappable
};
57 
58 
59 /* ------------------------------------------------------------------------ */
60 /*
61  *   Create from stack
62  */
create_from_stack(VMG_ const uchar ** pc_ptr,uint argc)63 vm_obj_id_t CVmObjCharSet::create_from_stack(VMG_ const uchar **pc_ptr,
64                                              uint argc)
65 {
66     vm_obj_id_t id;
67     vm_val_t *arg1;
68     const char *charset_name;
69 
70     /* check our arguments */
71     if (argc != 1)
72         err_throw(VMERR_WRONG_NUM_OF_ARGS);
73 
74     /* get the name of the character set */
75     arg1 = G_stk->get(0);
76     charset_name = arg1->get_as_string(vmg0_);
77     if (charset_name == 0)
78         err_throw(VMERR_BAD_TYPE_BIF);
79 
80     /* create the character set object */
81     id = vm_new_id(vmg_ FALSE, FALSE, FALSE);
82     new (vmg_ id) CVmObjCharSet(vmg_ charset_name + VMB_LEN,
83                                 vmb_get_len(charset_name));
84 
85     /* discard arguments */
86     G_stk->discard(argc);
87 
88     /* return the new object */
89     return id;
90 }
91 
92 /* ------------------------------------------------------------------------ */
93 /*
94  *   Create with no contents
95  */
create(VMG_ int in_root_set)96 vm_obj_id_t CVmObjCharSet::create(VMG_ int in_root_set)
97 {
98     vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
99     new (vmg_ id) CVmObjCharSet();
100     return id;
101 }
102 
103 /*
104  *   Create with the given character set name
105  */
create(VMG_ int in_root_set,const char * charset_name,size_t charset_name_len)106 vm_obj_id_t CVmObjCharSet::create(VMG_ int in_root_set,
107                                   const char *charset_name,
108                                   size_t charset_name_len)
109 {
110     vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
111     new (vmg_ id) CVmObjCharSet(vmg_ charset_name, charset_name_len);
112     return id;
113 }
114 
115 /* ------------------------------------------------------------------------ */
116 /*
117  *   Instantiate
118  */
CVmObjCharSet(VMG_ const char * charset_name,size_t charset_name_len)119 CVmObjCharSet::CVmObjCharSet(VMG_ const char *charset_name,
120                              size_t charset_name_len)
121 {
122     /* allocate and initialize our extension */
123     ext_ = 0;
124     alloc_ext(vmg_ charset_name, charset_name_len);
125 }
126 
127 /*
128  *   Allocate and initialize our extension
129  */
alloc_ext(VMG_ const char * charset_name,size_t charset_name_len)130 void CVmObjCharSet::alloc_ext(VMG_ const char *charset_name,
131                               size_t charset_name_len)
132 {
133     size_t alloc_size;
134     vmobj_charset_ext_t *extp;
135     CResLoader *res_ldr;
136 
137     /* if we already have an extension, delete it */
138     if (ext_ != 0)
139         G_mem->get_var_heap()->free_mem(ext_);
140 
141     /*
142      *   compute the size we need - note that we use the one fixed byte of
143      *   the structure's name element as the extra byte we need for null
144      *   termination of the name
145      */
146     alloc_size = sizeof(vmobj_charset_ext_t) + charset_name_len;
147 
148     /* allocate space for our extension structure */
149     ext_ = (char *)G_mem->get_var_heap()->alloc_mem(alloc_size, this);
150 
151     /* cast the extension to our structure type */
152     extp = (vmobj_charset_ext_t *)ext_;
153 
154     /* store the character set name and length, null-terminating the name */
155     extp->charset_name_len = charset_name_len;
156     memcpy(extp->charset_name, charset_name, charset_name_len);
157     extp->charset_name[charset_name_len] = '\0';
158 
159     /* get the resource loader */
160     res_ldr = G_host_ifc->get_cmap_res_loader();
161 
162     /* if we have a resource loader, load the mappings */
163     if (res_ldr != 0)
164     {
165         /* load the unicode-to-local mapping */
166         extp->to_local = CCharmapToLocal::load(res_ldr, extp->charset_name);
167 
168         /* load the local-to-unicode mapping */
169         extp->to_uni = CCharmapToUni::load(res_ldr, extp->charset_name);
170     }
171 }
172 
173 /* ------------------------------------------------------------------------ */
174 /*
175  *   Notify of deletion
176  */
notify_delete(VMG_ int)177 void CVmObjCharSet::notify_delete(VMG_ int /*in_root_set*/)
178 {
179     /* release our mapper objects */
180     if (ext_ != 0)
181     {
182         /* release the to-local character mapper */
183         if (get_ext_ptr()->to_local != 0)
184             get_ext_ptr()->to_local->release_ref();
185 
186         /* release the to-unicode character mapper */
187         if (get_ext_ptr()->to_uni != 0)
188             get_ext_ptr()->to_uni->release_ref();
189 
190         /* free our extension */
191         G_mem->get_var_heap()->free_mem(ext_);
192     }
193 }
194 
195 /* ------------------------------------------------------------------------ */
196 /*
197  *   set a property
198  */
set_prop(VMG_ class CVmUndo *,vm_obj_id_t,vm_prop_id_t,const vm_val_t *)199 void CVmObjCharSet::set_prop(VMG_ class CVmUndo *,
200                              vm_obj_id_t, vm_prop_id_t,
201                              const vm_val_t *)
202 {
203     err_throw(VMERR_INVALID_SETPROP);
204 }
205 
206 /* ------------------------------------------------------------------------ */
207 /*
208  *   get a property
209  */
get_prop(VMG_ vm_prop_id_t prop,vm_val_t * retval,vm_obj_id_t self,vm_obj_id_t * source_obj,uint * argc)210 int CVmObjCharSet::get_prop(VMG_ vm_prop_id_t prop, vm_val_t *retval,
211                             vm_obj_id_t self, vm_obj_id_t *source_obj,
212                             uint *argc)
213 {
214     ushort func_idx;
215 
216     /* translate the property index to an index into our function table */
217     func_idx = G_meta_table
218                ->prop_to_vector_idx(metaclass_reg_->get_reg_idx(), prop);
219 
220     /* call the appropriate function */
221     if ((this->*func_table_[func_idx])(vmg_ self, retval, argc))
222     {
223         *source_obj = metaclass_reg_->get_class_obj(vmg0_);
224         return TRUE;
225     }
226 
227     /* inherit default handling */
228     return CVmObject::get_prop(vmg_ prop, retval, self, source_obj, argc);
229 }
230 
231 /* ------------------------------------------------------------------------ */
232 /*
233  *   load from an image file
234  */
load_from_image(VMG_ vm_obj_id_t self,const char * ptr,size_t siz)235 void CVmObjCharSet::load_from_image(VMG_ vm_obj_id_t self,
236                                     const char *ptr, size_t siz)
237 {
238     /* initialize with the character set name from the image file */
239     alloc_ext(vmg_ ptr + VMB_LEN, vmb_get_len(ptr));
240 }
241 
242 /* ------------------------------------------------------------------------ */
243 /*
244  *   save to a file
245  */
save_to_file(VMG_ class CVmFile * fp)246 void CVmObjCharSet::save_to_file(VMG_ class CVmFile *fp)
247 {
248     /* write the name length */
249     fp->write_int2(get_ext_ptr()->charset_name_len);
250 
251     /* write the bytes of the name */
252     fp->write_bytes(get_ext_ptr()->charset_name,
253                     get_ext_ptr()->charset_name_len);
254 }
255 
256 /*
257  *   restore from a file
258  */
restore_from_file(VMG_ vm_obj_id_t self,CVmFile * fp,CVmObjFixup *)259 void CVmObjCharSet::restore_from_file(VMG_ vm_obj_id_t self,
260                                       CVmFile *fp, CVmObjFixup *)
261 {
262     char buf[128];
263     size_t len;
264     size_t read_len;
265 
266     /* read the length of the character set name */
267     len = fp->read_uint2();
268 
269     /* limit the reading to the length of the buffer */
270     read_len = len;
271     if (read_len > sizeof(buf))
272         read_len = sizeof(buf);
273 
274     /* read the name, up to the buffer length */
275     fp->read_bytes(buf, read_len);
276 
277     /* skip any bytes we couldn't fit in the buffer */
278     if (len > read_len)
279         fp->set_pos(fp->get_pos() + len - read_len);
280 
281     /* initialize from the saved data */
282     alloc_ext(vmg_ buf, read_len);
283 }
284 
285 /* ------------------------------------------------------------------------ */
286 /*
287  *   Compare for equality
288  */
equals(VMG_ vm_obj_id_t self,const vm_val_t * val,int) const289 int CVmObjCharSet::equals(VMG_ vm_obj_id_t self, const vm_val_t *val,
290                           int /*depth*/) const
291 {
292     CVmObjCharSet *other;
293     const vmobj_charset_ext_t *ext;
294     const vmobj_charset_ext_t *other_ext;
295 
296     /* if it's a self-reference, it's certainly equal */
297     if (val->typ == VM_OBJ && val->val.obj == self)
298         return TRUE;
299 
300     /* if it's not another character set, it's not equal */
301     if (val->typ != VM_OBJ || !is_charset(vmg_ val->val.obj))
302         return FALSE;
303 
304     /* we know it's another character set - cast it */
305     other = (CVmObjCharSet *)vm_objp(vmg_ val->val.obj);
306 
307     /* get my extension and the other extension */
308     ext = get_ext_ptr();
309     other_ext = other->get_ext_ptr();
310 
311     /* it's equal if it has the same name (ignoring case) */
312     return (ext->charset_name_len == other_ext->charset_name_len
313             && memicmp(ext->charset_name, other_ext->charset_name,
314                        ext->charset_name_len) == 0);
315 }
316 
317 /*
318  *   Calculate a hash value
319  */
calc_hash(VMG_ vm_obj_id_t self,int) const320 uint CVmObjCharSet::calc_hash(VMG_ vm_obj_id_t self, int /*depth*/) const
321 {
322     uint hash;
323     size_t rem;
324     const char *p;
325 
326     /* add up the bytes in the array */
327     for (hash = 0, rem = get_ext_ptr()->charset_name_len,
328          p = get_ext_ptr()->charset_name ;
329          rem != 0 ;
330          --rem, ++p)
331     {
332         /* add this character into the hash */
333         hash += *p;
334     }
335 
336     /* return the result */
337     return hash;
338 }
339 
340 /* ------------------------------------------------------------------------ */
341 /*
342  *   property evaluator - get the character set name
343  */
getp_get_name(VMG_ vm_obj_id_t self,vm_val_t * retval,uint * argc)344 int CVmObjCharSet::getp_get_name(VMG_ vm_obj_id_t self,
345                                  vm_val_t *retval, uint *argc)
346 {
347     static CVmNativeCodeDesc desc(0);
348 
349     /* check arguments */
350     if (get_prop_check_argc(retval, argc, &desc))
351         return TRUE;
352 
353     /* create a new string for the name */
354     retval->set_obj(CVmObjString::create(vmg_ FALSE,
355                                          get_ext_ptr()->charset_name,
356                                          get_ext_ptr()->charset_name_len));
357 
358     /* handled */
359     return TRUE;
360 }
361 
362 /*
363  *   property evaluator - is known
364  */
getp_is_known(VMG_ vm_obj_id_t self,vm_val_t * retval,uint * argc)365 int CVmObjCharSet::getp_is_known(VMG_ vm_obj_id_t self,
366                                  vm_val_t *retval, uint *argc)
367 {
368     static CVmNativeCodeDesc desc(0);
369 
370     /* check arguments */
371     if (get_prop_check_argc(retval, argc, &desc))
372         return TRUE;
373 
374     /*
375      *   it's known if both of our character mappers are non-null; if either
376      *   is null, the character set is not known on this platform
377      */
378     retval->set_logical(get_ext_ptr()->to_local != 0
379                         && get_ext_ptr()->to_uni != 0);
380 
381     /* handled */
382     return TRUE;
383 }
384 
385 /*
386  *   property evaluator - check a character or a string for mappability
387  */
getp_is_mappable(VMG_ vm_obj_id_t self,vm_val_t * retval,uint * argc)388 int CVmObjCharSet::getp_is_mappable(VMG_ vm_obj_id_t self,
389                                     vm_val_t *retval, uint *argc)
390 {
391     static CVmNativeCodeDesc desc(1);
392     vm_val_t arg;
393     const char *str;
394     CCharmapToLocal *to_local;
395 
396     /* check arguments */
397     if (get_prop_check_argc(retval, argc, &desc))
398         return TRUE;
399 
400     /* get the local mapping */
401     to_local = get_to_local(vmg0_);
402 
403     /* get the argument and check what type we have */
404     G_stk->pop(&arg);
405     if ((str = arg.get_as_string(vmg0_)) != 0)
406     {
407         size_t len;
408         utf8_ptr p;
409 
410         /* get the length and skip the length prefix */
411         len = vmb_get_len(str);
412         str += VMB_LEN;
413 
414         /* presume every character will be mappable */
415         retval->set_true();
416 
417         /* check each character for mappability */
418         for (p.set((char *)str) ; len != 0 ; p.inc(&len))
419         {
420             /* check to see if this character is mappable */
421             if (!to_local->is_mappable(p.getch()))
422             {
423                 /*
424                  *   The character isn't mappable - this is an
425                  *   all-or-nothing check, so if one isn't mappable we
426                  *   return false.  Set the nil return and stop looking.
427                  */
428                 retval->set_nil();
429                 break;
430             }
431         }
432     }
433     else if (arg.typ == VM_INT)
434     {
435         /*
436          *   Check if the integer character value is mappable.  If it's out
437          *   of the 16-bit unicode range (0..0xffff), it's not mappable;
438          *   otherwise, ask the character mapper.
439          */
440         if (arg.val.intval < 0 || arg.val.intval > 0xffff)
441         {
442             /* it's out of the valid unicode range, so it's not mappable */
443             retval->set_nil();
444         }
445         else
446         {
447             /* ask the character mapper */
448             retval->set_logical(to_local->is_mappable(
449                 (wchar_t)arg.val.intval));
450         }
451     }
452 
453     /* handled */
454     return TRUE;
455 }
456 
457 /*
458  *   property evaluator - check a character or a string to see if it has a
459  *   round-trip mapping.  A round-trip mapping is one where the unicode
460  *   characters can be mapped to the local character set, then back to
461  *   unicode, yielding the exact original unicode string.
462  */
getp_is_rt_mappable(VMG_ vm_obj_id_t self,vm_val_t * retval,uint * argc)463 int CVmObjCharSet::getp_is_rt_mappable(VMG_ vm_obj_id_t self,
464                                        vm_val_t *retval, uint *argc)
465 {
466     static CVmNativeCodeDesc desc(1);
467     vm_val_t arg;
468     const char *str;
469     CCharmapToLocal *to_local;
470     CCharmapToUni *to_uni;
471 
472     /* check arguments */
473     if (get_prop_check_argc(retval, argc, &desc))
474         return TRUE;
475 
476     /* get the local and unicode mappings */
477     to_local = get_to_local(vmg0_);
478     to_uni = get_to_uni(vmg0_);
479 
480     /* get the argument and check what type we have */
481     G_stk->pop(&arg);
482     if ((str = arg.get_as_string(vmg0_)) != 0)
483     {
484         size_t len;
485         utf8_ptr p;
486 
487         /* get the length and skip the length prefix */
488         len = vmb_get_len(str);
489         str += VMB_LEN;
490 
491         /* presume every character will be mappable */
492         retval->set_true();
493 
494         /* check each character for mappability */
495         for (p.set((char *)str) ; len != 0 ; p.inc(&len))
496         {
497             /* check for round-trip mappability */
498             if (!is_rt_mappable(p.getch(), to_local, to_uni))
499             {
500                 /* nope - return false */
501                 retval->set_nil();
502                 break;
503             }
504         }
505     }
506     else if (arg.typ == VM_INT)
507     {
508         /* check the integer character for mappability */
509         if (arg.val.intval < 0 || arg.val.intval > 0xffff)
510         {
511             /* it's out of the valid unicode range, so it's not mappable */
512             retval->set_nil();
513         }
514         else
515         {
516             /* ask the character mapper */
517             retval->set_logical(is_rt_mappable(
518                 (wchar_t)arg.val.intval, to_local, to_uni));
519         }
520     }
521 
522     /* handled */
523     return TRUE;
524 }
525 
526 /*------------------------------------------------------------------------ */
527 /*
528  *   Determine if a character has a round-trip mapping.
529  */
is_rt_mappable(wchar_t c,CCharmapToLocal * to_local,CCharmapToUni * to_uni)530 int CVmObjCharSet::is_rt_mappable(wchar_t c, CCharmapToLocal *to_local,
531                                   CCharmapToUni *to_uni)
532 {
533     char lclbuf[16];
534     char unibuf[16];
535     size_t lcllen;
536     size_t unilen;
537     char *p;
538 
539     /* if there's no local mapping, it's obviously not mappable */
540     if (!to_local->is_mappable(c))
541         return FALSE;
542 
543     /*
544      *   If there's an expansion in the mapping to the local set, then there
545      *   can't be a round-trip mapping.  Expansions are inherently one-way
546      *   because they produce multiple local characters for a single unicode
547      *   character, and the reverse mapping has no way to group those
548      *   multiple local characters back into a single unicode character.
549      */
550     if (to_local->get_expansion(c, &lcllen) != 0)
551         return FALSE;
552 
553     /* get the local mapping */
554     lcllen = to_local->map_char(c, lclbuf, sizeof(lclbuf));
555 
556     /* map it back to unicode */
557     p = unibuf;
558     unilen = sizeof(unibuf);
559     unilen = to_uni->map(&p, &unilen, lclbuf, lcllen);
560 
561     /*
562      *   if the unicode mapping is one character that exactly matches the
563      *   original input character, then we have a valid round-trip mapping
564      */
565     return (unilen == utf8_ptr::s_wchar_size(c)
566             && utf8_ptr::s_getch(unibuf) == c);
567 }
568 
569 /*------------------------------------------------------------------------ */
570 /*
571  *   Get the unicode-to-local character set mapper
572  */
get_to_local(VMG0_) const573 CCharmapToLocal *CVmObjCharSet::get_to_local(VMG0_) const
574 {
575     /* if there's no mapper, throw an exception */
576     if (get_ext_ptr()->to_local == 0)
577     {
578         /* throw an UnknownCharacterSetException */
579         G_interpreter->throw_new_class(vmg_ G_predef->charset_unknown_exc,
580                                        0, "unknown character set");
581     }
582 
583     /* return the mapper */
584     return get_ext_ptr()->to_local;
585 }
586 
587 /*
588  *   Get the local-to-unicode character set mapper
589  */
get_to_uni(VMG0_) const590 CCharmapToUni *CVmObjCharSet::get_to_uni(VMG0_) const
591 {
592     /* if there's no mapper, throw an exception */
593     if (get_ext_ptr()->to_uni == 0)
594     {
595         /* throw an UnknownCharacterSetException */
596         G_interpreter->throw_new_class(vmg_ G_predef->charset_unknown_exc,
597                                        0, "unknown character set");
598     }
599 
600     /* return the mapper */
601     return get_ext_ptr()->to_uni;
602 }
603