1 /*
2 * Copyright (c) 2001, 2002 Michael J. Roberts. All Rights Reserved.
3 *
4 * Please see the accompanying license file, LICENSE.TXT, for information
5 * on using and copying this software.
6 */
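/*
Name
  vmcset.cpp - T3 CharacterSet metaclass
Function
  Implements the CharacterSet metaclass object (CVmObjCharSet).  Each
  object stores a character set name and, when the host supplies a
  character map resource loader, the unicode-to-local and
  local-to-unicode mappers loaded for that name.
Notes

Modified
  06/06/01 MJRoberts  - Creation
*/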
17
18 #include <stdlib.h>
19 #include "vmtype.h"
20 #include "vmobj.h"
21 #include "vmglob.h"
22 #include "vmcset.h"
23 #include "vmbif.h"
24 #include "vmfile.h"
25 #include "vmerrnum.h"
26 #include "vmerr.h"
27 #include "vmstack.h"
28 #include "vmmeta.h"
29 #include "vmrun.h"
30 #include "charmap.h"
31 #include "vmstr.h"
32 #include "vmpredef.h"
33 #include "vmrun.h"
34 #include "vmhost.h"
35
36
37 /* ------------------------------------------------------------------------ */
38 /*
39 * statics
40 */
41
42 /* metaclass registration object */
43 static CVmMetaclassCharSet metaclass_reg_obj;
44 CVmMetaclass *CVmObjCharSet::metaclass_reg_ = &metaclass_reg_obj;
45
46 /* function table */
47 int (CVmObjCharSet::
48 *CVmObjCharSet::func_table_[])(VMG_ vm_obj_id_t self,
49 vm_val_t *retval, uint *argc) =
50 {
51 &CVmObjCharSet::getp_undef,
52 &CVmObjCharSet::getp_get_name,
53 &CVmObjCharSet::getp_is_known,
54 &CVmObjCharSet::getp_is_mappable,
55 &CVmObjCharSet::getp_is_rt_mappable
56 };
57
58
59 /* ------------------------------------------------------------------------ */
60 /*
61 * Create from stack
62 */
create_from_stack(VMG_ const uchar ** pc_ptr,uint argc)63 vm_obj_id_t CVmObjCharSet::create_from_stack(VMG_ const uchar **pc_ptr,
64 uint argc)
65 {
66 vm_obj_id_t id;
67 vm_val_t *arg1;
68 const char *charset_name;
69
70 /* check our arguments */
71 if (argc != 1)
72 err_throw(VMERR_WRONG_NUM_OF_ARGS);
73
74 /* get the name of the character set */
75 arg1 = G_stk->get(0);
76 charset_name = arg1->get_as_string(vmg0_);
77 if (charset_name == 0)
78 err_throw(VMERR_BAD_TYPE_BIF);
79
80 /* create the character set object */
81 id = vm_new_id(vmg_ FALSE, FALSE, FALSE);
82 new (vmg_ id) CVmObjCharSet(vmg_ charset_name + VMB_LEN,
83 vmb_get_len(charset_name));
84
85 /* discard arguments */
86 G_stk->discard(argc);
87
88 /* return the new object */
89 return id;
90 }
91
92 /* ------------------------------------------------------------------------ */
93 /*
94 * Create with no contents
95 */
create(VMG_ int in_root_set)96 vm_obj_id_t CVmObjCharSet::create(VMG_ int in_root_set)
97 {
98 vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
99 new (vmg_ id) CVmObjCharSet();
100 return id;
101 }
102
103 /*
104 * Create with the given character set name
105 */
create(VMG_ int in_root_set,const char * charset_name,size_t charset_name_len)106 vm_obj_id_t CVmObjCharSet::create(VMG_ int in_root_set,
107 const char *charset_name,
108 size_t charset_name_len)
109 {
110 vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
111 new (vmg_ id) CVmObjCharSet(vmg_ charset_name, charset_name_len);
112 return id;
113 }
114
115 /* ------------------------------------------------------------------------ */
116 /*
117 * Instantiate
118 */
CVmObjCharSet(VMG_ const char * charset_name,size_t charset_name_len)119 CVmObjCharSet::CVmObjCharSet(VMG_ const char *charset_name,
120 size_t charset_name_len)
121 {
122 /* allocate and initialize our extension */
123 ext_ = 0;
124 alloc_ext(vmg_ charset_name, charset_name_len);
125 }
126
127 /*
128 * Allocate and initialize our extension
129 */
alloc_ext(VMG_ const char * charset_name,size_t charset_name_len)130 void CVmObjCharSet::alloc_ext(VMG_ const char *charset_name,
131 size_t charset_name_len)
132 {
133 size_t alloc_size;
134 vmobj_charset_ext_t *extp;
135 CResLoader *res_ldr;
136
137 /* if we already have an extension, delete it */
138 if (ext_ != 0)
139 G_mem->get_var_heap()->free_mem(ext_);
140
141 /*
142 * compute the size we need - note that we use the one fixed byte of
143 * the structure's name element as the extra byte we need for null
144 * termination of the name
145 */
146 alloc_size = sizeof(vmobj_charset_ext_t) + charset_name_len;
147
148 /* allocate space for our extension structure */
149 ext_ = (char *)G_mem->get_var_heap()->alloc_mem(alloc_size, this);
150
151 /* cast the extension to our structure type */
152 extp = (vmobj_charset_ext_t *)ext_;
153
154 /* store the character set name and length, null-terminating the name */
155 extp->charset_name_len = charset_name_len;
156 memcpy(extp->charset_name, charset_name, charset_name_len);
157 extp->charset_name[charset_name_len] = '\0';
158
159 /* get the resource loader */
160 res_ldr = G_host_ifc->get_cmap_res_loader();
161
162 /* if we have a resource loader, load the mappings */
163 if (res_ldr != 0)
164 {
165 /* load the unicode-to-local mapping */
166 extp->to_local = CCharmapToLocal::load(res_ldr, extp->charset_name);
167
168 /* load the local-to-unicode mapping */
169 extp->to_uni = CCharmapToUni::load(res_ldr, extp->charset_name);
170 }
171 }
172
173 /* ------------------------------------------------------------------------ */
174 /*
175 * Notify of deletion
176 */
notify_delete(VMG_ int)177 void CVmObjCharSet::notify_delete(VMG_ int /*in_root_set*/)
178 {
179 /* release our mapper objects */
180 if (ext_ != 0)
181 {
182 /* release the to-local character mapper */
183 if (get_ext_ptr()->to_local != 0)
184 get_ext_ptr()->to_local->release_ref();
185
186 /* release the to-unicode character mapper */
187 if (get_ext_ptr()->to_uni != 0)
188 get_ext_ptr()->to_uni->release_ref();
189
190 /* free our extension */
191 G_mem->get_var_heap()->free_mem(ext_);
192 }
193 }
194
195 /* ------------------------------------------------------------------------ */
196 /*
197 * set a property
198 */
set_prop(VMG_ class CVmUndo *,vm_obj_id_t,vm_prop_id_t,const vm_val_t *)199 void CVmObjCharSet::set_prop(VMG_ class CVmUndo *,
200 vm_obj_id_t, vm_prop_id_t,
201 const vm_val_t *)
202 {
203 err_throw(VMERR_INVALID_SETPROP);
204 }
205
206 /* ------------------------------------------------------------------------ */
207 /*
208 * get a property
209 */
get_prop(VMG_ vm_prop_id_t prop,vm_val_t * retval,vm_obj_id_t self,vm_obj_id_t * source_obj,uint * argc)210 int CVmObjCharSet::get_prop(VMG_ vm_prop_id_t prop, vm_val_t *retval,
211 vm_obj_id_t self, vm_obj_id_t *source_obj,
212 uint *argc)
213 {
214 ushort func_idx;
215
216 /* translate the property index to an index into our function table */
217 func_idx = G_meta_table
218 ->prop_to_vector_idx(metaclass_reg_->get_reg_idx(), prop);
219
220 /* call the appropriate function */
221 if ((this->*func_table_[func_idx])(vmg_ self, retval, argc))
222 {
223 *source_obj = metaclass_reg_->get_class_obj(vmg0_);
224 return TRUE;
225 }
226
227 /* inherit default handling */
228 return CVmObject::get_prop(vmg_ prop, retval, self, source_obj, argc);
229 }
230
231 /* ------------------------------------------------------------------------ */
232 /*
233 * load from an image file
234 */
load_from_image(VMG_ vm_obj_id_t self,const char * ptr,size_t siz)235 void CVmObjCharSet::load_from_image(VMG_ vm_obj_id_t self,
236 const char *ptr, size_t siz)
237 {
238 /* initialize with the character set name from the image file */
239 alloc_ext(vmg_ ptr + VMB_LEN, vmb_get_len(ptr));
240 }
241
242 /* ------------------------------------------------------------------------ */
243 /*
244 * save to a file
245 */
save_to_file(VMG_ class CVmFile * fp)246 void CVmObjCharSet::save_to_file(VMG_ class CVmFile *fp)
247 {
248 /* write the name length */
249 fp->write_int2(get_ext_ptr()->charset_name_len);
250
251 /* write the bytes of the name */
252 fp->write_bytes(get_ext_ptr()->charset_name,
253 get_ext_ptr()->charset_name_len);
254 }
255
256 /*
257 * restore from a file
258 */
restore_from_file(VMG_ vm_obj_id_t self,CVmFile * fp,CVmObjFixup *)259 void CVmObjCharSet::restore_from_file(VMG_ vm_obj_id_t self,
260 CVmFile *fp, CVmObjFixup *)
261 {
262 char buf[128];
263 size_t len;
264 size_t read_len;
265
266 /* read the length of the character set name */
267 len = fp->read_uint2();
268
269 /* limit the reading to the length of the buffer */
270 read_len = len;
271 if (read_len > sizeof(buf))
272 read_len = sizeof(buf);
273
274 /* read the name, up to the buffer length */
275 fp->read_bytes(buf, read_len);
276
277 /* skip any bytes we couldn't fit in the buffer */
278 if (len > read_len)
279 fp->set_pos(fp->get_pos() + len - read_len);
280
281 /* initialize from the saved data */
282 alloc_ext(vmg_ buf, read_len);
283 }
284
285 /* ------------------------------------------------------------------------ */
286 /*
287 * Compare for equality
288 */
equals(VMG_ vm_obj_id_t self,const vm_val_t * val,int) const289 int CVmObjCharSet::equals(VMG_ vm_obj_id_t self, const vm_val_t *val,
290 int /*depth*/) const
291 {
292 CVmObjCharSet *other;
293 const vmobj_charset_ext_t *ext;
294 const vmobj_charset_ext_t *other_ext;
295
296 /* if it's a self-reference, it's certainly equal */
297 if (val->typ == VM_OBJ && val->val.obj == self)
298 return TRUE;
299
300 /* if it's not another character set, it's not equal */
301 if (val->typ != VM_OBJ || !is_charset(vmg_ val->val.obj))
302 return FALSE;
303
304 /* we know it's another character set - cast it */
305 other = (CVmObjCharSet *)vm_objp(vmg_ val->val.obj);
306
307 /* get my extension and the other extension */
308 ext = get_ext_ptr();
309 other_ext = other->get_ext_ptr();
310
311 /* it's equal if it has the same name (ignoring case) */
312 return (ext->charset_name_len == other_ext->charset_name_len
313 && memicmp(ext->charset_name, other_ext->charset_name,
314 ext->charset_name_len) == 0);
315 }
316
317 /*
318 * Calculate a hash value
319 */
calc_hash(VMG_ vm_obj_id_t self,int) const320 uint CVmObjCharSet::calc_hash(VMG_ vm_obj_id_t self, int /*depth*/) const
321 {
322 uint hash;
323 size_t rem;
324 const char *p;
325
326 /* add up the bytes in the array */
327 for (hash = 0, rem = get_ext_ptr()->charset_name_len,
328 p = get_ext_ptr()->charset_name ;
329 rem != 0 ;
330 --rem, ++p)
331 {
332 /* add this character into the hash */
333 hash += *p;
334 }
335
336 /* return the result */
337 return hash;
338 }
339
340 /* ------------------------------------------------------------------------ */
341 /*
342 * property evaluator - get the character set name
343 */
getp_get_name(VMG_ vm_obj_id_t self,vm_val_t * retval,uint * argc)344 int CVmObjCharSet::getp_get_name(VMG_ vm_obj_id_t self,
345 vm_val_t *retval, uint *argc)
346 {
347 static CVmNativeCodeDesc desc(0);
348
349 /* check arguments */
350 if (get_prop_check_argc(retval, argc, &desc))
351 return TRUE;
352
353 /* create a new string for the name */
354 retval->set_obj(CVmObjString::create(vmg_ FALSE,
355 get_ext_ptr()->charset_name,
356 get_ext_ptr()->charset_name_len));
357
358 /* handled */
359 return TRUE;
360 }
361
362 /*
363 * property evaluator - is known
364 */
getp_is_known(VMG_ vm_obj_id_t self,vm_val_t * retval,uint * argc)365 int CVmObjCharSet::getp_is_known(VMG_ vm_obj_id_t self,
366 vm_val_t *retval, uint *argc)
367 {
368 static CVmNativeCodeDesc desc(0);
369
370 /* check arguments */
371 if (get_prop_check_argc(retval, argc, &desc))
372 return TRUE;
373
374 /*
375 * it's known if both of our character mappers are non-null; if either
376 * is null, the character set is not known on this platform
377 */
378 retval->set_logical(get_ext_ptr()->to_local != 0
379 && get_ext_ptr()->to_uni != 0);
380
381 /* handled */
382 return TRUE;
383 }
384
385 /*
386 * property evaluator - check a character or a string for mappability
387 */
getp_is_mappable(VMG_ vm_obj_id_t self,vm_val_t * retval,uint * argc)388 int CVmObjCharSet::getp_is_mappable(VMG_ vm_obj_id_t self,
389 vm_val_t *retval, uint *argc)
390 {
391 static CVmNativeCodeDesc desc(1);
392 vm_val_t arg;
393 const char *str;
394 CCharmapToLocal *to_local;
395
396 /* check arguments */
397 if (get_prop_check_argc(retval, argc, &desc))
398 return TRUE;
399
400 /* get the local mapping */
401 to_local = get_to_local(vmg0_);
402
403 /* get the argument and check what type we have */
404 G_stk->pop(&arg);
405 if ((str = arg.get_as_string(vmg0_)) != 0)
406 {
407 size_t len;
408 utf8_ptr p;
409
410 /* get the length and skip the length prefix */
411 len = vmb_get_len(str);
412 str += VMB_LEN;
413
414 /* presume every character will be mappable */
415 retval->set_true();
416
417 /* check each character for mappability */
418 for (p.set((char *)str) ; len != 0 ; p.inc(&len))
419 {
420 /* check to see if this character is mappable */
421 if (!to_local->is_mappable(p.getch()))
422 {
423 /*
424 * The character isn't mappable - this is an
425 * all-or-nothing check, so if one isn't mappable we
426 * return false. Set the nil return and stop looking.
427 */
428 retval->set_nil();
429 break;
430 }
431 }
432 }
433 else if (arg.typ == VM_INT)
434 {
435 /*
436 * Check if the integer character value is mappable. If it's out
437 * of the 16-bit unicode range (0..0xffff), it's not mappable;
438 * otherwise, ask the character mapper.
439 */
440 if (arg.val.intval < 0 || arg.val.intval > 0xffff)
441 {
442 /* it's out of the valid unicode range, so it's not mappable */
443 retval->set_nil();
444 }
445 else
446 {
447 /* ask the character mapper */
448 retval->set_logical(to_local->is_mappable(
449 (wchar_t)arg.val.intval));
450 }
451 }
452
453 /* handled */
454 return TRUE;
455 }
456
457 /*
458 * property evaluator - check a character or a string to see if it has a
459 * round-trip mapping. A round-trip mapping is one where the unicode
460 * characters can be mapped to the local character set, then back to
461 * unicode, yielding the exact original unicode string.
462 */
getp_is_rt_mappable(VMG_ vm_obj_id_t self,vm_val_t * retval,uint * argc)463 int CVmObjCharSet::getp_is_rt_mappable(VMG_ vm_obj_id_t self,
464 vm_val_t *retval, uint *argc)
465 {
466 static CVmNativeCodeDesc desc(1);
467 vm_val_t arg;
468 const char *str;
469 CCharmapToLocal *to_local;
470 CCharmapToUni *to_uni;
471
472 /* check arguments */
473 if (get_prop_check_argc(retval, argc, &desc))
474 return TRUE;
475
476 /* get the local and unicode mappings */
477 to_local = get_to_local(vmg0_);
478 to_uni = get_to_uni(vmg0_);
479
480 /* get the argument and check what type we have */
481 G_stk->pop(&arg);
482 if ((str = arg.get_as_string(vmg0_)) != 0)
483 {
484 size_t len;
485 utf8_ptr p;
486
487 /* get the length and skip the length prefix */
488 len = vmb_get_len(str);
489 str += VMB_LEN;
490
491 /* presume every character will be mappable */
492 retval->set_true();
493
494 /* check each character for mappability */
495 for (p.set((char *)str) ; len != 0 ; p.inc(&len))
496 {
497 /* check for round-trip mappability */
498 if (!is_rt_mappable(p.getch(), to_local, to_uni))
499 {
500 /* nope - return false */
501 retval->set_nil();
502 break;
503 }
504 }
505 }
506 else if (arg.typ == VM_INT)
507 {
508 /* check the integer character for mappability */
509 if (arg.val.intval < 0 || arg.val.intval > 0xffff)
510 {
511 /* it's out of the valid unicode range, so it's not mappable */
512 retval->set_nil();
513 }
514 else
515 {
516 /* ask the character mapper */
517 retval->set_logical(is_rt_mappable(
518 (wchar_t)arg.val.intval, to_local, to_uni));
519 }
520 }
521
522 /* handled */
523 return TRUE;
524 }
525
526 /*------------------------------------------------------------------------ */
527 /*
528 * Determine if a character has a round-trip mapping.
529 */
is_rt_mappable(wchar_t c,CCharmapToLocal * to_local,CCharmapToUni * to_uni)530 int CVmObjCharSet::is_rt_mappable(wchar_t c, CCharmapToLocal *to_local,
531 CCharmapToUni *to_uni)
532 {
533 char lclbuf[16];
534 char unibuf[16];
535 size_t lcllen;
536 size_t unilen;
537 char *p;
538
539 /* if there's no local mapping, it's obviously not mappable */
540 if (!to_local->is_mappable(c))
541 return FALSE;
542
543 /*
544 * If there's an expansion in the mapping to the local set, then there
545 * can't be a round-trip mapping. Expansions are inherently one-way
546 * because they produce multiple local characters for a single unicode
547 * character, and the reverse mapping has no way to group those
548 * multiple local characters back into a single unicode character.
549 */
550 if (to_local->get_expansion(c, &lcllen) != 0)
551 return FALSE;
552
553 /* get the local mapping */
554 lcllen = to_local->map_char(c, lclbuf, sizeof(lclbuf));
555
556 /* map it back to unicode */
557 p = unibuf;
558 unilen = sizeof(unibuf);
559 unilen = to_uni->map(&p, &unilen, lclbuf, lcllen);
560
561 /*
562 * if the unicode mapping is one character that exactly matches the
563 * original input character, then we have a valid round-trip mapping
564 */
565 return (unilen == utf8_ptr::s_wchar_size(c)
566 && utf8_ptr::s_getch(unibuf) == c);
567 }
568
569 /*------------------------------------------------------------------------ */
570 /*
571 * Get the unicode-to-local character set mapper
572 */
get_to_local(VMG0_) const573 CCharmapToLocal *CVmObjCharSet::get_to_local(VMG0_) const
574 {
575 /* if there's no mapper, throw an exception */
576 if (get_ext_ptr()->to_local == 0)
577 {
578 /* throw an UnknownCharacterSetException */
579 G_interpreter->throw_new_class(vmg_ G_predef->charset_unknown_exc,
580 0, "unknown character set");
581 }
582
583 /* return the mapper */
584 return get_ext_ptr()->to_local;
585 }
586
587 /*
588 * Get the local-to-unicode character set mapper
589 */
get_to_uni(VMG0_) const590 CCharmapToUni *CVmObjCharSet::get_to_uni(VMG0_) const
591 {
592 /* if there's no mapper, throw an exception */
593 if (get_ext_ptr()->to_uni == 0)
594 {
595 /* throw an UnknownCharacterSetException */
596 G_interpreter->throw_new_class(vmg_ G_predef->charset_unknown_exc,
597 0, "unknown character set");
598 }
599
600 /* return the mapper */
601 return get_ext_ptr()->to_uni;
602 }
603