1 /* 2 * Copyright (c) 2002 by Michael J. Roberts. All Rights Reserved. 3 * 4 * Please see the accompanying license file, LICENSE.TXT, for information 5 * on using and copying this software. 6 */ 7 /* 8 Name 9 vmstrcmp.h - T3 String Comparator intrinsic class 10 Function 11 Defines the String Comparator intrinsic class, which provides native 12 code that performs complex, parameterized string comparisons. We offer 13 the following customizable options for our comparisons: 14 15 - We can match exactly on case, or without regard to case. 16 17 - We can optionally match a value to a truncated reference value 18 (which allows user input to use abbreviated forms of dictionary words, for 19 example). The minimum truncation length is a settable option. 20 21 - We can use equivalence mappings that allow a given character in a 22 reference string to match different characters in value strings. For 23 example, we could specify that an "a" with an acute accent in a reference 24 string matches an unaccented "a" in a value string. Each such mapping can 25 specify result flag bits, so a caller can determine if particular 26 equivalence mappings were used in making a match. 27 28 This class implements the generic "comparator" interface, by providing 29 a hash value calculator method and a value comparison method, so a String 30 Comparator instance can be used as a Dictionary's comparator object. 31 32 StringComparator objects are immutable; all of our parameters are set 33 in the constructor. This is desirable because it allows the object to be 34 installed in a Dictionary (or any other hash table-based structure) 35 without any danger that the hash table will need to be rebuilt as long as 36 the same comparator is installed. 37 Notes 38 39 Modified 40 09/01/02 MJRoberts - Creation 41 */ 42 43 #ifndef VMSTRCMP_H 44 #define VMSTRCMP_H 45 46 #include <stdlib.h> 47 #include <os.h> 48 #include "vmtype.h" 49 #include "vmobj.h" 50 #include "vmglob.h" 51 52 53 /* ------------------------------------------------------------------------ */ 54 /* 55 * Our serialized data stream, in both the image file and a saved file, 56 * consists of: 57 * 58 * UINT2 truncation_length 59 *. UINT2 flags 60 *. UINT2 equivalence_mapping_count 61 *. UINT2 equivalence_total_value_chars 62 *. equivalence_mappings 63 * 64 * The 'flags' value consists of the following combination of bit fields: 65 * 66 * 0x0001 - the match is case-sensitive 67 * 68 * The 'equivalence_total_value_chars' gives the sum total of the value 69 * string characters in ALL of the equivalence mappings. This value is 70 * stored simply to make it easier to calculate the memory allocation 71 * needs when loading this object. 72 * 73 * Each 'equivalence_mapping' entry is arranged like this: 74 * 75 * UINT2 reference_char 76 *. UBYTE value_char_count 77 *. UINT4 uc_result_flags 78 *. UINT4 lc_result_flags 79 *. UINT2 value_char[value_char_count] 80 * 81 * Each character is given as a 16-bit Unicode value. These values map 82 * directly to the corresponding vmobj_strcmp_equiv structure entries. 83 */ 84 85 /* ------------------------------------------------------------------------ */ 86 /* 87 * Our in-memory extension. 88 */ 89 struct vmobj_strcmp_ext 90 { 91 /* 92 * The truncation length for reference strings, or zero if no 93 * truncation is allowed. This is the minimum length that we must 94 * match when the value string is shorter than the reference string. 95 */ 96 size_t trunc_len; 97 98 /* 99 * Case sensitivity. If this is true, then our matches are sensitive 100 * to case, which means that we must match each character exactly on 101 * case. If this is false, then our matches are insensitive to case, 102 * so we can match an upper-case letter to the corresponding 103 * lower-case letter. 104 */ 105 int case_sensitive; 106 107 /* 108 * Equivalence mapping table, giving the mapping for each "reference" 109 * string character. This is a two-tiered array: the first tier is 110 * indexed by the high-order 8 bits of a reference character, and 111 * gives a pointer to the second tier array, or a null pointer if 112 * there is no mapping for any character with the given high-order 8 113 * bits. The second tier is indexed by the low-order 8 bits, and 114 * gives a pointer to the equivalence mapping structure for the 115 * character, or a null pointer if there is no mapping for the 116 * character. 117 */ 118 struct vmobj_strcmp_equiv **equiv[256]; 119 }; 120 121 /* 122 * Equivalence mapping entry. Note that we don't store the reference 123 * character in a mapping structure, because we can only reach these 124 * mapping structures by indexing the mapping array with the reference 125 * character, and thus must always already know the reference character 126 * before we can even reach one of these. 127 */ 128 struct vmobj_strcmp_equiv 129 { 130 /* string of value characters matching this reference character */ 131 size_t val_ch_cnt; 132 wchar_t *val_ch; 133 134 /* 135 * Additive result flags for upper-case input matches: this value is 136 * bitwise-OR'd into the result code when this equivalence mapping is 137 * used to match the value to an upper-case input letter. 138 */ 139 unsigned long uc_result_flags; 140 141 /* additive result flags for lower-case input matches */ 142 unsigned long lc_result_flags; 143 }; 144 145 /* ------------------------------------------------------------------------ */ 146 /* 147 * String Comparator intrinsic class 148 */ 149 class CVmObjStrComp: public CVmObject 150 { 151 friend class CVmMetaclassStrComp; 152 153 public: 154 /* metaclass registration object */ 155 static class CVmMetaclass *metaclass_reg_; get_metaclass_reg()156 class CVmMetaclass *get_metaclass_reg() const { return metaclass_reg_; } 157 158 /* am I of the given metaclass? */ is_of_metaclass(class CVmMetaclass * meta)159 virtual int is_of_metaclass(class CVmMetaclass *meta) const 160 { 161 /* try my own metaclass and my base class */ 162 return (meta == metaclass_reg_ 163 || CVmObject::is_of_metaclass(meta)); 164 } 165 166 /* am I a StringComparator object? */ is_strcmp_obj(VMG_ vm_obj_id_t obj)167 static int is_strcmp_obj(VMG_ vm_obj_id_t obj) 168 { return vm_objp(vmg_ obj)->is_of_metaclass(metaclass_reg_); } 169 170 /* create dynamically using stack arguments */ 171 static vm_obj_id_t create_from_stack(VMG_ const uchar **pc_ptr, 172 uint argc); 173 174 /* 175 * call a static property - we don't have any of our own, so simply 176 * "inherit" the base class handling 177 */ call_stat_prop(VMG_ vm_val_t * result,const uchar ** pc_ptr,uint * argc,vm_prop_id_t prop)178 static int call_stat_prop(VMG_ vm_val_t *result, 179 const uchar **pc_ptr, uint *argc, 180 vm_prop_id_t prop) 181 { 182 /* defer to our base class */ 183 return CVmObject::call_stat_prop(vmg_ result, pc_ptr, argc, prop); 184 } 185 186 /* notify of deletion */ 187 void notify_delete(VMG_ int in_root_set); 188 189 /* set a property */ 190 void set_prop(VMG_ class CVmUndo *undo, 191 vm_obj_id_t self, vm_prop_id_t prop, const vm_val_t *val); 192 193 /* get a property */ 194 int get_prop(VMG_ vm_prop_id_t prop, vm_val_t *val, 195 vm_obj_id_t self, vm_obj_id_t *source_obj, uint *argc); 196 197 /* undo operations - we are immutable and hence keep no undo */ notify_new_savept()198 void notify_new_savept() { } apply_undo(VMG_ struct CVmUndoRecord *)199 void apply_undo(VMG_ struct CVmUndoRecord *) { } mark_undo_ref(VMG_ struct CVmUndoRecord *)200 void mark_undo_ref(VMG_ struct CVmUndoRecord *) { } remove_stale_undo_weak_ref(VMG_ struct CVmUndoRecord *)201 void remove_stale_undo_weak_ref(VMG_ struct CVmUndoRecord *) { } 202 203 /* we reference no other objects */ mark_refs(VMG_ uint)204 void mark_refs(VMG_ uint) { } remove_stale_weak_refs(VMG0_)205 void remove_stale_weak_refs(VMG0_) { } 206 207 /* load from an image file */ 208 void load_from_image(VMG_ vm_obj_id_t, const char *ptr, size_t); 209 210 /* rebuild for image file */ 211 virtual ulong rebuild_image(VMG_ char *buf, ulong buflen); 212 213 /* save to a file */ 214 void save_to_file(VMG_ class CVmFile *fp); 215 216 /* restore from a file */ 217 void restore_from_file(VMG_ vm_obj_id_t self, 218 class CVmFile *fp, class CVmObjFixup *fixup); 219 220 /* 221 * Direct Interface. These functions correspond to methods we expose 222 * through the get_prop() interface, but can be called directly from 223 * the C++ code of other intrinsic classes (such as Dictionary) to 224 * avoid the overhead of going through the get_prop() mechanism. 225 * These are virtual to allow derived intrinsic classes to override 226 * the implementation of the public VM-visible interface. 227 */ 228 229 /* calculate a hash value for a constant string */ 230 virtual unsigned int calc_hash(const char *str, size_t len); 231 232 /* match two strings */ 233 virtual unsigned long match_strings(const char *valstr, size_t vallen, 234 const char *refstr, size_t reflen); 235 236 protected: 237 /* create with no extension */ CVmObjStrComp()238 CVmObjStrComp() { ext_ = 0; } 239 240 /* delete my extension */ 241 void delete_ext(VMG0_); 242 243 /* get my extension data */ get_ext()244 vmobj_strcmp_ext *get_ext() const { return (vmobj_strcmp_ext *)ext_; } 245 246 /* load from an abstact stream object */ 247 void load_from_stream(VMG_ class CVmStream *str); 248 249 /* 250 * Write to an abstract stream object. Returns the number of bytes 251 * actually needed to store the object. 252 * 253 * If 'bytes_avail' is non-null, it indicates the maximum number of 254 * bytes available for writing; if we need more than this amount, we 255 * won't write anything at all, but will simply return the number of 256 * bytes we actually need. 257 */ 258 ulong write_to_stream(VMG_ class CVmStream *str, ulong *bytes_avail); 259 260 /* allocate and initialize our extension */ 261 void alloc_ext(VMG_ size_t trunc_len, int case_sensitive, 262 size_t equiv_cnt, size_t total_chars, 263 class CVmObjStrCompMapReader *reader); 264 265 /* count of equivalence mappings */ 266 void count_equiv_mappings(size_t *equiv_cnt, size_t *total_ch_cnt); 267 268 /* property evaluator - undefined property */ getp_undef(VMG_ vm_obj_id_t,vm_val_t *,uint *)269 int getp_undef(VMG_ vm_obj_id_t, vm_val_t *, uint *) { return FALSE; } 270 271 /* property evaluator - calculate a hash value */ 272 int getp_calc_hash(VMG_ vm_obj_id_t, vm_val_t *val, uint *argc); 273 274 /* property evaluator - match two values */ 275 int getp_match_values(VMG_ vm_obj_id_t, vm_val_t *val, uint *argc); 276 277 /* property evaluation function table */ 278 static int (CVmObjStrComp::*func_table_[])(VMG_ vm_obj_id_t self, 279 vm_val_t *retval, uint *argc); 280 }; 281 282 /* ------------------------------------------------------------------------ */ 283 /* 284 * Registration table object 285 */ 286 class CVmMetaclassStrComp: public CVmMetaclass 287 { 288 public: 289 /* get the global name */ get_meta_name()290 const char *get_meta_name() const { return "string-comparator/030000"; } 291 292 /* create from image file */ create_for_image_load(VMG_ vm_obj_id_t id)293 void create_for_image_load(VMG_ vm_obj_id_t id) 294 { 295 new (vmg_ id) CVmObjStrComp(); 296 G_obj_table->set_obj_gc_characteristics(id, FALSE, FALSE); 297 } 298 299 /* create from restoring from saved state */ create_for_restore(VMG_ vm_obj_id_t id)300 void create_for_restore(VMG_ vm_obj_id_t id) 301 { 302 new (vmg_ id) CVmObjStrComp(); 303 G_obj_table->set_obj_gc_characteristics(id, FALSE, FALSE); 304 } 305 306 /* create dynamically using stack arguments */ create_from_stack(VMG_ const uchar ** pc_ptr,uint argc)307 vm_obj_id_t create_from_stack(VMG_ const uchar **pc_ptr, uint argc) 308 { return CVmObjStrComp::create_from_stack(vmg_ pc_ptr, argc); } 309 310 /* call a static property */ call_stat_prop(VMG_ vm_val_t * result,const uchar ** pc_ptr,uint * argc,vm_prop_id_t prop)311 int call_stat_prop(VMG_ vm_val_t *result, 312 const uchar **pc_ptr, uint *argc, 313 vm_prop_id_t prop) 314 { 315 return CVmObjStrComp::call_stat_prop(vmg_ result, pc_ptr, argc, prop); 316 } 317 }; 318 319 #endif /* VMSTRCMP_H */ 320 321 /* 322 * Register the class 323 */ 324 VM_REGISTER_METACLASS(CVmObjStrComp) 325 326