1 2 #define PERL_NO_GET_CONTEXT /* we want efficiency */ 3 4 /* I guese no private function needs pTHX_ and aTHX_ */ 5 6 #include "EXTERN.h" 7 #include "perl.h" 8 #include "XSUB.h" 9 10 /* This file is prepared by mkheader */ 11 #include "ucatbl.h" 12 13 /* At present, char > 0x10ffff are unaffected without complaint, right? */ 14 #define VALID_UTF_MAX (0x10ffff) 15 #define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv)) 16 17 #define MAX_DIV_16 (UV_MAX / 16) 18 19 /* Supported Levels */ 20 #define MinLevel (1) 21 #define MaxLevel (4) 22 23 /* Shifted weight at 4th level */ 24 #define Shift4Wt (0xFFFF) 25 26 #define VCE_Length (9) 27 28 #define Hangul_SBase (0xAC00) 29 #define Hangul_SIni (0xAC00) 30 #define Hangul_SFin (0xD7A3) 31 #define Hangul_NCount (588) 32 #define Hangul_TCount (28) 33 #define Hangul_LBase (0x1100) 34 #define Hangul_LIni (0x1100) 35 #define Hangul_LFin (0x1159) 36 #define Hangul_LFill (0x115F) 37 #define Hangul_LEnd (0x115F) /* Unicode 5.2 */ 38 #define Hangul_VBase (0x1161) 39 #define Hangul_VIni (0x1160) /* from Vowel Filler */ 40 #define Hangul_VFin (0x11A2) 41 #define Hangul_VEnd (0x11A7) /* Unicode 5.2 */ 42 #define Hangul_TBase (0x11A7) /* from "no-final" codepoint */ 43 #define Hangul_TIni (0x11A8) 44 #define Hangul_TFin (0x11F9) 45 #define Hangul_TEnd (0x11FF) /* Unicode 5.2 */ 46 #define HangulL2Ini (0xA960) /* Unicode 5.2 */ 47 #define HangulL2Fin (0xA97C) /* Unicode 5.2 */ 48 #define HangulV2Ini (0xD7B0) /* Unicode 5.2 */ 49 #define HangulV2Fin (0xD7C6) /* Unicode 5.2 */ 50 #define HangulT2Ini (0xD7CB) /* Unicode 5.2 */ 51 #define HangulT2Fin (0xD7FB) /* Unicode 5.2 */ 52 53 #define CJK_UidIni (0x4E00) 54 #define CJK_UidFin (0x9FA5) 55 #define CJK_UidF41 (0x9FBB) /* Unicode 4.1 */ 56 #define CJK_UidF51 (0x9FC3) /* Unicode 5.1 */ 57 #define CJK_UidF52 (0x9FCB) /* Unicode 5.2 */ 58 #define CJK_UidF61 (0x9FCC) /* Unicode 6.1 */ 59 #define CJK_UidF80 (0x9FD5) /* Unicode 8.0 */ 60 #define CJK_UidF100 (0x9FEA) /* Unicode 10.0 */ 61 62 #define CJK_ExtAIni (0x3400) /* Unicode 3.0 */ 63 #define CJK_ExtAFin (0x4DB5) /* Unicode 3.0 */ 64 #define CJK_ExtBIni (0x20000) /* Unicode 3.1 */ 65 #define CJK_ExtBFin (0x2A6D6) /* Unicode 3.1 */ 66 #define CJK_ExtCIni (0x2A700) /* Unicode 5.2 */ 67 #define CJK_ExtCFin (0x2B734) /* Unicode 5.2 */ 68 #define CJK_ExtDIni (0x2B740) /* Unicode 6.0 */ 69 #define CJK_ExtDFin (0x2B81D) /* Unicode 6.0 */ 70 #define CJK_ExtEIni (0x2B820) /* Unicode 8.0 */ 71 #define CJK_ExtEFin (0x2CEA1) /* Unicode 8.0 */ 72 #define CJK_ExtFIni (0x2CEB0) /* Unicode 10.0 */ 73 #define CJK_ExtFFin (0x2EBE0) /* Unicode 10.0 */ 74 75 #define CJK_CompIni (0xFA0E) 76 #define CJK_CompFin (0xFA29) 77 static const STDCHAR UnifiedCompat[] = { 78 1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1 79 }; /* E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 */ 80 81 #define TangIdeoIni (0x17000) /* Unicode 9.0 */ 82 #define TangIdeoFin (0x187EC) /* Unicode 9.0 */ 83 #define TangCompIni (0x18800) /* Unicode 9.0 */ 84 #define TangCompFin (0x18AF2) /* Unicode 9.0 */ 85 #define NushuIni (0x1B170) /* Unicode 10.0 */ 86 #define NushuFin (0x1B2FB) /* Unicode 10.0 */ 87 88 #define codeRange(bcode, ecode) ((bcode) <= code && code <= (ecode)) 89 90 MODULE = Unicode::Collate PACKAGE = Unicode::Collate 91 92 PROTOTYPES: DISABLE 93 94 void 95 _fetch_rest () 96 PREINIT: 97 char ** rest; 98 PPCODE: 99 for (rest = (char **)UCA_rest; *rest; ++rest) { 100 XPUSHs(sv_2mortal(newSVpv((char *) *rest, 0))); 101 } 102 103 104 void 105 _fetch_simple (uv) 106 UV uv 107 PREINIT: 108 U8 ***plane, **row; 109 U8* result = NULL; 110 PPCODE: 111 if (!OVER_UTF_MAX(uv)){ 112 plane = (U8***)UCA_simple[uv >> 16]; 113 if (plane) { 114 row = plane[(uv >> 8) & 0xff]; 115 result = row ? row[uv & 0xff] : NULL; 116 } 117 } 118 if (result) { 119 int i; 120 int num = (int)*result; 121 ++result; 122 EXTEND(SP, num); 123 for (i = 0; i < num; ++i) { 124 PUSHs(sv_2mortal(newSVpvn((char *) result, VCE_Length))); 125 result += VCE_Length; 126 } 127 } else { 128 PUSHs(sv_2mortal(newSViv(0))); 129 } 130 131 SV* 132 _ignorable_simple (uv) 133 UV uv 134 ALIAS: 135 _exists_simple = 1 136 PREINIT: 137 U8 ***plane, **row; 138 int num = -1; 139 U8* result = NULL; 140 CODE: 141 if (!OVER_UTF_MAX(uv)){ 142 plane = (U8***)UCA_simple[uv >> 16]; 143 if (plane) { 144 row = plane[(uv >> 8) & 0xff]; 145 result = row ? row[uv & 0xff] : NULL; 146 } 147 if (result) 148 num = (int)*result; /* assuming 0 <= num < 128 */ 149 } 150 151 if (ix) 152 RETVAL = boolSV(num >0); 153 else 154 RETVAL = boolSV(num==0); 155 OUTPUT: 156 RETVAL 157 158 159 void 160 _getHexArray (src) 161 SV* src 162 PREINIT: 163 char *s, *e; 164 STRLEN byte; 165 UV value; 166 bool overflowed = FALSE; 167 const char *hexdigit; 168 PPCODE: 169 s = SvPV(src,byte); 170 for (e = s + byte; s < e;) { 171 hexdigit = strchr((char *) PL_hexdigit, *s++); 172 if (! hexdigit) 173 continue; 174 value = (hexdigit - PL_hexdigit) & 0xF; 175 while (*s) { 176 hexdigit = strchr((char *) PL_hexdigit, *s++); 177 if (! hexdigit) 178 break; 179 if (overflowed) 180 continue; 181 if (value > MAX_DIV_16) { 182 overflowed = TRUE; 183 continue; 184 } 185 value = (value << 4) | ((hexdigit - PL_hexdigit) & 0xF); 186 } 187 XPUSHs(sv_2mortal(newSVuv(overflowed ? UV_MAX : value))); 188 } 189 190 191 SV* 192 _isIllegal (sv) 193 SV* sv 194 PREINIT: 195 UV uv; 196 CODE: 197 if (!sv || !SvIOK(sv)) 198 XSRETURN_YES; 199 uv = SvUVX(sv); 200 RETVAL = boolSV( 201 0x10FFFF < uv /* out of range */ 202 || ((uv & 0xFFFE) == 0xFFFE) /* ??FFF[EF] */ 203 || (0xD800 <= uv && uv <= 0xDFFF) /* unpaired surrogates */ 204 || (0xFDD0 <= uv && uv <= 0xFDEF) /* other non-characters */ 205 ); 206 OUTPUT: 207 RETVAL 208 209 210 void 211 _decompHangul (code) 212 UV code 213 PREINIT: 214 UV sindex, lindex, vindex, tindex; 215 PPCODE: 216 /* code *must* be in Hangul syllable. 217 * Check it before you enter here. */ 218 sindex = code - Hangul_SBase; 219 lindex = sindex / Hangul_NCount; 220 vindex = (sindex % Hangul_NCount) / Hangul_TCount; 221 tindex = sindex % Hangul_TCount; 222 223 EXTEND(SP, tindex ? 3 : 2); 224 PUSHs(sv_2mortal(newSVuv(lindex + Hangul_LBase))); 225 PUSHs(sv_2mortal(newSVuv(vindex + Hangul_VBase))); 226 if (tindex) 227 PUSHs(sv_2mortal(newSVuv(tindex + Hangul_TBase))); 228 229 230 SV* 231 getHST (code, uca_vers = 0) 232 UV code; 233 IV uca_vers; 234 PREINIT: 235 const char * hangtype; 236 STRLEN typelen; 237 CODE: 238 if (codeRange(Hangul_SIni, Hangul_SFin)) { 239 if ((code - Hangul_SBase) % Hangul_TCount) { 240 hangtype = "LVT"; typelen = 3; 241 } else { 242 hangtype = "LV"; typelen = 2; 243 } 244 } else if (uca_vers < 20) { 245 if (codeRange(Hangul_LIni, Hangul_LFin) || code == Hangul_LFill) { 246 hangtype = "L"; typelen = 1; 247 } else if (codeRange(Hangul_VIni, Hangul_VFin)) { 248 hangtype = "V"; typelen = 1; 249 } else if (codeRange(Hangul_TIni, Hangul_TFin)) { 250 hangtype = "T"; typelen = 1; 251 } else { 252 hangtype = ""; typelen = 0; 253 } 254 } else { 255 if (codeRange(Hangul_LIni, Hangul_LEnd) || 256 codeRange(HangulL2Ini, HangulL2Fin)) { 257 hangtype = "L"; typelen = 1; 258 } else if (codeRange(Hangul_VIni, Hangul_VEnd) || 259 codeRange(HangulV2Ini, HangulV2Fin)) { 260 hangtype = "V"; typelen = 1; 261 } else if (codeRange(Hangul_TIni, Hangul_TEnd) || 262 codeRange(HangulT2Ini, HangulT2Fin)) { 263 hangtype = "T"; typelen = 1; 264 } else { 265 hangtype = ""; typelen = 0; 266 } 267 } 268 269 RETVAL = newSVpvn(hangtype, typelen); 270 OUTPUT: 271 RETVAL 272 273 274 void 275 _derivCE_9 (code) 276 UV code 277 ALIAS: 278 _derivCE_14 = 1 279 _derivCE_18 = 2 280 _derivCE_20 = 3 281 _derivCE_22 = 4 282 _derivCE_24 = 5 283 _derivCE_32 = 6 284 _derivCE_34 = 7 285 _derivCE_36 = 8 286 PREINIT: 287 UV base, aaaa, bbbb; 288 U8 a[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; 289 U8 b[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; 290 bool basic_unified = 0, tangut = 0, nushu = 0; 291 PPCODE: 292 if (codeRange(CJK_UidIni, CJK_CompFin)) { 293 if (codeRange(CJK_CompIni, CJK_CompFin)) 294 basic_unified = (bool)UnifiedCompat[code - CJK_CompIni]; 295 else 296 basic_unified = (ix >= 8 ? (code <= CJK_UidF100) : 297 ix >= 6 ? (code <= CJK_UidF80) : 298 ix == 5 ? (code <= CJK_UidF61) : 299 ix >= 3 ? (code <= CJK_UidF52) : 300 ix == 2 ? (code <= CJK_UidF51) : 301 ix == 1 ? (code <= CJK_UidF41) : 302 (code <= CJK_UidFin)); 303 } else { 304 if (ix >= 7) 305 tangut = (codeRange(TangIdeoIni, TangIdeoFin) || 306 codeRange(TangCompIni, TangCompFin)); 307 if (ix >= 8) 308 nushu = (codeRange(NushuIni, NushuFin)); 309 } 310 base = tangut 311 ? 0xFB00 : 312 nushu 313 ? 0xFB01 : 314 basic_unified 315 ? 0xFB40 : /* CJK */ 316 ((codeRange(CJK_ExtAIni, CJK_ExtAFin)) 317 || 318 (codeRange(CJK_ExtBIni, CJK_ExtBFin)) 319 || 320 (ix >= 3 && codeRange(CJK_ExtCIni, CJK_ExtCFin)) 321 || 322 (ix >= 4 && codeRange(CJK_ExtDIni, CJK_ExtDFin)) 323 || 324 (ix >= 6 && codeRange(CJK_ExtEIni, CJK_ExtEFin)) 325 || 326 (ix >= 8 && codeRange(CJK_ExtFIni, CJK_ExtFFin))) 327 ? 0xFB80 /* CJK ext. */ 328 : 0xFBC0; /* others */ 329 aaaa = tangut || nushu ? base : base + (code >> 15); 330 bbbb = (tangut ? (code - TangIdeoIni) : 331 nushu ? (code - NushuIni) : (code & 0x7FFF)) | 0x8000; 332 a[1] = (U8)(aaaa >> 8); 333 a[2] = (U8)(aaaa & 0xFF); 334 b[1] = (U8)(bbbb >> 8); 335 b[2] = (U8)(bbbb & 0xFF); 336 a[4] = (U8)(0x20); /* second octet of level 2 */ 337 a[6] = (U8)(0x02); /* second octet of level 3 */ 338 a[7] = b[7] = (U8)(code >> 8); 339 a[8] = b[8] = (U8)(code & 0xFF); 340 EXTEND(SP, 2); 341 PUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length))); 342 PUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length))); 343 344 345 void 346 _derivCE_8 (code) 347 UV code 348 PREINIT: 349 UV aaaa, bbbb; 350 U8 a[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; 351 U8 b[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; 352 PPCODE: 353 aaaa = 0xFF80 + (code >> 15); 354 bbbb = (code & 0x7FFF) | 0x8000; 355 a[1] = (U8)(aaaa >> 8); 356 a[2] = (U8)(aaaa & 0xFF); 357 b[1] = (U8)(bbbb >> 8); 358 b[2] = (U8)(bbbb & 0xFF); 359 a[4] = (U8)(0x02); /* second octet of level 2 */ 360 a[6] = (U8)(0x01); /* second octet of level 3 */ 361 a[7] = b[7] = (U8)(code >> 8); 362 a[8] = b[8] = (U8)(code & 0xFF); 363 EXTEND(SP, 2); 364 PUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length))); 365 PUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length))); 366 367 368 void 369 _uideoCE_8 (code) 370 UV code 371 PREINIT: 372 U8 uice[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; 373 PPCODE: 374 uice[1] = uice[7] = (U8)(code >> 8); 375 uice[2] = uice[8] = (U8)(code & 0xFF); 376 uice[4] = (U8)(0x20); /* second octet of level 2 */ 377 uice[6] = (U8)(0x02); /* second octet of level 3 */ 378 PUSHs(sv_2mortal(newSVpvn((char *) uice, VCE_Length))); 379 380 381 SV* 382 _isUIdeo (code, uca_vers) 383 UV code; 384 IV uca_vers; 385 bool basic_unified = 0; 386 CODE: 387 /* uca_vers = 0 for _uideoCE_8() */ 388 if (CJK_UidIni <= code) { 389 if (codeRange(CJK_CompIni, CJK_CompFin)) 390 basic_unified = (bool)UnifiedCompat[code - CJK_CompIni]; 391 else 392 basic_unified = (uca_vers >= 36 ? (code <= CJK_UidF100) : 393 uca_vers >= 32 ? (code <= CJK_UidF80) : 394 uca_vers >= 24 ? (code <= CJK_UidF61) : 395 uca_vers >= 20 ? (code <= CJK_UidF52) : 396 uca_vers >= 18 ? (code <= CJK_UidF51) : 397 uca_vers >= 14 ? (code <= CJK_UidF41) : 398 (code <= CJK_UidFin)); 399 } 400 RETVAL = boolSV( 401 (basic_unified) 402 || 403 (codeRange(CJK_ExtAIni, CJK_ExtAFin)) 404 || 405 (uca_vers >= 8 && codeRange(CJK_ExtBIni, CJK_ExtBFin)) 406 || 407 (uca_vers >= 20 && codeRange(CJK_ExtCIni, CJK_ExtCFin)) 408 || 409 (uca_vers >= 22 && codeRange(CJK_ExtDIni, CJK_ExtDFin)) 410 || 411 (uca_vers >= 32 && codeRange(CJK_ExtEIni, CJK_ExtEFin)) 412 || 413 (uca_vers >= 36 && codeRange(CJK_ExtFIni, CJK_ExtFFin)) 414 ); 415 OUTPUT: 416 RETVAL 417 418 419 SV* 420 mk_SortKey (self, buf) 421 SV* self; 422 SV* buf; 423 PREINIT: 424 SV *dst, **svp; 425 STRLEN dlen, vlen; 426 U8 *d, *p, *e, *v, *s[MaxLevel], *eachlevel[MaxLevel]; 427 AV *bufAV; 428 HV *selfHV; 429 UV back_flag; 430 I32 i, buf_len; 431 IV lv, level, uca_vers; 432 bool upper_lower, kata_hira, v2i, last_is_var; 433 CODE: 434 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV) 435 selfHV = (HV*)SvRV(self); 436 else 437 croak("$self is not a HASHREF."); 438 439 if (SvROK(buf) && SvTYPE(SvRV(buf)) == SVt_PVAV) 440 bufAV = (AV*)SvRV(buf); 441 else 442 croak("XSUB, not an ARRAYREF."); 443 444 buf_len = av_len(bufAV); 445 446 if (buf_len < 0) { /* empty: -1 */ 447 dlen = 2 * (MaxLevel - 1); 448 dst = newSV(dlen); 449 (void)SvPOK_only(dst); 450 d = (U8*)SvPVX(dst); 451 while (dlen--) 452 *d++ = '\0'; 453 } else { 454 svp = hv_fetch(selfHV, "level", 5, FALSE); 455 level = svp ? SvIV(*svp) : MaxLevel; 456 457 for (lv = 0; lv < level; lv++) { 458 New(0, eachlevel[lv], 2 * (1 + buf_len) + 1, U8); 459 s[lv] = eachlevel[lv]; 460 } 461 462 svp = hv_fetch(selfHV, "upper_before_lower", 18, FALSE); 463 upper_lower = svp ? SvTRUE(*svp) : FALSE; 464 svp = hv_fetch(selfHV, "katakana_before_hiragana", 24, FALSE); 465 kata_hira = svp ? SvTRUE(*svp) : FALSE; 466 svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE); 467 uca_vers = SvIV(*svp); 468 svp = hv_fetch(selfHV, "variable", 8, FALSE); 469 v2i = uca_vers >= 9 && svp /* (vers >= 9) and not (non-ignorable) */ 470 ? !(SvCUR(*svp) == 13 && memEQ(SvPVX(*svp), "non-ignorable", 13)) 471 : FALSE; 472 473 last_is_var = FALSE; 474 for (i = 0; i <= buf_len; i++) { 475 svp = av_fetch(bufAV, i, FALSE); 476 477 if (svp && SvPOK(*svp)) 478 v = (U8*)SvPV(*svp, vlen); 479 else 480 croak("not a vwt."); 481 482 if (vlen < VCE_Length) /* ignore short VCE (unexpected) */ 483 continue; 484 485 /* "Ignorable (L1, L2) after Variable" since track. v. 9 */ 486 if (v2i) { 487 if (*v) 488 last_is_var = TRUE; 489 else if (v[1] || v[2]) /* non zero primary weight */ 490 last_is_var = FALSE; 491 else if (last_is_var) /* zero primary weight; skipped */ 492 continue; 493 } 494 495 if (v[5] == 0) { /* tert wt < 256 */ 496 if (upper_lower) { 497 if (0x8 <= v[6] && v[6] <= 0xC) /* lower */ 498 v[6] -= 6; 499 else if (0x2 <= v[6] && v[6] <= 0x6) /* upper */ 500 v[6] += 6; 501 else if (v[6] == 0x1C) /* square upper */ 502 v[6]++; 503 else if (v[6] == 0x1D) /* square lower */ 504 v[6]--; 505 } 506 if (kata_hira) { 507 if (0x0F <= v[6] && v[6] <= 0x13) /* katakana */ 508 v[6] -= 2; 509 else if (0xD <= v[6] && v[6] <= 0xE) /* hiragana */ 510 v[6] += 5; 511 } 512 } 513 514 for (lv = 0; lv < level; lv++) { 515 if (v[2 * lv + 1] || v[2 * lv + 2]) { 516 *s[lv]++ = v[2 * lv + 1]; 517 *s[lv]++ = v[2 * lv + 2]; 518 } 519 } 520 } 521 522 dlen = 2 * (MaxLevel - 1); 523 for (lv = 0; lv < level; lv++) 524 dlen += s[lv] - eachlevel[lv]; 525 526 dst = newSV(dlen); 527 (void)SvPOK_only(dst); 528 d = (U8*)SvPVX(dst); 529 530 svp = hv_fetch(selfHV, "backwardsFlag", 13, FALSE); 531 back_flag = svp ? SvUV(*svp) : (UV)0; 532 533 for (lv = 0; lv < level; lv++) { 534 if (back_flag & (1 << (lv + 1))) { 535 p = s[lv]; 536 e = eachlevel[lv]; 537 for ( ; e < p; p -= 2) { 538 *d++ = p[-2]; 539 *d++ = p[-1]; 540 } 541 } 542 else { 543 p = eachlevel[lv]; 544 e = s[lv]; 545 while (p < e) 546 *d++ = *p++; 547 } 548 if (lv + 1 < MaxLevel) { /* lv + 1 == real level */ 549 *d++ = '\0'; 550 *d++ = '\0'; 551 } 552 } 553 554 for (lv = level; lv < MaxLevel; lv++) { 555 if (lv + 1 < MaxLevel) { /* lv + 1 == real level */ 556 *d++ = '\0'; 557 *d++ = '\0'; 558 } 559 } 560 561 for (lv = 0; lv < level; lv++) { 562 Safefree(eachlevel[lv]); 563 } 564 } 565 *d = '\0'; 566 SvCUR_set(dst, d - (U8*)SvPVX(dst)); 567 RETVAL = dst; 568 OUTPUT: 569 RETVAL 570 571 572 SV* 573 varCE (self, vce) 574 SV* self; 575 SV* vce; 576 PREINIT: 577 SV *dst, *vbl, **svp; 578 HV *selfHV; 579 U8 *a, *v, *d; 580 STRLEN alen, vlen; 581 bool ig_l2; 582 IV uca_vers; 583 UV totwt; 584 CODE: 585 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV) 586 selfHV = (HV*)SvRV(self); 587 else 588 croak("$self is not a HASHREF."); 589 590 svp = hv_fetch(selfHV, "ignore_level2", 13, FALSE); 591 ig_l2 = svp ? SvTRUE(*svp) : FALSE; 592 593 svp = hv_fetch(selfHV, "variable", 8, FALSE); 594 vbl = svp ? *svp : &PL_sv_no; 595 a = (U8*)SvPV(vbl, alen); 596 v = (U8*)SvPV(vce, vlen); 597 598 dst = newSV(vlen); 599 d = (U8*)SvPVX(dst); 600 (void)SvPOK_only(dst); 601 Copy(v, d, vlen, U8); 602 SvCUR_set(dst, vlen); 603 d[vlen] = '\0'; 604 605 /* primary weight == 0 && secondary weight != 0 */ 606 if (ig_l2 && !d[1] && !d[2] && (d[3] || d[4])) { 607 d[3] = d[4] = d[5] = d[6] = '\0'; 608 } 609 610 /* variable: checked only the first char and the length, 611 trusting checkCollator() and %VariableOK in Perl ... */ 612 613 if (vlen >= VCE_Length && *a != 'n') { 614 if (*v) { 615 if (*a == 's') { /* shifted or shift-trimmed */ 616 d[7] = d[1]; /* wt level 1 to 4 */ 617 d[8] = d[2]; 618 } /* else blanked */ 619 d[1] = d[2] = d[3] = d[4] = d[5] = d[6] = '\0'; 620 } else if (*a == 's') { /* shifted or shift-trimmed */ 621 totwt = d[1] + d[2] + d[3] + d[4] + d[5] + d[6]; 622 if (alen == 7 && totwt != 0) { /* shifted */ 623 if (d[1] == 0 && d[2] == 1) { /* XXX: CollationAuxiliary-6.2.0 */ 624 d[7] = d[1]; /* wt level 1 to 4 */ 625 d[8] = d[2]; 626 } else { 627 svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE); 628 if (!svp) 629 croak("Panic: no $self->{UCA_Version} in varCE"); 630 uca_vers = SvIV(*svp); 631 632 /* completely ignorable or the second derived CE */ 633 if (uca_vers >= 36 && d[3] + d[4] + d[5] + d[6] == 0) { 634 d[7] = d[8] = '\0'; 635 } else { 636 d[7] = (U8)(Shift4Wt >> 8); 637 d[8] = (U8)(Shift4Wt & 0xFF); 638 } 639 } 640 } else { /* shift-trimmed or completely ignorable */ 641 d[7] = d[8] = '\0'; 642 } 643 } /* else blanked */ 644 } /* else non-ignorable */ 645 RETVAL = dst; 646 OUTPUT: 647 RETVAL 648 649 650 651 SV* 652 visualizeSortKey (self, key) 653 SV * self 654 SV * key 655 PREINIT: 656 HV *selfHV; 657 SV **svp, *dst; 658 U8 *s, *e, *d; 659 STRLEN klen, dlen; 660 UV uv; 661 IV uca_vers, sep = 0; 662 const char *upperhex = "0123456789ABCDEF"; 663 CODE: 664 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV) 665 selfHV = (HV*)SvRV(self); 666 else 667 croak("$self is not a HASHREF."); 668 669 svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE); 670 if (!svp) 671 croak("Panic: no $self->{UCA_Version} in visualizeSortKey"); 672 uca_vers = SvIV(*svp); 673 674 s = (U8*)SvPV(key, klen); 675 676 /* slightly *longer* than the need, but I'm afraid of miscounting; 677 = (klen / 2) * 5 - 1 678 # FFFF and ' ' for each 16bit units but ' ' is less by 1; 679 # ' ' and '|' for level boundaries including the identical level 680 + 2 # '[' and ']' 681 + 1 # '\0' 682 (a) if klen is odd (not expected), maybe more 5 bytes. 683 (b) there is not always the identical level. 684 */ 685 dlen = (klen / 2) * 5 + MaxLevel * 2 + 2; 686 dst = newSV(dlen); 687 (void)SvPOK_only(dst); 688 d = (U8*)SvPVX(dst); 689 690 *d++ = '['; 691 for (e = s + klen; s < e; s += 2) { 692 uv = (U16)(*s << 8 | s[1]); 693 if (uv || sep >= MaxLevel) { 694 if ((d[-1] != '[') && ((9 <= uca_vers) || (d[-1] != '|'))) 695 *d++ = ' '; 696 *d++ = upperhex[ (s[0] >> 4) & 0xF ]; 697 *d++ = upperhex[ s[0] & 0xF ]; 698 *d++ = upperhex[ (s[1] >> 4) & 0xF ]; 699 *d++ = upperhex[ s[1] & 0xF ]; 700 } else { 701 if ((9 <= uca_vers) && (d[-1] != '[')) 702 *d++ = ' '; 703 *d++ = '|'; 704 ++sep; 705 } 706 } 707 *d++ = ']'; 708 *d = '\0'; 709 SvCUR_set(dst, d - (U8*)SvPVX(dst)); 710 RETVAL = dst; 711 OUTPUT: 712 RETVAL 713