#!/usr/local/bin/perl
#
# Filter that rewrites plain characters as superscript / modifier-letter
# characters.
#
# The table after __END__ lists candidate characters.  For each entry whose
# NFKD form is a single character different from the entry itself, a mapping
# from that NFKD character to the listed character is recorded.  Each input
# line (from the files named on the command line, or from standard input) is
# normalized to NFD, run through that mapping, and printed.

use 5.014;

use utf8;
use strict;
use autodie;
use warnings;
use warnings    qw< FATAL  utf8     >;
use open        qw< :std  :utf8     >;
use charnames   qw< :full           >;
use feature     qw< unicode_strings >;

# Every pattern in this file is compiled with /m /s /u /x.  Because of /x,
# literal whitespace inside a pattern is ignored; a real blank has to be
# written as \N{SPACE} or inside a bracketed class.
use re "/msux";

#############################################

use File::Basename      qw< basename >;
use Carp                qw< carp croak confess cluck >;
use Encode              qw< encode decode >;
use Unicode::Normalize  qw< NFD NFC NFKD NFKC >;

#############################################

# Forward declarations.  The prototypes are load-bearing: they let deQ and
# deQQ parse as unary operators and give convert_to_superscripts its
# default-to-$_ argument.
sub compile                 (    );
sub convert_to_superscripts ( _  );
sub deQ                     ( $  );
sub deQQ                    ( $  );
sub dequeue                 ( $$ );
sub filter                  (    );
sub fix_encodings           (    );
sub inits                   (    );
sub last_rites              (    );
sub main                    (    );

#############################################

MAIN: {
    main();
    exit(0);
}
die "NOT REACHED";

#############################################


# If any command-line argument contains a non-ASCII character, decode all
# of the arguments from UTF-8.
sub fix_encodings() {
    if (grep /\P{ASCII}/ => @ARGV) {
        @ARGV = map { decode("UTF-8", $_) } @ARGV;
    }
}

# One-time setup: install the signal handlers, decode @ARGV, shorten $0,
# and turn on autoflush for the currently selected output handle.
sub inits() {
    last_rites();
    fix_encodings();
    $0 = basename($0);  # shorter messages
    $| = 1;
}

# Install __DIE__ and __WARN__ handlers that add stack traces.
sub last_rites() {

    # $^S is true while inside an eval; only exceptions raised outside
    # of one are reported here.
    $SIG{__DIE__} = sub {
        confess "Uncaught exception: @_" unless $^S;
    };

    # A warning inside an eval is reported with a trace; anywhere else
    # it is turned into a fatal error.
    $SIG{__WARN__} = sub {
        if ($^S) { cluck   "Trapped warning: @_" }
        else     { confess "Deadly warning: @_"  }
    };

}

# Program entry point: set up, build the converter, then filter the input.
sub main() {
    inits();
    compile();
    filter();
}

# Read the table after __END__ and define convert_to_superscripts() from it.
#
# Each non-blank, non-comment table line must consist of leading horizontal
# whitespace, one character, whitespace, a 4-6 digit hex code point,
# whitespace, and an upper-case name; anything else is fatal.  Only the
# character itself is used; the code point and name are checked for form.
sub compile() {

    my $superscripts = q();
    my $originals    = q();

    local $_;

    # The table is UTF-8.  :encoding(UTF-8) validates while decoding,
    # which the bare :utf8 layer does not.
    binmode(DATA, ":encoding(UTF-8)");

  LINE:
    while (<DATA>) {
        next LINE if / \A [\h\v] + \z /;            # blank line
        next LINE if / ^ \h* \N{NUMBER SIGN} /;     # comment line
        chomp;
        die "bad data line: $_" unless m{
            \A \h+

            (?<CHAR>
                \H
            )

            \h+

            (?<CPNUM>
                \p{ahex}{4,6}
            )

            \h+

            (?<NAME>
                (?= \w )
                [A-Z0-9\N{SPACE}\N{HYPHEN-MINUS}] +
                (?<= \w )
            )

            \z
        };

        my $char = $+{CHAR};
        my $nfkd = NFKD($char);

        # Keep only entries that decompose to exactly one other character.
        if ($char ne $nfkd && length($nfkd) == 1) {
            $superscripts .= $char;
            $originals    .= $nfkd;
        }
    }

    # tr/// needs its lists at compile time, so the converter is generated
    # as source text and compiled with a string eval.  The two lists are
    # pasted in verbatim; this relies on them holding no character that is
    # special inside tr[][] (backslash, hyphen-minus, square brackets).
    my $code = deQ(<<'LITERAL') . deQQ(<<"INTERPOLATED");
    |Q|
    |Q|     use utf8;
    |Q|
    |Q|     sub convert_to_superscripts (_) {
    |Q|         confess "argcount" unless @_ == 1;
    |Q|         my $string = $_[0];
    |Q|         confess "want string" if ref $_[0];
LITERAL
    |QQ|
    |QQ|         \$string =~ tr[$originals][$superscripts];
    |QQ|         return \$string;
    |QQ|     }
    |QQ|
    |QQ|     'ig00'
    |QQ|
INTERPOLATED

    # The generated code ends in a true value, so a false result means the
    # eval failed; the bare die then re-raises whatever is in $@.
    eval($code) || die;

}

# Read every input line, normalize it to NFD, convert it, and print it
# followed by a newline.
sub filter() {

    # Tell an interactive user why the program appears to hang.
    if (@ARGV == 0 && -t STDIN && -t STDERR) {
        print STDERR "$0: reading from standard input\n";
    }

    # Register, at run time, an END block that closes STDOUT.
    eval q{
        END { close STDOUT }
        1;
    } || die;

    while (my $line = <>) {
        chomp $line;
        my $nfline = NFD($line);
        my $superb = convert_to_superscripts($nfline);
        say $superb;
    }

}

# Strip a margin marker from every line of $body: leading horizontal
# whitespace, then $leader, then at most one following space.  Returns the
# stripped text.
sub dequeue($$) {
    my($leader, $body) = @_;
    # The optional blank is spelled \N{SPACE} because this file compiles all
    # patterns with /x: a literal blank before the "?" would be ignored and
    # the "?" would then apply to the last character of the leader instead.
    $body =~ s/ ^ \h* \Q$leader\E \N{SPACE}? //gm;
    return $body;
}

# Strip the |Q| margin marker from $text.
sub deQ($) {
    my $text = $_[0];
    return dequeue q<|Q|>, $text;
}

# Strip the |QQ| margin marker from $text.
sub deQQ($) {
    my $text = $_[0];
    return dequeue qq<|QQ|>, $text;
}


__END__
    ⁺  207A  SUPERSCRIPT PLUS SIGN
    ⁻  207B  SUPERSCRIPT MINUS
    ⁼  207C  SUPERSCRIPT EQUALS SIGN
    ⁽  207D  SUPERSCRIPT LEFT PARENTHESIS
    ⁾  207E  SUPERSCRIPT RIGHT PARENTHESIS

    ⁰  2070  SUPERSCRIPT ZERO
    ¹  00B9  SUPERSCRIPT ONE
    ²  00B2  SUPERSCRIPT TWO
    ³  00B3  SUPERSCRIPT THREE
    ⁴  2074  SUPERSCRIPT FOUR
    ⁵  2075  SUPERSCRIPT FIVE
    ⁶  2076  SUPERSCRIPT SIX
    ⁷  2077  SUPERSCRIPT SEVEN
    ⁸  2078  SUPERSCRIPT EIGHT
    ⁹  2079  SUPERSCRIPT NINE

    ᴬ  1D2C  MODIFIER LETTER CAPITAL A
    ᵃ  1D43  MODIFIER LETTER SMALL A
    ᴭ  1D2D  MODIFIER LETTER CAPITAL AE
    ᵆ  1D46  MODIFIER LETTER SMALL TURNED AE
    ᵄ  1D44  MODIFIER LETTER SMALL TURNED A
    ᵅ  1D45  MODIFIER LETTER SMALL ALPHA
    ᶛ  1D9B  MODIFIER LETTER SMALL TURNED ALPHA
    ᴮ  1D2E  MODIFIER LETTER CAPITAL B
    ᵇ  1D47  MODIFIER LETTER SMALL B
    ᴯ  1D2F  MODIFIER LETTER CAPITAL BARRED B
    ᶜ  1D9C  MODIFIER LETTER SMALL C
    ᶝ  1D9D  MODIFIER LETTER SMALL C WITH CURL
    ᴰ  1D30  MODIFIER LETTER CAPITAL D
    ᵈ  1D48  MODIFIER LETTER SMALL D
    ᶞ  1D9E  MODIFIER LETTER SMALL ETH
    ᴱ  1D31  MODIFIER LETTER CAPITAL E
    ᵉ  1D49  MODIFIER LETTER SMALL E
    ᴲ  1D32  MODIFIER LETTER CAPITAL REVERSED E
    ᵊ  1D4A  MODIFIER LETTER SMALL SCHWA
    ᵋ  1D4B  MODIFIER LETTER SMALL OPEN E
    ᶟ  1D9F  MODIFIER LETTER SMALL REVERSED OPEN E
    ᵌ  1D4C  MODIFIER LETTER SMALL TURNED OPEN E
    ᶠ  1DA0  MODIFIER LETTER SMALL F
    ᴳ  1D33  MODIFIER LETTER CAPITAL G
    ᵍ  1D4D  MODIFIER LETTER SMALL G
    ᶢ  1DA2  MODIFIER LETTER SMALL SCRIPT G
    ˠ  02E0  MODIFIER LETTER SMALL GAMMA
    ʰ  02B0  MODIFIER LETTER SMALL H
    ᴴ  1D34  MODIFIER LETTER CAPITAL H
    ʱ  02B1  MODIFIER LETTER SMALL H WITH HOOK
    ʻ  02BB  MODIFIER LETTER TURNED COMMA
    ʽ  02BD  MODIFIER LETTER REVERSED COMMA
    ᴵ  1D35  MODIFIER LETTER CAPITAL I
    ⁱ  2071  SUPERSCRIPT LATIN SMALL LETTER I
    ᶦ  1DA6  MODIFIER LETTER SMALL CAPITAL I
    ᵎ  1D4E  MODIFIER LETTER SMALL TURNED I
    ᶤ  1DA4  MODIFIER LETTER SMALL I WITH STROKE
    ᶧ  1DA7  MODIFIER LETTER SMALL CAPITAL I WITH STROKE
    ᶥ  1DA5  MODIFIER LETTER SMALL IOTA
    ʲ  02B2  MODIFIER LETTER SMALL J
    ᴶ  1D36  MODIFIER LETTER CAPITAL J
    ᶨ  1DA8  MODIFIER LETTER SMALL J WITH CROSSED-TAIL
    ᶡ  1DA1  MODIFIER LETTER SMALL DOTLESS J WITH STROKE
    ᴷ  1D37  MODIFIER LETTER CAPITAL K
    ᵏ  1D4F  MODIFIER LETTER SMALL K
    ˡ  02E1  MODIFIER LETTER SMALL L
    ᴸ  1D38  MODIFIER LETTER CAPITAL L
    ᶫ  1DAB  MODIFIER LETTER SMALL CAPITAL L
    ᶪ  1DAA  MODIFIER LETTER SMALL L WITH PALATAL HOOK
    ᶩ  1DA9  MODIFIER LETTER SMALL L WITH RETROFLEX HOOK
    ᴹ  1D39  MODIFIER LETTER CAPITAL M
    ᵐ  1D50  MODIFIER LETTER SMALL M
    ᶬ  1DAC  MODIFIER LETTER SMALL M WITH HOOK
    ᴺ  1D3A  MODIFIER LETTER CAPITAL N
    ⁿ  207F  SUPERSCRIPT LATIN SMALL LETTER N
    ᶰ  1DB0  MODIFIER LETTER SMALL CAPITAL N
    ᴻ  1D3B  MODIFIER LETTER CAPITAL REVERSED N
    ᶮ  1DAE  MODIFIER LETTER SMALL N WITH LEFT HOOK
    ᶯ  1DAF  MODIFIER LETTER SMALL N WITH RETROFLEX HOOK
    ᵑ  1D51  MODIFIER LETTER SMALL ENG
    ᴼ  1D3C  MODIFIER LETTER CAPITAL O
    ᵒ  1D52  MODIFIER LETTER SMALL O
    ᵓ  1D53  MODIFIER LETTER SMALL OPEN O
    ᵔ  1D54  MODIFIER LETTER SMALL TOP HALF O
    ᵕ  1D55  MODIFIER LETTER SMALL BOTTOM HALF O
    ᶱ  1DB1  MODIFIER LETTER SMALL BARRED O
    ᴽ  1D3D  MODIFIER LETTER CAPITAL OU
    ᴾ  1D3E  MODIFIER LETTER CAPITAL P
    ᵖ  1D56  MODIFIER LETTER SMALL P
    ᶲ  1DB2  MODIFIER LETTER SMALL PHI
    ʳ  02B3  MODIFIER LETTER SMALL R
    ᴿ  1D3F  MODIFIER LETTER CAPITAL R
    ʴ  02B4  MODIFIER LETTER SMALL TURNED R
    ʵ  02B5  MODIFIER LETTER SMALL TURNED R WITH HOOK
    ʶ  02B6  MODIFIER LETTER SMALL CAPITAL INVERTED R
    ˢ  02E2  MODIFIER LETTER SMALL S
    ᶳ  1DB3  MODIFIER LETTER SMALL S WITH HOOK
    ᶴ  1DB4  MODIFIER LETTER SMALL ESH
    ᵀ  1D40  MODIFIER LETTER CAPITAL T
    ᵗ  1D57  MODIFIER LETTER SMALL T
    ᶵ  1DB5  MODIFIER LETTER SMALL T WITH PALATAL HOOK
    ᵁ  1D41  MODIFIER LETTER CAPITAL U
    ᵘ  1D58  MODIFIER LETTER SMALL U
    ᶸ  1DB8  MODIFIER LETTER SMALL CAPITAL U
    ᵙ  1D59  MODIFIER LETTER SMALL SIDEWAYS U
    ᶶ  1DB6  MODIFIER LETTER SMALL U BAR
    ᶣ  1DA3  MODIFIER LETTER SMALL TURNED H
    ᵚ  1D5A  MODIFIER LETTER SMALL TURNED M
    ᶭ  1DAD  MODIFIER LETTER SMALL TURNED M WITH LONG LEG
    ᶷ  1DB7  MODIFIER LETTER SMALL UPSILON
    ᵛ  1D5B  MODIFIER LETTER SMALL V
    ⱽ  2C7D  MODIFIER LETTER CAPITAL V
    ᶹ  1DB9  MODIFIER LETTER SMALL V WITH HOOK
    ᶺ  1DBA  MODIFIER LETTER SMALL TURNED V
    ʷ  02B7  MODIFIER LETTER SMALL W
    ᵂ  1D42  MODIFIER LETTER CAPITAL W
    ˣ  02E3  MODIFIER LETTER SMALL X
    ʸ  02B8  MODIFIER LETTER SMALL Y
    ᶻ  1DBB  MODIFIER LETTER SMALL Z
    ᶼ  1DBC  MODIFIER LETTER SMALL Z WITH RETROFLEX HOOK
    ᶽ  1DBD  MODIFIER LETTER SMALL Z WITH CURL
    ᶾ  1DBE  MODIFIER LETTER SMALL EZH
    ꝰ  A770  MODIFIER LETTER US

    ᵜ  1D5C  MODIFIER LETTER SMALL AIN
    ᵝ  1D5D  MODIFIER LETTER SMALL BETA
    ᵞ  1D5E  MODIFIER LETTER SMALL GREEK GAMMA
    ᵟ  1D5F  MODIFIER LETTER SMALL DELTA
    ᶿ  1DBF  MODIFIER LETTER SMALL THETA
    ᵠ  1D60  MODIFIER LETTER SMALL GREEK PHI
    ᵡ  1D61  MODIFIER LETTER SMALL CHI
    ᵸ  1D78  MODIFIER LETTER CYRILLIC EN
    ჼ  10FC  MODIFIER LETTER GEORGIAN NAR
    ٰ  0670  ARABIC LETTER SUPERSCRIPT ALEF
    ܑ  0711  SYRIAC LETTER SUPERSCRIPT ALAPH

    ˀ  02C0  MODIFIER LETTER GLOTTAL STOP
    ʼ  02BC  MODIFIER LETTER APOSTROPHE
    ˮ  02EE  MODIFIER LETTER DOUBLE APOSTROPHE
    ʾ  02BE  MODIFIER LETTER RIGHT HALF RING
    ˤ  02E4  MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
    ʿ  02BF  MODIFIER LETTER LEFT HALF RING
    ˁ  02C1  MODIFIER LETTER REVERSED GLOTTAL STOP

    ՙ  0559  ARMENIAN MODIFIER LETTER LEFT HALF RING
    ⵯ  2D6F  TIFINAGH MODIFIER LETTER LABIALIZATION MARK
    ꜀  A700  MODIFIER LETTER CHINESE TONE YIN PING
    ꜁  A701  MODIFIER LETTER CHINESE TONE YANG PING
    ꜂  A702  MODIFIER LETTER CHINESE TONE YIN SHANG
    ꜃  A703  MODIFIER LETTER CHINESE TONE YANG SHANG
    ꜄  A704  MODIFIER LETTER CHINESE TONE YIN QU
    ꜅  A705  MODIFIER LETTER CHINESE TONE YANG QU
    ꜆  A706  MODIFIER LETTER CHINESE TONE YIN RU
    ꜇  A707  MODIFIER LETTER CHINESE TONE YANG RU
    ꜈  A708  MODIFIER LETTER EXTRA-HIGH DOTTED TONE BAR
    ꜉  A709  MODIFIER LETTER HIGH DOTTED TONE BAR
    ꜊  A70A  MODIFIER LETTER MID DOTTED TONE BAR
    ꜋  A70B  MODIFIER LETTER LOW DOTTED TONE BAR
    ꜌  A70C  MODIFIER LETTER EXTRA-LOW DOTTED TONE BAR
    ꜍  A70D  MODIFIER LETTER EXTRA-HIGH DOTTED LEFT-STEM TONE BAR
    ꜎  A70E  MODIFIER LETTER HIGH DOTTED LEFT-STEM TONE BAR
    ꜏  A70F  MODIFIER LETTER MID DOTTED LEFT-STEM TONE BAR
    ꜐  A710  MODIFIER LETTER LOW DOTTED LEFT-STEM TONE BAR
    ꜑  A711  MODIFIER LETTER EXTRA-LOW DOTTED LEFT-STEM TONE BAR
    ꜒  A712  MODIFIER LETTER EXTRA-HIGH LEFT-STEM TONE BAR
    ꜓  A713  MODIFIER LETTER HIGH LEFT-STEM TONE BAR
    ꜔  A714  MODIFIER LETTER MID LEFT-STEM TONE BAR
    ꜕  A715  MODIFIER LETTER LOW LEFT-STEM TONE BAR
    ꜖  A716  MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
    ꜗ  A717  MODIFIER LETTER DOT VERTICAL BAR
    ꜘ  A718  MODIFIER LETTER DOT SLASH
    ꜙ  A719  MODIFIER LETTER DOT HORIZONTAL BAR
    ꜚ  A71A  MODIFIER LETTER LOWER RIGHT CORNER ANGLE
    ꜛ  A71B  MODIFIER LETTER RAISED UP ARROW
    ꜜ  A71C  MODIFIER LETTER RAISED DOWN ARROW
    ꜝ  A71D  MODIFIER LETTER RAISED EXCLAMATION MARK
    ꜞ  A71E  MODIFIER LETTER RAISED INVERTED EXCLAMATION MARK
    ꜟ  A71F  MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
    ꜠  A720  MODIFIER LETTER STRESS AND HIGH TONE
    ꜡  A721  MODIFIER LETTER STRESS AND LOW TONE
    ꞈ  A788  MODIFIER LETTER LOW CIRCUMFLEX ACCENT
    ꞉  A789  MODIFIER LETTER COLON
    ꞊  A78A  MODIFIER LETTER SHORT EQUALS SIGN
    ː  02D0  MODIFIER LETTER TRIANGULAR COLON
    ˑ  02D1  MODIFIER LETTER HALF TRIANGULAR COLON
    ꩰ  AA70  MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
    ʹ  02B9  MODIFIER LETTER PRIME
    ʺ  02BA  MODIFIER LETTER DOUBLE PRIME
    ˂  02C2  MODIFIER LETTER LEFT ARROWHEAD
    ˃  02C3  MODIFIER LETTER RIGHT ARROWHEAD
    ˄  02C4  MODIFIER LETTER UP ARROWHEAD
    ˅  02C5  MODIFIER LETTER DOWN ARROWHEAD
    ˆ  02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
    ˈ  02C8  MODIFIER LETTER VERTICAL LINE
    ˉ  02C9  MODIFIER LETTER MACRON
    ˊ  02CA  MODIFIER LETTER ACUTE ACCENT
    ˋ  02CB  MODIFIER LETTER GRAVE ACCENT
    ˌ  02CC  MODIFIER LETTER LOW VERTICAL LINE
    ˍ  02CD  MODIFIER LETTER LOW MACRON
    ˎ  02CE  MODIFIER LETTER LOW GRAVE ACCENT
    ˏ  02CF  MODIFIER LETTER LOW ACUTE ACCENT
    ˒  02D2  MODIFIER LETTER CENTRED RIGHT HALF RING
    ˓  02D3  MODIFIER LETTER CENTRED LEFT HALF RING
    ˔  02D4  MODIFIER LETTER UP TACK
    ˕  02D5  MODIFIER LETTER DOWN TACK
    ˖  02D6  MODIFIER LETTER PLUS SIGN
    ˗  02D7  MODIFIER LETTER MINUS SIGN
    ˞  02DE  MODIFIER LETTER RHOTIC HOOK
    ˟  02DF  MODIFIER LETTER CROSS ACCENT
    ˥  02E5  MODIFIER LETTER EXTRA-HIGH TONE BAR
    ˦  02E6  MODIFIER LETTER HIGH TONE BAR
    ˧  02E7  MODIFIER LETTER MID TONE BAR
    ˨  02E8  MODIFIER LETTER LOW TONE BAR
    ˩  02E9  MODIFIER LETTER EXTRA-LOW TONE BAR
    ˪  02EA  MODIFIER LETTER YIN DEPARTING TONE MARK
    ˫  02EB  MODIFIER LETTER YANG DEPARTING TONE MARK
    ˬ  02EC  MODIFIER LETTER VOICING
    ˭  02ED  MODIFIER LETTER UNASPIRATED
    ˯  02EF  MODIFIER LETTER LOW DOWN ARROWHEAD
    ˰  02F0  MODIFIER LETTER LOW UP ARROWHEAD
    ˱  02F1  MODIFIER LETTER LOW LEFT ARROWHEAD
    ˲  02F2  MODIFIER LETTER LOW RIGHT ARROWHEAD
    ˳  02F3  MODIFIER LETTER LOW RING
    ˴  02F4  MODIFIER LETTER MIDDLE GRAVE ACCENT
    ˵  02F5  MODIFIER LETTER MIDDLE DOUBLE GRAVE ACCENT
    ˶  02F6  MODIFIER LETTER MIDDLE DOUBLE ACUTE ACCENT
    ˷  02F7  MODIFIER LETTER LOW TILDE
    ˸  02F8  MODIFIER LETTER RAISED COLON
    ˹  02F9  MODIFIER LETTER BEGIN HIGH TONE
    ˺  02FA  MODIFIER LETTER END HIGH TONE
    ˻  02FB  MODIFIER LETTER BEGIN LOW TONE
    ˼  02FC  MODIFIER LETTER END LOW TONE
    ˽  02FD  MODIFIER LETTER SHELF
    ˾  02FE  MODIFIER LETTER OPEN SHELF
    ˿  02FF  MODIFIER LETTER LOW LEFT ARROW
    ∇  2207  NABLA