#!/usr/bin/perl -w
#
# update_emoji.pl
#
# This script generates the emoji.plugin.zsh emoji definitions from the Unicode
# character data for the emoji characters.
#
# The data file can be found at https://unicode.org/Public/emoji/latest/emoji-data.txt
# as referenced in Unicode TR51 (https://www.unicode.org/reports/tr51/index.html).
#
# This is known to work with the data file from version 1.0. It may not work with later
# versions if the format changes. In particular, this reads line comments to get the
# emoji character name and unicode version.
#
# Country names have punctuation and other non-letter characters removed from their name,
# to avoid possible complications with having to escape the strings when using them as
# array subscripts. The definition file seems to use some combining characters like accents
# that get stripped during this process.

use strict;
use warnings;
use 5.010;
use autodie;

use Path::Class;
use File::Copy;

# Parse definitions out of the data file and convert them into zsh
# associative-array assignments.
#
# Parameters:
#   $infile      - path of the emoji data file to read
#   $outfilename - path of the zsh definition file to generate
#
# The output is first written to "$outfilename.tmp" and only moved over
# $outfilename once every line has been processed, so a parse failure does
# not replace an existing definition file.
#
# Both files are handled as UTF-8. The data file contains non-ASCII country
# names, and the \p{Letter} test below is only meaningful on decoded
# characters; on raw bytes each byte of a multi-byte character would be
# classified separately and the generated keys would be corrupted.
#
# Dies if a non-comment, non-blank line does not match the expected format,
# or if the temp file cannot be moved into place.
sub process_emoji_data_file {
    my ( $infile, $outfilename ) = @_;
    my $file         = file($infile);
    my $outfile      = file($outfilename);
    my $outfilebase  = $outfile->basename();
    my $tempfilename = "$outfilename.tmp";
    my $tempfile     = file($tempfilename);
    my $outfh        = $tempfile->openw();
    # Encode on output to mirror the decoding applied to the input below.
    binmode $outfh, ':encoding(UTF-8)';
    $outfh->print("
# $outfilebase - Emoji character definitions for oh-my-zsh emoji plugin
#
# This file is auto-generated by update_emoji.pl. Do not edit it manually.
#
# This contains the definition for:
# \$emoji - which maps character names to Unicode characters
# \$emoji_flags - maps country names to Unicode flag characters using region indicators

# Main emoji
typeset -gAH emoji
# National flags
typeset -gAH emoji_flags
# Combining modifiers
typeset -gAH emoji_mod

");

    my $fh = $file->openr();
    # Decode the input so the regexes below operate on characters, not bytes.
    binmode $fh, ':encoding(UTF-8)';
    my $line_num = 0;
    while ( my $line = $fh->getline() ) {
        $line_num++;
        # Skip all-comment lines (from the header) and blank lines
        # (But don't strip comments on normal lines; we need to parse those for
        # the emoji names.)
        next if $line =~ /^\s*#/ or $line =~ /^\s*$/;

        # Capture groups: 1 = code points, 2 = style, 3 = level,
        # 4 = modifier status, 5 = sources, 6 = version, 7 = character name.
        # Only groups 1, 4 and 7 are needed to generate the definitions.
        unless ( $line =~ /^(\S.*?\S)\s*;\s*(\w+)\s*;\s*(\w+)\s*;\s*(\w+)\s*;\s*(\w.*?)\s*#\s*V(\S+)\s\(.*?\)\s*(\w.*\S)\s*$/ ) {
            die "Failed parsing line $line_num: '$line'";
        }
        my ( $code, $modifier_status, $keycap_name ) = ( $1, $4, $7 );

        # Each code point becomes a \U escape inside a zsh $'...' string.
        my @code_points = split /\s+/, $code;
        my $zsh_code = join '', map { "\\U$_" } @code_points;

        # Convert keycap names to valid associative array names that do not require any
        # quoting. Works fine for most stuff, but is clumsy for flags.
        my $omz_name = lc($keycap_name);
        $omz_name =~ s/[^A-Za-z0-9]/_/g;

        if ( $keycap_name =~ /^flag for (\S.*?)\s*$/ ) {
            # Flags are keyed by country name; anything that is not a letter
            # is replaced so the key needs no quoting as a subscript.
            my $zsh_flag_country = $1;
            $zsh_flag_country =~ s/[^\p{Letter}]/_/g;
            $outfh->print("emoji_flags[$zsh_flag_country]=\$'$zsh_code'\n");
        } else {
            $outfh->print("emoji[$omz_name]=\$'$zsh_code'\n");
        }
        # Modifiers are included in both the main set and their separate map,
        # because they have a standalone representation as a color swatch.
        if ( $modifier_status eq "modifier" ) {
            $outfh->print("emoji_mod[$omz_name]=\$'$zsh_code'\n");
        }
    }
    $fh->close();
    $outfh->print("\n");
    $outfh->close();

    # File::Copy::move is not covered by autodie, so check it explicitly.
    move($tempfilename, $outfilename)
        or die "Failed moving temp file to $outfilename: $!";
}

my $datafile = "emoji-data.txt";
my $zsh_def_file = "emoji-char-definitions.zsh";
process_emoji_data_file($datafile, $zsh_def_file);

print "Updated definition file $zsh_def_file\n";