Blame view

plugins/emoji/update_emoji.pl 3.73 KB
dcebc9e8f   mj   Squashed 'repos/r...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
  #!/usr/bin/perl -w
  #
  # update_emoji.pl
  #
  # This script generates the emoji.plugin.zsh emoji definitions from the Unicode
  # character data for the emoji characters.
  #
  # The data file can be found at http://unicode.org/Public/emoji/latest/emoji-data.txt
  # as referenced in Unicode TR51 (http://www.unicode.org/reports/tr51/index.html).
  #
  # This is known to work with the data file from version 1.0. It may not work with later
  # versions if the format changes. In particular, this reads line comments to get the
  # emoji character name and unicode version.
  #
  # Country names have punctuation and other non-letter characters replaced with
  # underscores, to avoid possible complications with having to escape the strings
  # when using them as array subscripts. The definition file seems to use some
  # combining characters like accents, which are also replaced during this process.
  
  use strict;
  use warnings;
  use 5.010;
  use autodie;
  
  use Path::Class;
  use File::Copy;
  
  # Parse definitions out of the data file and convert
  # Parse emoji definitions out of the Unicode emoji data file and write them
  # out as zsh associative-array assignments.
  #
  # Arguments:
  #   $infile      - path of the emoji data file to read
  #   $outfilename - path of the zsh definition file to (re)generate
  #
  # The output is first written to "$outfilename.tmp" and only moved over
  # $outfilename after every input line has been processed, so a parse failure
  # dies before the existing definition file is replaced.
  #
  # Dies if a data line does not match the expected format, if the temp file
  # cannot be closed cleanly, or if the final move fails.
  sub process_emoji_data_file {
  	my ( $infile, $outfilename ) = @_;
  	my $file = file($infile);
  	my $outfile = file($outfilename);
  	my $outfilebase = $outfile->basename();
  	my $tempfilename = "$outfilename.tmp";
  	my $tempfile = file($tempfilename);
  	my $outfh = $tempfile->openw();
  	$outfh->print("
  # $outfilebase - Emoji character definitions for oh-my-zsh emoji plugin
  #
  # This file is auto-generated by update_emoji.pl. Do not edit it manually.
  #
  # This contains the definition for:
  #   \$emoji         - which maps character names to Unicode characters
  #   \$emoji_flags   - maps country names to Unicode flag characters using region indicators 
  
  # Main emoji
  typeset -gAH emoji
  # National flags
  typeset -gAH emoji_flags
  # Combining modifiers
  typeset -gAH emoji_mod
  
  ");
  
  	# NOTE(review): the input handle is opened without an explicit encoding
  	# layer, so \p{Letter} below presumably tests raw bytes rather than decoded
  	# characters - confirm before relying on non-ASCII country names.
  	my $fh = $file->openr();
  	my $line_num = 0;
  	# getline() is a method call, so the implicit defined() test that Perl adds
  	# for <$fh> does not apply; test explicitly so a final line of "0" is kept.
  	while ( defined( my $line = $fh->getline() ) ) {
  		$line_num++;
  		# Skip all-comment lines (from the header) and blank lines
  		# (But don't strip comments on normal lines; we need to parse those for
  		# the emoji names.)
  		next if $line =~ /^\s*#/ or $line =~ /^\s*$/;
  
  		# Fields, in order: code points; style; level; modifier status; sources;
  		# then, from the trailing comment, the Unicode version and the name.
  		# A failed match yields an empty list, which makes the assignment false.
  		my ($code, $style, $level, $modifier_status, $sources, $version, $keycap_name)
  			= $line =~ /^(\S.*?\S)\s*;\s*(\w+)\s*;\s*(\w+)\s*;\s*(\w+)\s*;\s*(\w.*?)\s*#\s*V(\S+)\s\(.*?\)\s*(\w.*\S)\s*$/
  			or die "Failed parsing line $line_num: '$line'";
  		#print "code=$code style=$style level=$level modifier_status=$modifier_status sources=$sources version=$version name=$keycap_name\n";
  		my @code_points = split /\s+/, $code;
  
  		# Names of the form "flag for <country>" go into the flags map instead
  		# of the main map.
  		my $flag_country = "";
  		if ( $keycap_name =~ /^flag for (\S.*?)\s*$/) {
  			$flag_country = $1;
  		}
  
  		# Each code point becomes a \U escape inside a zsh $'...' string.
  		my $zsh_code = join '', map { "\\U$_" } @code_points;
  		# Convert keycap names to valid associative array names that do not require any
  		# quoting. Works fine for most stuff, but is clumsy for flags.
  		my $omz_name = lc($keycap_name);
  		$omz_name =~ s/[^A-Za-z0-9]/_/g;
  		my $zsh_flag_country = $flag_country;
  		$zsh_flag_country =~ s/[^\p{Letter}]/_/g;
  		if ($flag_country) {
  			$outfh->print("emoji_flags[$zsh_flag_country]=\$'$zsh_code'
  ");
  		} else {
  			$outfh->print("emoji[$omz_name]=\$'$zsh_code'
  ");
  		}
  		# Modifiers are included in both the main set and their separate map,
  		# because they have a standalone representation as a color swatch.
  		if ( $modifier_status eq "modifier" ) {
  			$outfh->print("emoji_mod[$omz_name]=\$'$zsh_code'
  ");
  		}
  	}
  	$fh->close();
  	$outfh->print("
  ");
  	# autodie does not cover method calls, and buffered write errors only
  	# surface at close, so check it before the temp file replaces the target.
  	$outfh->close()
  		or die "Failed closing temp file $tempfilename: $!";
  
  	move($tempfilename, $outfilename)
  		or die "Failed moving temp file to $outfilename: $!";
  }
  
  # Input and output locations are fixed relative paths.
  my $emoji_data_path  = "emoji-data.txt";
  my $definitions_path = "emoji-char-definitions.zsh";
  
  # Regenerate the zsh definitions from the data file.
  process_emoji_data_file( $emoji_data_path, $definitions_path );
  
  # Report which file was just rewritten.
  print "Updated definition file $definitions_path
  ";