Files
gemoji/script/benchmark
2015-05-18 17:41:13 +02:00

43 lines
1.2 KiB
Ruby

#!/usr/bin/env ruby
require 'emoji'
require 'benchmark'
# Build a regexp that matches all native emoji characters.
#
# Some emoji code point sequences are prefixes of other emoji code point
# sequences, e.g.:
#
#   U+2728 SPARKLES
#   vs.
#   U+2728 SPARKLES U+FE0F VARIATION SELECTOR-16
#
# The sequences are ordered longest-first so that the alternation prefers the
# longest possible sequence. Every sequence is escaped (Regexp.union does this
# for string arguments), so an alias that contains a regexp metacharacter --
# for example a keycap built on "*" -- is matched literally instead of
# breaking the pattern.
#
# The compiled pattern is memoized in the global $unicodes_pattern.
#
# Returns a Regexp.
def unicodes_pattern
  $unicodes_pattern ||= Regexp.union(emoji_unicodes.sort_by { |sequence| -sequence.length })
end
# Gather the unicode aliases of every entry returned by Emoji.all into a
# single flat list.
#
# Returns the concatenation of each entry's `unicode_aliases`.
def emoji_unicodes
  every_emoji = Emoji.all
  every_emoji.flat_map { |emoji| emoji.unicode_aliases }
end
# Replace every substring of `text` that matches `unicodes_pattern` with a
# <g-emoji> tag whose alias attribute is the name of the emoji returned by
# Emoji.find_by_unicode for that substring.
#
# text - the String to scan.
#
# Returns a new String; `text` itself is not modified.
def unicode_emoji_filter(text)
  text.gsub(unicodes_pattern) { |match| "<g-emoji alias='#{Emoji.find_by_unicode(match).name}'>" }
end
# Same replacement as `unicode_emoji_filter`, but delegating the scanning to
# Emoji.gsub_unicode: the block receives each emoji object that method yields
# and returns the <g-emoji> tag carrying that emoji's name.
#
# text - the String to scan.
#
# Returns whatever Emoji.gsub_unicode returns for `text`.
def gsub_unicode(text)
  Emoji.gsub_unicode(text) { |found| "<g-emoji alias='#{found.name}'>" }
end
# Read the emoji database located relative to this script's real path and
# repeat its contents 100 times (presumably to give the benchmark a larger
# input).
data_file = File.join(File.dirname(File.realpath(__FILE__)), '../db/emoji.json')
raw = File.read(data_file, encoding: 'UTF-8') * 100

puts "Benchmarking #{raw.bytesize} bytes..."

# Benchmark.bm prints each report's label next to its timings. Printing the
# result of Benchmark.measure directly loses the label, because the default
# Benchmark::Tms format has no label field, which left two indistinguishable
# timing lines in the output.
filter_label = 'unicode_emoji_filter'
gsub_label = 'Emoji#gsub_unicode'
Benchmark.bm([filter_label, gsub_label].map(&:length).max) do |bench|
  bench.report(filter_label) { unicode_emoji_filter(raw) }
  bench.report(gsub_label) { gsub_unicode(raw) }
end