Class: Proso::Dictionary
- Inherits: Object
- Inheritance chain:
- Object
- Proso::Dictionary
- Includes:
- Treat::Core::DSL
- Defined in:
- lib/proso.rb
Constant Summary collapse
- SEMI_SCUDS =
%w{a all am an and are as at be been but by can could dear did do does else for from get got had has have he her hers him his how i if in is it its just least let like may me might most must my no nor not of off on or our own said say says she should since so some than that the their them then there these they this tis to too twas us wants was we went were what when where which while who whom why will with would yet you your}
Class Method Summary collapse
- .serialize_cmudict(filepath) ⇒ Object
Instance Method Summary collapse
- #attridgify(string) ⇒ Object
-
#initialize ⇒ Dictionary
constructor
A new instance of Dictionary.
- #inspect ⇒ Object
- #levenshtein_distance(s1, s2) ⇒ Object
- #load_from_json(filepath) ⇒ Object
- #stress_distance(string, meter) ⇒ Object
- #stresses(string) ⇒ Object
- #token_stress(string) ⇒ Object
Constructor Details
#initialize ⇒ Dictionary
11 12 13 |
# File 'lib/proso.rb', line 11

# Creates a dictionary and loads its pronunciation table.
#
# The table is read through #load_from_json from `data/cmudict.json`, located
# two directory levels up from this source file's own path, and stored in
# @dict for later lookups.
def initialize
  # expand_path with __FILE__ as the base makes the lookup independent of the
  # process's current working directory.
  @dict = load_from_json(File.expand_path('../../data/cmudict.json', __FILE__))
end
Class Method Details
.serialize_cmudict(filepath) ⇒ Object
21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/proso.rb', line 21

# Converts a CMUdict-style text file into a JSON map of word => pronunciations.
#
# Each line shaped like `WORD PHONEMES` or `WORD(2) PHONEMES` contributes one
# pronunciation string; entries sharing the same word (e.g. the `(n)`
# alternates) are appended to that word's list in file order. Lines that do
# not match the pattern are skipped.
#
# @param filepath [String] path of the dictionary text file to read
# @param output_path [String] destination for the JSON document; defaults to
#   +filepath+, which overwrites the input in place (the historical behaviour)
# @return [Integer] number of bytes written
def self.serialize_cmudict(filepath, output_path = filepath)
  entries = File.read(filepath).scan(/^([\w\d'-]+)(\(\d+\))?\s+([\w\d ]+)$/)

  # The middle capture is the optional "(n)" alternate marker; it is only
  # needed to let the pattern match, not to build the table.
  dict = entries.each_with_object({}) do |(word, _alternate, phonemes), memo|
    (memo[word] ||= []) << phonemes
  end

  File.write(output_path, dict.to_json)
end
Instance Method Details
#attridgify(string) ⇒ Object
69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/proso.rb', line 69

# Coerces a raw stress string ('s' = stressed, 'w' = weak) into the closest
# plausible Attridge beat/offbeat string ('B' = beat, 'o' = offbeat).
#
# Every Attridge-permissible adjustment is treated as equally acceptable, so
# the result says nothing about how simply the line fulfils a metrical
# contract.
#
# @param string [String] stress pattern made of 's' and 'w'
# @return [String] the rewritten pattern
def attridgify(string)
  # Order matters: the three-syllable promotions/demotions and the line-initial
  # rule must run before the plain s -> B / w -> o translation, and the final
  # two rules tidy up runs that the translation itself produces.
  rewrites = [
    [/www/, 'oBo'],
    [/sss/, 'BoB'],
    [/^ss/, 'oB'],
    [/s/, 'B'],
    [/w/, 'o'],
    [/BBB/, 'BoB'],
    [/ooo/, 'oBo']
  ]

  rewrites.inject(string) do |pattern, (matcher, replacement)|
    pattern.gsub(matcher, replacement)
  end
end
#inspect ⇒ Object
33 34 35 |
# File 'lib/proso.rb', line 33

# Compact inspect string of the form `#<ClassName:object_id>`.
#
# Overrides the default Object#inspect, which would otherwise print every
# instance variable (including the whole @dict table).
#
# @return [String]
def inspect
  # Derive the name from the class itself so the output always matches the
  # real constant (Proso::Dictionary) rather than a hand-typed label.
  "#<#{self.class.name}:#{object_id}>"
end
#levenshtein_distance(s1, s2) ⇒ Object
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/proso.rb', line 84

# Levenshtein edit distance between two sequences: the minimum number of
# single-element insertions, deletions and substitutions turning +s1+ into
# +s2+.
#
# @param s1 [String] first sequence (anything responding to #length and #[])
# @param s2 [String] second sequence
# @return [Integer] the edit distance
def levenshtein_distance(s1, s2)
  rows = s1.length
  cols = s2.length
  return rows if cols == 0
  return cols if rows == 0

  # Only the previous row of the distance table is needed to fill the next.
  previous = (0..cols).to_a
  (1..rows).each do |i|
    current = [i]
    (1..cols).each do |j|
      current << if s1[i - 1] == s2[j - 1]
                   previous[j - 1]
                 else
                   [previous[j], current[j - 1], previous[j - 1]].min + 1
                 end
    end
    previous = current
  end

  previous[cols]
end
#load_from_json(filepath) ⇒ Object
15 16 17 18 19 |
# File 'lib/proso.rb', line 15

# Reads a JSON document from disk and returns the parsed Ruby structure.
#
# @param filepath [String] path to a JSON file
# @return [Object] whatever JSON.parse produces for the file's contents
# @raise [Errno::ENOENT] if the file does not exist
# @raise [JSON::ParserError] if the contents are not valid JSON
def load_from_json(filepath)
  # File.read opens, reads and closes the file in one call; no explicit
  # File.open wrapper (and second open of the same path) is needed.
  JSON.parse(File.read(filepath))
end
#stress_distance(string, meter) ⇒ Object
37 38 39 40 41 42 43 44 45 |
# File 'lib/proso.rb', line 37

# Smallest edit distance between any candidate stress pattern of +string+ and
# the given meter.
#
# Each candidate from #stresses is compared (via #levenshtein_distance) with
# +meter+ repeated `pattern.size / 2` times.
# NOTE(review): the division by 2 presumably assumes a two-syllable foot such
# as 'oB' — confirm against callers.
#
# @param string [String] the text to scan
# @param meter [String] the metrical unit to repeat
# @return [Integer, Float] the minimum distance, or Float::INFINITY when a
#   RuntimeError (e.g. a missing pronunciation) is raised along the way
def stress_distance(string, meter)
  distances = stresses(string).map do |pattern|
    levenshtein_distance(pattern, meter * (pattern.size / 2))
  end
  distances.min
rescue RuntimeError => e
  # Best effort: report the problem and treat the line as infinitely far off.
  puts e
  Float::INFINITY
end
#stresses(string) ⇒ Object
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/proso.rb', line 52

# Lists the distinct beat/offbeat patterns the given text could carry.
#
# The text is tokenized through the `phrase` helper (presumably supplied by
# the included Treat DSL — confirm), tokens containing punctuation are
# dropped, and each remaining token is looked up with #token_stress. Every
# combination of per-token pronunciations is then reduced to its stress
# digits ('1', '2', '3' => 's', '0' => 'w') and passed through #attridgify.
#
# @param string [String] the text to analyse
# @return [Array<String>] unique patterns, in first-seen order
# @raise [RuntimeError] propagated from #token_stress for unknown words
# @raise [NoMethodError] when no tokens survive the punctuation filter
#   (there is then no first token to call #product on)
def stresses(string)
  per_token = phrase(string)
    .do(:tokenize)
    .map(&:value)
    .reject { |token| /[[:punct:]]/.match(token) }
    .map { |token| token_stress(token) }

  first, *rest = per_token

  patterns = first.product(*rest).map do |pronunciations|
    # Keep only the stress digits, then translate them to s/w marks.
    marks = pronunciations.join.gsub(/\D/, '').gsub(/[123]/, 's').gsub(/0/, 'w')
    attridgify(marks)
  end

  patterns.uniq
end
#token_stress(string) ⇒ Object
47 48 49 50 |
# File 'lib/proso.rb', line 47

# Looks up the candidate pronunciations for a single word.
#
# Words listed in SEMI_SCUDS (compared in lower case) are always reported as
# a single unstressed syllable, ["0"], without consulting the dictionary.
# Any other word is looked up in @dict under its upper-cased form.
#
# @param string [String] the word to look up
# @return [Array<String>] ["0"] for SEMI_SCUDS words, otherwise the @dict entry
# @raise [RuntimeError] if @dict has no entry for the word
def token_stress(string)
  return ["0"] if SEMI_SCUDS.include?(string.downcase)

  pronunciations = @dict[string.upcase]
  raise "No pronunciation entry for '#{string}'" unless pronunciations

  pronunciations
end