Class: Numerizer

Inherits:
Object
  • Object
show all
Defined in:
lib/numerizer/numerizer.rb

Constant Summary collapse

# Pattern/replacement pairs for the Polish number words 0-19.
#
# numerize interpolates the first element of each pair into a case-insensitive
# regexp and uses the second element as the gsub replacement (prefixed with
# "<num>"). Pairs are applied in order, so the teens come first: otherwise
# 'jeden' would match inside 'jedenaście'.
#
# A trailing (\W|$) guard, echoed back by \1 in the replacement, stops a short
# word from matching at the start of a longer number word, e.g. 'pięć' in
# 'pięćdziesiąt' or 'cztery' in 'czterysta'.
DIRECT_NUMS = [
  ['jedenaście', '11'],
  ['dwanaście', '12'],
  ['trzynaście', '13'],
  ['czternaście', '14'],
  ['piętnaście', '15'],
  ['szesnaście', '16'],
  ['siedemnaście', '17'],
  ['osiemnaście', '18'],
  ['dziewiętnaście', '19'],
  ['zero', '0'],
  ['jeden', '1'],
  ['dwa(\W|$)', '2\1'],
  ['trzy(\W|$)', '3\1'],
  ['cztery(\W|$)', '4\1'], # guarded so that 'czterysta' (400) is left for the hundreds pass
  ['pięć(\W|$)', '5\1'], # matches pięć but not pięćdziesiąt / pięćset
  ['sześć(\W|$)', '6\1'],
  ['siedem(\W|$)', '7\1'],
  ['osiem(\W|$)', '8\1'],
  ['dziewięć(\W|$)', '9\1'],
  ['dziesięć', '10']
].freeze
# Polish words for the tens 20-90, paired with their integer values.
#
# numerize interpolates the word into a case-insensitive regexp. The integer is
# added to a following single digit when one is present, and used on its own
# otherwise.
TEN_PREFIXES = [
  ['dwadzieścia', 20],
  ['trzydzieści', 30],
  ['czterdzieści', 40],
  ['pięćdziesiąt', 50],
  ['sześćdziesiąt', 60],
  ['siedemdziesiąt', 70],
  ['osiemdziesiąt', 80],
  ['dziewięćdziesiąt', 90]
].freeze
# Polish words for the hundreds 100-900, plus 'tysiąc' (1000), paired with
# their integer values.
#
# numerize interpolates the first element into a case-insensitive regexp and
# replaces every match with the tagged integer.
#
# NOTE(review): the constant name is misspelt ("HUNDRET"); it is kept as is
# because code outside this block refers to it.
HUNDRET_PREFIXES = [
  # 'sto' is short enough to occur inside ordinary words ("miasto", "stolica"),
  # so it only matches when not surrounded by letters. POSIX bracket classes
  # are Unicode-aware in Ruby, so Polish diacritics count as letters too.
  ['(?<![[:alpha:]])sto(?![[:alpha:]])', 100],
  ['dwieście', 200],
  ['trzysta', 300],
  ['czterysta', 400],
  ['pięćset', 500],
  ['sześćset', 600],
  ['siedemset', 700],
  ['osiemset', 800],
  ['dziewięćset', 900],
  ['tysiąc', 1000]
].freeze

Class Method Summary collapse

Class Method Details

.numerize(string) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/numerizer/numerizer.rb', line 50

# Replaces Polish number words in +string+ with digits.
#
# Works on a copy, so the caller's string is never modified. Each recognised
# word is first rewritten as a "<num>"-tagged digit string so that later passes
# can spot numbers produced by earlier ones; the tags are stripped before the
# result is returned.
#
# @param string [String] text that may contain Polish number words
# @return [String] a new string with recognised number words replaced by digits
def self.numerize(string)
  text = string.dup

  # Disabled preprocessing, kept for reference:
  # string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words but shouldn't matter for date extraction
  # string.gsub!(/a half/, 'haAlf') # take the 'a' out so it doesn't turn into a 1, save the half for the end

  # Pass 1: direct word -> digit substitutions. The replacement may carry a \1
  # back-reference that restores the character consumed by the pattern's guard.
  DIRECT_NUMS.each do |pattern, digits|
    text.gsub!(/#{pattern}/i, "<num>#{digits}")
  end

  # Pass 2: a tens word followed by a tagged single digit collapses into their
  # sum. When no lone digit was captured, nil.to_i contributes 0.
  TEN_PREFIXES.each do |pattern, tens|
    text.gsub!(/(?:#{pattern}) *<num>(\d(?=[^\d]|$))*/i) do
      "<num>#{tens + Regexp.last_match(1).to_i}"
    end
  end

  # Pass 3: tens words that are still spelled out stand on their own.
  TEN_PREFIXES.each do |pattern, tens|
    text.gsub!(/#{pattern}/i) { "<num>#{tens}" }
  end

  # Pass 4: hundreds (and 'tysiąc').
  HUNDRET_PREFIXES.each do |pattern, amount|
    text.gsub!(/#{pattern}/i) { "<num>#{amount}" }
    # NOTE(review): andition is defined outside this excerpt; presumably it
    # merges adjacent tagged numbers in place - confirm against its definition.
    andition(text)
  end

  # Disabled fractional handling, kept for reference:
  # I'm not combining this with the previous block as using float addition complicates the strings
  # (with extraneous .0's and such )
  # string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s }

  text.gsub('<num>', '')
end