Class: Canonizer

Inherits:
Object
  • Object
show all
Defined in:
app/lib/canonizer.rb

Overview

Helper for canonizing and transliterating strings

Constant Summary collapse

TRANSLITERATION_MAP =

Maps lowercase non-ASCII letters (Cyrillic and accented Latin) to their ASCII transliterations.

{
  # Lowercase Cyrillic letters (а-я plus ё) => ASCII transliteration.
  # 'ъ' and 'ь' map to the empty string, so they are dropped from the output.
  'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd', 'е' => 'e',
  'ё' => 'yo', 'ж' => 'zh', 'з' => 'z', 'и' => 'i', 'й' => 'j', 'к' => 'k',
  'л' => 'l', 'м' => 'm', 'н' => 'n', 'о' => 'o', 'п' => 'p', 'р' => 'r',
  'с' => 's', 'т' => 't', 'у' => 'u', 'ф' => 'f', 'х' => 'kh', 'ц' => 'c',
  'ч' => 'ch', 'ш' => 'sh', 'щ' => 'shh', 'ъ' => '', 'ы' => 'y', 'ь' => '',
  'э' => 'e', 'ю' => 'yu', 'я' => 'ya',
  # Lowercase Latin letters with diacritics => plain ASCII.
  'å' => 'ao', 'ä' => 'ae', 'ö' => 'oe', 'é' => 'e'
}.freeze

Class Method Summary collapse

Class Method Details

.canonize(input) ⇒ Object

Parameters:

  • input (String)


27
28
29
30
31
# File 'app/lib/canonizer.rb', line 27

# Reduces +input+ to a canonical comparison key: lowercased, stripped of
# surrounding whitespace, and with every character other than a-z, а-я, ё
# and 0-9 removed.
#
# When nothing survives the filtering (e.g. the input is only punctuation),
# the lowercased and stripped text is returned unchanged instead of an empty
# string.
#
# @param input [String, #to_s] value to canonize; +nil+ is treated as ''
# @return [String] the canonical form
def self.canonize(input)
  normalized = input.to_s.downcase.strip
  filtered = normalized.gsub(/[^a-zа-я0-9ё]/, '')
  # Fall back to the unfiltered text so the result is only empty when the
  # stripped input itself was empty.
  return normalized if filtered.empty?

  filtered
end

.transliterate(text) ⇒ Object

Parameters:

  • text (String)


17
18
19
20
21
22
23
24
# File 'app/lib/canonizer.rb', line 17

# Converts +text+ into a lowercase ASCII slug.
#
# Steps, in order:
# 1. lowercase the text and replace every character that is a key of
#    TRANSLITERATION_MAP with its mapped value;
# 2. replace every character outside [-a-z0-9_] with a dash;
# 3. if the text contains at least one letter or digit, drop leading and
#    trailing dashes/underscores;
# 4. collapse runs of dashes into one and drop any trailing dashes.
#
# Text with no letter or digit skips step 3, so underscores and a leading
# dash can survive (e.g. "-_-" becomes "-_").
#
# @param text [String, #to_s] value to transliterate; +nil+ is treated as ''
# @return [String] the slug
def self.transliterate(text)
  mapped_chars = Regexp.new("[#{TRANSLITERATION_MAP.keys.join}]")
  ascii = text.to_s.downcase.gsub(mapped_chars, TRANSLITERATION_MAP)

  disallowed = /[^-a-z0-9_]/
  # Matches only when at least one letter/digit is present; group 1 is the
  # text without its leading and trailing dashes/underscores.
  trimmed_core = /\A[-_]*([-a-z0-9_]*[a-z0-9]+)[-_]*\z/

  slug = ascii.gsub(disallowed, '-')
  slug = slug.sub(trimmed_core, '\1')
  slug.squeeze('-').sub(/-+\z/, '')
end

.urlize(input) ⇒ Object

Parameters:

  • input (String)


34
35
36
37
# File 'app/lib/canonizer.rb', line 34

# Builds a URL-friendly string from +input+ while keeping Cyrillic letters.
#
# The input is lowercased and squished (String#squish comes from
# ActiveSupport, not core Ruby), then every character other than a-z, а-я,
# ё and 0-9 is replaced with a dash and trailing dashes are removed.
# Consecutive and leading dashes are left as they are.
#
# @param input [String, #to_s] value to convert; +nil+ is treated as ''
# @return [String] the dash-separated form
def self.urlize(input)
  compacted = input.to_s.downcase.squish
  dashed = compacted.gsub(/[^a-zа-я0-9ё]/, '-')
  dashed.sub(/-+\z/, '')
end