Class: EastAsianWidthSimple

Inherits:
Object
  • Object
show all
Defined in:
lib/east_asian_width_simple.rb

Constant Summary collapse

# Base class for the errors defined by this class.
Error =
Class.new(StandardError)
# Raised by #lookup when the lookup table has no entry for a code point.
MissingCodePointError =
Class.new(Error)
# Matches a single hexadecimal digit; #initialize uses it to pick out the
# input lines that begin with a code point.
HEX_DIGIT_REGEXP =
/\h/
# Display width for each property symbol stored in the lookup table. A nil
# width means "unknown": #lookup_width warns and falls back to 1 for it.
PROPERTY_TO_WIDTH_MAP =
{ A: nil, F: 2, H: 1, N: nil, Na: 1, W: 2 }.freeze

Instance Method Summary collapse

Constructor Details

#initialize(east_asian_width_txt_io) ⇒ EastAsianWidthSimple

Returns a new instance of EastAsianWidthSimple.

[View source]

10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/east_asian_width_simple.rb', line 10

def initialize(east_asian_width_txt_io)
  @lookup_table = Array.new(2**21)
  east_asian_width_txt_io.each_line do |line|
    next unless line.start_with?(HEX_DIGIT_REGEXP)

    code_point, property = line.split(' ').first.split(';')
    if code_point.include?('..')
      first, last = code_point.split('..')
      @lookup_table.fill(property.to_sym, first.to_i(16)..last.to_i(16))
    else
      @lookup_table[code_point.to_i(16)] = property.to_sym
    end
  end
end

Instance Method Details

#inspect ⇒ Object

[View source]

56
57
58
# File 'lib/east_asian_width_simple.rb', line 56

# Returns a compact description made of the class name and the object id
# only (presumably so the large lookup table is never dumped).
#
# @return [String] a string of the form "#<ClassName:object_id>"
def inspect
  format('#<%s:%s>', self.class, object_id)
end

#lookup(codepoint) ⇒ Object

[View source]

43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/east_asian_width_simple.rb', line 43

# Returns the property recorded in the lookup table for a code point.
#
# @param codepoint [Integer] the code point to look up
# @return [Symbol] the property stored for +codepoint+
# @raise [MissingCodePointError] if the table has no entry for +codepoint+,
#   which includes every negative +codepoint+
def lookup(codepoint)
  property = @lookup_table[codepoint]
  # Array#[] counts negative indexes from the end of the table, so without
  # this guard a negative argument could return the property of an unrelated
  # code point instead of being reported as missing.
  return property unless property.nil? || codepoint.negative?

  raise(
    MissingCodePointError,
    "Cannot find the code point 0x#{codepoint.to_s(16)} " \
    'in the lookup table.'
  )
end

#lookup_width(codepoint) ⇒ Object

[View source]

30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/east_asian_width_simple.rb', line 30

# Returns the display width of a single code point.
#
# The property found by #lookup is translated through PROPERTY_TO_WIDTH_MAP.
# When the map holds no width for that property, a warning is emitted and
# the width falls back to 1.
#
# @param codepoint [Integer] the code point to measure
# @return [Integer] the mapped width, or 1 when the width is unknown
def lookup_width(codepoint)
  property = lookup(codepoint)
  mapped_width = PROPERTY_TO_WIDTH_MAP[property]
  if mapped_width.nil?
    warn <<~WARNING_MESSAGE
      The code point 0x#{codepoint.to_s(16)} has the property "#{property}" \
      whose width is unknown.
    WARNING_MESSAGE
    1
  else
    mapped_width
  end
end

#string_width(string) ⇒ Object

[View source]

25
26
27
28
# File 'lib/east_asian_width_simple.rb', line 25

# Returns the total display width of a string, i.e. the sum of
# #lookup_width over all of its code points.
#
# Strings in another encoding are transcoded to UTF-8 first so that the
# code points passed to #lookup_width are Unicode code points.
#
# @param string [String] the text to measure
# @return [Integer] the summed width; 0 for an empty string
def string_width(string)
  utf8_text = string.encoding == Encoding::UTF_8 ? string : string.encode(Encoding::UTF_8)
  utf8_text.each_codepoint.reduce(0) do |total, codepoint|
    total + lookup_width(codepoint)
  end
end