Class: JDict::DictIndex

Inherits:
Object
  • Object
show all
Defined in:
lib/ruby-jdict/index.rb

Constant Summary collapse

# Matches an entity declaration of the form <!ENTITY name "value">.
# Capture 1 is the entity name (a run of non-space characters); capture 2 is
# the quoted text, matched greedily up to the last `">` on the line.
ENTITY_REGEX =
/<!ENTITY\s([^ ]*)\s\"(.*)">/

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ DictIndex

Initialize a full-text search index backend for JMdict

Parameters:

  • path (String)

    path to the dictionary



14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/ruby-jdict/index.rb', line 14

# Initialize a full-text search index backend for JMdict.
#
# The index database ("jdict.db") is kept in the same directory as the
# dictionary file. The index is built as part of construction (build_index!).
#
# @param path [String] path to the dictionary
# @raise [RuntimeError] if no file exists at +path+
def initialize(path)
  @dictionary_path = path
  @index_path = File.dirname(@dictionary_path)
  @pos_hash = {}

  # File.exists? was deprecated and removed in Ruby 3.2; File.exist? works on
  # every Ruby version.
  raise "No dictionary found at path #{@dictionary_path}" unless File.exist?(@dictionary_path)

  @db_file = File.join(@index_path, "jdict.db")
  initialize_db(@db_file)

  build_index!
end

Instance Attribute Details

#path ⇒ Object (readonly)

Returns the value of attribute path.



10
11
12
# File 'lib/ruby-jdict/index.rb', line 10

# Returns the path this index was created for.
#
# Falls back to @dictionary_path when @path has not been assigned, so the
# reader does not return nil for an index constructed from a dictionary path.
#
# @return [String, nil] @path when set, otherwise @dictionary_path
def path
  @path || @dictionary_path
end

Instance Method Details

#build_index!(&block) ⇒ Integer

Builds the full-text search index

Returns:

  • (Integer)

    the number of indexed entries



42
43
44
45
46
47
48
49
# File 'lib/ruby-jdict/index.rb', line 42

# Builds the full-text search index unless it has already been built.
#
# @yield forwarded unchanged to do_build_index
# @return [Integer] the result of do_build_index (the number of indexed
#   entries), or 0 when the index was already built and nothing was indexed
def build_index!(&block)
  # Return 0 rather than nil when there was nothing to do, so the documented
  # Integer return type holds on every path.
  entries_added = built? ? 0 : do_build_index(&block)

  # make the hash from abbreviated parts of speech to full definitions
  # NOTE(review): the constructor seeds @pos_hash with {} (truthy), so this
  # assignment only fires when @pos_hash is nil; get_pos triggers the build
  # lazily when the hash is empty.
  @pos_hash ||= build_pos_hash

  entries_added
end

#built? ⇒ Boolean

Returns:

  • (Boolean)


27
28
29
# File 'lib/ruby-jdict/index.rb', line 27

# Reports whether the search table already holds any rows.
#
# @return [Boolean] true when the row count of the search table is not 0
def built?
  row_count = @index.first_value_from("SELECT count(*) from search")
  row_count != 0
end

#delete! ⇒ Object



31
32
33
34
35
36
37
38
# File 'lib/ruby-jdict/index.rb', line 31

# Throws away the current index database and starts again with a fresh one.
#
# Closes the open index handle, removes the database file at @db_file if it
# is present, then calls initialize_db with the same path.
#
# @return [Object] whatever initialize_db returns
def delete!
  @index.close
  @index = nil

  if File.exist?(@db_file)
    File.delete(@db_file)
  end

  initialize_db(@db_file)
end

#get_pos(pos) ⇒ String

Retrieves the definition of a part-of-speech from its abbreviation

Parameters:

  • pos (String)

    the abbreviation for the part-of-speech

Returns:

  • (String)

    the full description of the part-of-speech



82
83
84
85
# File 'lib/ruby-jdict/index.rb', line 82

# Looks up the full description of a part-of-speech by its abbreviation.
#
# Calls build_pos_hash first when @pos_hash is still empty, then looks up the
# key produced by pos_to_sym.
#
# @param pos [String] the abbreviation for the part-of-speech
# @return [String] the full description of the part-of-speech (nil when the
#   hash has no entry for the key)
def get_pos(pos)
  if @pos_hash.empty?
    build_pos_hash
  end

  definitions = @pos_hash
  definitions[pos_to_sym(pos)]
end

#search(term, opts = {}) ⇒ Array(Entry)

Returns the search results as an array of Entry

Parameters:

  • term (String)

    the search string

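  • opts (Hash) (defaults to: {})

    search options: :exact keeps only entries whose kanji or kana include the term; :max_results is passed to the query as its LIMIT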

Returns:

  • (Array(Entry))

    the results of the search



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/ruby-jdict/index.rb', line 55

# Returns the search results as an array of Entry.
#
# Entries whose kanji or kana include +term+ are returned first, followed by
# the remaining matches; within each group the order in which the database
# returned the rows is preserved.
#
# @param term [String] the search string
# @param opts [Hash] search options
# @option opts [Boolean] :exact only return entries whose kanji or kana
#   include +term+
# @option opts [Integer] :max_results upper bound passed to the query's
#   LIMIT; when absent, no limit is applied
# @return [Array(Entry)] the results of the search
# @raise [RuntimeError] if nothing exists at @index_path
def search(term, opts = {})
  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
  raise "Index not found at path #{@index_path}" unless File.exist?(@index_path)

  exact_only = opts[:exact]
  # A nil limit would be bound as NULL, which SQLite's LIMIT clause does not
  # accept; a negative limit means "no upper bound".
  limit = opts[:max_results] || -1
  query = make_query(term, exact_only)

  exact_matches = []
  other_matches = []

  # NOTE: the bm25 score is selected but not used for ranking; results are
  # ranked only by whether they are an exact match.
  @index.execute("SELECT sequence_number, kanji, kana, senses, bm25(search) as score FROM search WHERE search MATCH ? LIMIT ?", query, limit) do |row|
    entry = Entry.from_sql(row)

    if entry.kanji.include?(term) || entry.kana.include?(term)
      exact_matches << entry
    elsif !exact_only
      other_matches << entry
    end
  end

  # Two ordered buckets instead of sort_by: sort_by is not stable, so entries
  # with equal rank could come back in an arbitrary order.
  exact_matches + other_matches
end