Class: JLDrill::Dictionary

Inherits:
DataFile show all
Defined in:
lib/jldrill/model/items/Dictionary.rb

Overview

A Dictionary. It is composed of an array of entries from an EDict dictionary. These entries are parsed to create DictionaryEntry. The DictionaryEntry can then further parse the entries to create Meanings.

Direct Known Subclasses

CEDictionary, JEDictionary

Instance Attribute Summary collapse

Attributes inherited from DataFile

#encoding, #file, #lines, #parsed, #publisher, #stepSize

Instance Method Summary collapse

Methods inherited from DataFile

#createLines, #eof?, #findEncoding, #fraction, #load, #loaded?, #parse, #parseChunk, #parser, #setLoaded, #shortFilename

Constructor Details

#initialize ⇒ Dictionary

Returns a new instance of Dictionary.



20
21
22
23
# File 'lib/jldrill/model/items/Dictionary.rb', line 20

# Construct the dictionary: run the superclass constructor first, then
# set @stepSize to 1000.
def initialize
    super
    @stepSize = 1_000
end

Instance Attribute Details

#dictEntries ⇒ Object (readonly)

Returns the value of attribute dictEntries.



18
19
20
# File 'lib/jldrill/model/items/Dictionary.rb', line 18

# Reader for the @dictEntries instance variable. The stored object itself
# is returned, not a copy.
def dictEntries
    return @dictEntries
end

Instance Method Details

#dataSize ⇒ Object

The number of items we have indexed in the dictionary.



43
44
45
# File 'lib/jldrill/model/items/Dictionary.rb', line 43

# Count of the entries currently held in @dictEntries.
def dataSize
    @dictEntries.length
end

#eachVocab(&block) ⇒ Object



95
96
97
98
99
# File 'lib/jldrill/model/items/Dictionary.rb', line 95

# Call the given block once per stored entry, passing the result of the
# entry's toVocab. Entries are visited in @dictEntries order and the
# return value is whatever @dictEntries.each returns.
def eachVocab(&block)
    @dictEntries.each { |entry| block.call(entry.toVocab) }
end

#findBinWithKanji(kanji) ⇒ Object

Find the items that may have been hashed with this kanji.



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/jldrill/model/items/Dictionary.rb', line 146

# Find the entries that may have been hashed under this kanji.
#
# When kanji is at least hashSize long, the bin stored under its leading
# slice (kanji[0..hashSize - 1]) is used; on a hit the stored array
# itself is returned, so callers should not modify it. When kanji is
# shorter, the bins of every key that starts with kanji are concatenated
# into a new array. If the result is empty, the lookup falls back to
# findBinWithSimplified(kanji).
#
# The lookup is read-only: a miss does not store an empty bin in
# @kanjiHash, so failed searches never grow the index.
def findBinWithKanji(kanji)
    if kanji.size >= hashSize
        bin = @kanjiHash[kanji[0..hashSize - 1]] || []
    else
        bin = []
        @kanjiHash.each do |key, words|
            bin.concat(words) if key.start_with?(kanji)
        end
    end
    bin = findBinWithSimplified(kanji) if bin.empty?
    return bin
end

#findBinWithReading(reading) ⇒ Object

Find the items that may have been hashed with this reading.



114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/jldrill/model/items/Dictionary.rb', line 114

# Find the entries that may have been hashed under this reading.
#
# When reading is at least hashSize long, the bin stored under its
# leading slice (reading[0..hashSize - 1]) is used; on a hit the stored
# array itself is returned, so callers should not modify it. When reading
# is shorter, the bins of every key that starts with reading are
# concatenated into a new array.
#
# The lookup is read-only: a miss does not store an empty bin in
# @readingHash, so failed searches never grow the index.
def findBinWithReading(reading)
    if reading.size >= hashSize
        bin = @readingHash[reading[0..hashSize - 1]] || []
    else
        bin = []
        @readingHash.each do |key, words|
            bin.concat(words) if key.start_with?(reading)
        end
    end
    return bin
end

#findBinWithSimplified(kanji) ⇒ Object

Find the items that may have been hashed with this simplified kanji.



130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/jldrill/model/items/Dictionary.rb', line 130

# Find the entries that may have been hashed under this simplified kanji.
#
# When kanji is at least hashSize long, the bin stored under its leading
# slice (kanji[0..hashSize - 1]) is used; on a hit the stored array
# itself is returned, so callers should not modify it. When kanji is
# shorter, the bins of every key that starts with kanji are concatenated
# into a new array.
#
# The lookup is read-only: a miss does not store an empty bin in
# @simplifiedHash, so failed searches never grow the index.
def findBinWithSimplified(kanji)
    if kanji.size >= hashSize
        bin = @simplifiedHash[kanji[0..hashSize - 1]] || []
    else
        bin = []
        @simplifiedHash.each do |key, words|
            bin.concat(words) if key.start_with?(kanji)
        end
    end
    return bin
end

#findKanji(kanji) ⇒ Object

Return all the DictionaryEntry that have the kanji, kanji.



203
204
205
206
207
208
209
210
211
212
213
# File 'lib/jldrill/model/items/Dictionary.rb', line 203

# Collect every entry from the kanji bin for which kanjiEql?(kanji) is
# true. Each entry that is kept has its relevance set to kanji.size.
def findKanji(kanji)
    score = kanji.size
    matches = findBinWithKanji(kanji).select { |entry| entry.kanjiEql?(kanji) }
    matches.each { |entry| entry.relevance = score }
    matches
end

#findKanjiStartingWith(kanji) ⇒ Object

Return all the DictionaryEntry that have kanji starting with kanji.



178
179
180
181
182
183
184
185
186
187
# File 'lib/jldrill/model/items/Dictionary.rb', line 178

# Entries whose kanji starts with the given kanji. When kanji is no
# longer than hashSize the bin from findBinWithKanji is returned as is;
# otherwise the bin is narrowed with kanjiStartsWith?.
def findKanjiStartingWith(kanji)
    candidates = findBinWithKanji(kanji)
    return candidates unless kanji.size > hashSize
    candidates.select { |entry| entry.kanjiStartsWith?(kanji) }
end

#findKanjiThatStart(kanji) ⇒ Object

Return all the words that occur at the beginning of kanji.



245
246
247
248
249
250
251
252
253
254
255
# File 'lib/jldrill/model/items/Dictionary.rb', line 245

# Entries that occur at the beginning of kanji. The bin is looked up with
# the leading slice kanji[0..hashSize - 1] and narrowed with
# keyStartsWithKanji?. Each kept entry has its relevance set to the size
# of its own kanji.
def findKanjiThatStart(kanji)
    prefix = kanji[0..hashSize - 1]
    findBinWithKanji(prefix).select do |entry|
        length = entry.kanji.size
        matched = entry.keyStartsWithKanji?(kanji)
        entry.relevance = length if matched
        matched ? true : false
    end
end

#findReading(reading) ⇒ Object

Return all the DictionaryEntry that have the reading, reading.



190
191
192
193
194
195
196
197
198
199
200
# File 'lib/jldrill/model/items/Dictionary.rb', line 190

# Collect every entry from the reading bin for which readingEql?(reading)
# is true. Each entry that is kept has its relevance set to reading.size.
def findReading(reading)
    score = reading.size
    matches = findBinWithReading(reading).select { |entry| entry.readingEql?(reading) }
    matches.each { |entry| entry.relevance = score }
    matches
end

#findReadingsStartingWith(reading) ⇒ Object

Return all the DictionaryEntry that have a reading starting with reading.



166
167
168
169
170
171
172
173
174
175
# File 'lib/jldrill/model/items/Dictionary.rb', line 166

# Entries whose reading starts with the given reading. When reading is no
# longer than hashSize the bin from findBinWithReading is returned as is;
# otherwise the bin is narrowed with readingStartsWith?.
def findReadingsStartingWith(reading)
    candidates = findBinWithReading(reading)
    return candidates unless reading.size > hashSize
    candidates.select { |entry| entry.readingStartsWith?(reading) }
end

#findReadingsThatStart(reading) ⇒ Object

Return all the words that occur at the beginning of reading.



232
233
234
235
236
237
238
239
240
241
242
# File 'lib/jldrill/model/items/Dictionary.rb', line 232

# Entries that occur at the beginning of reading. The bin is looked up
# with the leading slice reading[0..hashSize - 1] and narrowed with
# keyStartsWithReading?. Each kept entry has its relevance set to the
# size of its own reading.
def findReadingsThatStart(reading)
    prefix = reading[0..hashSize - 1]
    findBinWithReading(prefix).select do |entry|
        length = entry.reading.size
        matched = entry.keyStartsWithReading?(reading)
        entry.relevance = length if matched
        matched ? true : false
    end
end

#findWord(string) ⇒ Object



215
216
217
218
219
# File 'lib/jldrill/model/items/Dictionary.rb', line 215

# Look the string up as kanji and as a reading, returning the kanji
# matches followed by the reading matches in one array.
def findWord(string)
    byKanji = findKanji(string)
    byReading = findReading(string)
    byKanji + byReading
end

#findWordsThatStart(string) ⇒ Object

Return all the words that occur at the beginning of the string. These are sorted by size with the largest finds given first.



259
260
261
262
263
# File 'lib/jldrill/model/items/Dictionary.rb', line 259

# Entries that occur at the beginning of the string, found by kanji and
# by reading. The kanji results come first, followed by the reading
# results; this method performs no sorting of its own.
def findWordsThatStart(string)
    byKanji = findKanjiThatStart(string)
    byReading = findReadingsThatStart(string)
    byKanji + byReading
end

#finishParsing ⇒ Object

This is what to do when we are finished parsing.



108
109
110
111
# File 'lib/jldrill/model/items/Dictionary.rb', line 108

# Hook run once parsing is finished. It only calls setLoaded(true); the
# line buffer is deliberately left alone.
def finishParsing
    # Don't reset the lines because we need them later
    setLoaded(true)
end

#getMeaning(position) ⇒ Object

Return the meaning for the word at the position in the file. The concrete implementation should override this method.



53
54
55
# File 'lib/jldrill/model/items/Dictionary.rb', line 53

# Placeholder that ignores position and always returns an empty string.
# The surrounding documentation says the concrete dictionary should
# override this method.
def getMeaning(position)
    ''
end

#hashSize ⇒ Object

Ruby 1.8 and 1.9 use different counting mechanisms for the size of strings. hashSize must return the size of the character that you want to hash on. This implementation is an example. You should override it in the concrete Dictionary class.



29
30
31
# File 'lib/jldrill/model/items/Dictionary.rb', line 29

# Size of the leading string slice used as a hash key; the lookup and
# hashing methods in this class take str[0..hashSize - 1].
# NOTE(review): as written, "".size is 0, which turns 0..hashSize - 1
# into 0..-1 (the whole string). The literal may originally have held a
# sample character -- confirm against the source file.
def hashSize
    return "".size
end

#hashWord(word) ⇒ Object

Hash the word in both the reading and kanji hashes so that we can find them quickly.



64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/jldrill/model/items/Dictionary.rb', line 64

# Index the word under the leading slice (0..hashSize - 1) of its
# reading and of its kanji, skipping whichever is empty. The simplified
# form is indexed as well, but only when it is non-empty and not eql? to
# the kanji.
def hashWord(word)
    unless word.reading.empty?
        readingKey = word.reading[0..hashSize - 1]
        (@readingHash[readingKey] ||= []) << word
    end
    unless word.kanji.empty?
        kanjiKey = word.kanji[0..hashSize - 1]
        (@kanjiHash[kanjiKey] ||= []) << word
    end
    unless word.simplified.empty? || word.kanji.eql?(word.simplified)
        simplifiedKey = word.simplified[0..hashSize - 1]
        (@simplifiedHash[simplifiedKey] ||= []) << word
    end
end

#include?(vocabulary) ⇒ Boolean

Return true if the dictionary contains this vocabulary.

Returns:

  • (Boolean)


222
223
224
225
226
227
228
229
# File 'lib/jldrill/model/items/Dictionary.rb', line 222

# True when some entry found via findReading(vocabulary.reading) has a
# toVocab that is eql? to vocabulary. A vocabulary with a nil reading is
# never considered present.
def include?(vocabulary)
    reading = vocabulary.reading
    return false if reading.nil?
    findReading(reading).any? { |entry| entry.toVocab.eql?(vocabulary) }
end

#length ⇒ Object



47
48
49
# File 'lib/jldrill/model/items/Dictionary.rb', line 47

# Same value as dataSize; this method simply forwards to it.
def length
    dataSize
end

#parseEntry ⇒ Object

Create the indices for the item at the current line.



102
103
104
105
# File 'lib/jldrill/model/items/Dictionary.rb', line 102

# Parse the line at the current @parsed position via parseLine, then
# advance @parsed by one. The new @parsed value is returned.
def parseEntry
    current = @parsed
    parseLine(current)
    @parsed += 1
end

#parseLine(index) ⇒ Object



77
78
79
80
81
82
83
84
# File 'lib/jldrill/model/items/Dictionary.rb', line 77

# Fetch the entry for index with getDictionaryEntry. A non-nil entry is
# appended to @dictEntries and passed to hashWord. The entry (or nil) is
# returned either way.
def parseLine(index)
    entry = getDictionaryEntry(index)
    unless entry.nil?
        @dictEntries << entry
        hashWord(entry)
    end
    entry
end

#readLines ⇒ Object

Read all the lines into the buffer.



58
59
60
# File 'lib/jldrill/model/items/Dictionary.rb', line 58

# Read all the lines into the buffer. This override adds nothing of its
# own; it only forwards to the superclass implementation.
def readLines
    super
end

#reset ⇒ Object

Reset the dictionary back to empty



34
35
36
37
38
39
40
# File 'lib/jldrill/model/items/Dictionary.rb', line 34

# Empty the dictionary: replace the three lookup hashes and the entry
# list with fresh empty containers, then let the superclass reset run.
def reset
    @readingHash, @kanjiHash, @simplifiedHash = {}, {}, {}
    @dictEntries = []
    super
end

#vocab(index) ⇒ Object



86
87
88
89
90
91
92
93
# File 'lib/jldrill/model/items/Dictionary.rb', line 86

# The toVocab of the entry stored at index in @dictEntries, or nil when
# there is no entry at that index.
def vocab(index)
    entry = @dictEntries[index]
    entry.nil? ? nil : entry.toVocab
end