Class: JLDrill::Tatoeba::JapaneseIndexFile

Inherits:
DataFile
  • Object
show all
Defined in:
lib/jldrill/model/Tatoeba.rb

Constant Summary collapse

INDEX_RE =
/^(\d*)[\t](\d*)[\t](.*)/

Instance Attribute Summary collapse

Attributes inherited from DataFile

#encoding, #file, #lines, #parsed, #publisher, #stepSize

Instance Method Summary collapse

Methods inherited from DataFile

#createLines, #eof?, #findEncoding, #fraction, #load, #loaded?, #parse, #parseChunk, #parser, #readLines, #reset, #setLoaded, #shortFilename

Constructor Details

#initialize(sentences) ⇒ JapaneseIndexFile

Returns a new instance of JapaneseIndexFile.



201
202
203
204
205
206
207
# File 'lib/jldrill/model/Tatoeba.rb', line 201

# Creates an index file with no parsed entries yet.
#
# sentences - kept in @sentences and later passed to every TatoebaExample
#             that search builds.
def initialize(sentences)
    super()
    # Assigned after super() so this value wins over anything the DataFile
    # initializer may set (NOTE(review): DataFile is not visible here).
    @stepSize = 1000
    @usageMap = JLDrill::VocabularyUsage::Map.new
    # Count of lines matched by parseEntry; reported through dataSize.
    @numSentences = 0
    @sentences = sentences
end

Instance Attribute Details

#sentences ⇒ Object (readonly)

Returns the value of attribute sentences.



199
200
201
# File 'lib/jldrill/model/Tatoeba.rb', line 199

# Read-only accessor for the sentences object that was passed to the
# constructor.
def sentences
  @sentences
end

Instance Method Details

#dataSize ⇒ Object



217
218
219
# File 'lib/jldrill/model/Tatoeba.rb', line 217

# Returns the number of sentences counted so far, i.e. how many lines
# parseEntry has successfully matched against INDEX_RE.
def dataSize
    @numSentences
end

#findUsageData(usageHash, b_line) ⇒ Object

Finds the usage data in the supplied B line that starts with usageHash. Returns an empty string if no such entry exists.



223
224
225
226
227
228
229
230
231
# File 'lib/jldrill/model/Tatoeba.rb', line 223

# Returns the first whitespace-separated token of b_line that begins with
# usageHash, or an empty string when no token does.
#
# usageHash - prefix string to look for
# b_line    - string of space-separated usage entries
def findUsageData(usageHash, b_line)
    tokens = b_line.split(" ")
    found = tokens.find { |token| token.start_with?(usageHash) }
    # find yields nil when nothing matched; callers expect a String.
    found || ""
end

#finishParsing ⇒ Object

Don’t erase @lines because we need them later



254
255
256
# File 'lib/jldrill/model/Tatoeba.rb', line 254

# Marks the file as loaded by calling setLoaded(true).
# @lines is deliberately not erased here: parseDataOnLine re-reads
# individual lines by position when search is called later.
def finishParsing
    setLoaded(true)
end

#parseDataOnLine(pos) ⇒ Object



233
234
235
236
237
238
239
# File 'lib/jldrill/model/Tatoeba.rb', line 233

# Splits the line stored at index pos of @lines using INDEX_RE.
#
# Returns a three-element array: the first two tab-separated fields
# converted with to_i, followed by the remainder of the line as a String.
# When the line does not match INDEX_RE the result is [0, 0, ""].
def parseDataOnLine(pos)
    fields = INDEX_RE.match(@lines[pos])
    return 0, 0, "" if fields.nil?

    first, second, rest = fields.captures
    [first.to_i, second.to_i, rest]
end

#parseEntry ⇒ Object



209
210
211
212
213
214
215
# File 'lib/jldrill/model/Tatoeba.rb', line 209

# Processes the line at index @parsed of @lines and then advances @parsed
# by one, whether or not the line matched.
#
# For a line matching INDEX_RE, the sentence count is incremented and the
# third captured field is registered in @usageMap together with the
# line's position.
def parseEntry
    entry = INDEX_RE.match(@lines[@parsed])
    unless entry.nil?
        @numSentences += 1
        @usageMap.add_B_line(entry[3], @parsed)
    end
    @parsed += 1
end

#search(kanji, reading) ⇒ Object



241
242
243
244
245
246
247
248
249
250
251
# File 'lib/jldrill/model/Tatoeba.rb', line 241

# Looks up kanji/reading in @usageMap and returns an array containing one
# TatoebaExample for each line position reported by the lookup.
#
# For every position, the line is re-parsed, the usage entry starting
# with the lookup's successfulHash is extracted from the B line, and the
# resulting usage is bundled with the line's two numeric fields and
# @sentences.
def search(kanji, reading)
    lookup = @usageMap.search(kanji, reading)
    lookup.positions.map do |linePos|
        jidx, eidx, b_line = parseDataOnLine(linePos)
        entry = findUsageData(lookup.successfulHash, b_line)
        usage = JLDrill::VocabularyUsage.from_B_line(entry)
        TatoebaExample.new(jidx, eidx, usage, @sentences)
    end
end