Class: JLDrill::Tatoeba::ChineseIndexFile

Inherits:

DataFile

Object
DataFile
JLDrill::Tatoeba::ChineseIndexFile

show all

Defined in: lib/jldrill/model/Tatoeba.rb

Constant Summary collapse

LINK_RE =

/^(\d*)[\t](\d*)/

CHINESE_INDEX_RE =

/^(\d*)[\t]cmn/

ENGLISH_INDEX_RE =

/^(\d*)[\t]eng/

Instance Attribute Summary

Attributes inherited from DataFile

#encoding, #file, #lines, #parsed, #publisher, #stepSize

Instance Method Summary collapse

#dataSize ⇒ Object
#finishParsing ⇒ Object

Don’t erase @lines because we need them later.
#getPositions(kanji) ⇒ Object

Returns an array of the positions in @chineseIndeces whose sentences contain the given kanji.
#initialize(sentences) ⇒ ChineseIndexFile constructor

A new instance of ChineseIndexFile.
#loaded? ⇒ Boolean
#parseEntry ⇒ Object
#search(kanji, reading) ⇒ Object

Methods inherited from DataFile

#createLines, #eof?, #findEncoding, #fraction, #load, #parse, #parseChunk, #parser, #readLines, #reset, #setLoaded, #shortFilename

Constructor Details

#initialize(sentences) ⇒ `ChineseIndexFile`

Returns a new instance of ChineseIndexFile.

# File 'lib/jldrill/model/Tatoeba.rb', line 117

# Builds an empty Chinese/English sentence-pair index on top of the
# given sentence store.
#
# sentences - stored in @sentences; the other methods of this class
#             call dataAt and sentenceAt on it.
def initialize(sentences)
    # Run the parent initializer first so the values below win over
    # anything it sets up.
    super()
    # NOTE(review): presumably the number of entries handled per
    # parsing step -- confirm against DataFile.
    @stepSize = 10000
    @sentences = sentences
    # Left-hand index that parseEntry currently skips.
    @ruledOut = 0
    # Parallel arrays: position i pairs a Chinese sentence index with
    # the index of its English counterpart.
    @englishIndeces = []
    @chineseIndeces = []
end

Instance Method Details

#dataSize ⇒ `Object`



160
161
162

# File 'lib/jldrill/model/Tatoeba.rb', line 160

# Number of Chinese sentence indices recorded so far (one per stored
# Chinese/English pair).
def dataSize
    @chineseIndeces.length
end

#finishParsing ⇒ `Object`

Don’t erase @lines because we need them later



165
166
167

# File 'lib/jldrill/model/Tatoeba.rb', line 165

# Called when parsing is complete: flags the file as loaded via
# setLoaded(true) and nothing else.  @lines is deliberately left
# untouched because it is needed later.
def finishParsing
    setLoaded(true)
end

#getPositions(kanji) ⇒ `Object`

Returns an array of the positions in @chineseIndeces whose sentences contain the given kanji.

# File 'lib/jldrill/model/Tatoeba.rb', line 176

# Returns the positions (array offsets, not sentence indices) in
# @chineseIndeces whose sentence matches the given kanji.
#
# kanji - pattern handed to the sentence's match method.
#
# NOTE(review): if sentenceAt returns a String, a String kanji is
# compiled as a regular expression by match -- confirm that callers
# never pass text containing regexp metacharacters.
def getPositions(kanji)
    @chineseIndeces.each_index.select do |position|
        sentence = @sentences.sentenceAt(@chineseIndeces[position])
        sentence.match(kanji)
    end
end

#loaded? ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/jldrill/model/Tatoeba.rb', line 169

# Reports whether the file has been loaded.  This simply returns the
# parent implementation's answer unchanged.
def loaded?
    super
end

#parseEntry ⇒ `Object`

# File 'lib/jldrill/model/Tatoeba.rb', line 126

# Parses the link line at @lines[@parsed] and advances @parsed by one.
#
# A link line holds two tab-separated sentence indices.  We are only
# interested in Chinese sentences that have an English translation:
#
# * Lines are ordered by the left-hand index, so remembering the last
#   rejected or completed left-hand index in @ruledOut is enough to
#   skip all further lines with that same index.
# * If the left-hand sentence is not Chinese, its index is ruled out.
# * If it is Chinese, right-hand entries keep being checked until an
#   English one is found; the pair is then recorded and the index is
#   ruled out so later links for it are ignored.
#
# Lines that do not match LINK_RE are skipped.
def parseEntry
    link = LINK_RE.match(@lines[@parsed])
    cindex = link && link[1].to_i
    if link && cindex != @ruledOut
        eindex = link[2].to_i
        chinese = @sentences.dataAt(cindex)
        english = @sentences.dataAt(eindex)
        if !CHINESE_INDEX_RE.match(chinese)
            # Not a Chinese sentence, so don't process the following
            # lines with the same left-hand index.
            @ruledOut = cindex
        elsif ENGLISH_INDEX_RE.match(english)
            # Found the English for this Chinese sentence: record the
            # pair and stop looking at this left-hand index.
            @chineseIndeces << cindex
            @englishIndeces << eindex
            @ruledOut = cindex
        end
    end
    @parsed += 1
end

#search(kanji, reading) ⇒ `Object`

# File 'lib/jldrill/model/Tatoeba.rb', line 182

# Builds one TatoebaExample for every indexed Chinese sentence that
# getPositions reports as containing the kanji.
#
# kanji   - text to look for; also passed to
#           JLDrill::VocabularyUsage.from_B_line to build the usage.
# reading - accepted but not used by this method.
#
# Returns an array of TatoebaExample objects (empty when nothing
# matches).
def search(kanji, reading)
    getPositions(kanji).map do |position|
        chinese = @chineseIndeces[position]
        english = @englishIndeces[position]
        # A fresh usage object is built for every example.
        usage = JLDrill::VocabularyUsage.from_B_line(kanji)
        TatoebaExample.new(chinese, english, usage, @sentences)
    end
end

Class: JLDrill::Tatoeba::ChineseIndexFile

Constant Summary collapse

Instance Attribute Summary

Attributes inherited from DataFile

Instance Method Summary collapse

Methods inherited from DataFile

Constructor Details

#initialize(sentences) ⇒ ChineseIndexFile

Instance Method Details

#dataSize ⇒ Object

#finishParsing ⇒ Object

#getPositions(kanji) ⇒ Object

#loaded? ⇒ Boolean

#parseEntry ⇒ Object