Class: FastaReader

Inherits:

Object

Object
FastaReader

show all

Defined in: lib/bio-table/parsers/fastareader.rb

Instance Method Summary collapse

#close ⇒ Object
#digest_tag(tag) ⇒ Object
#each ⇒ Object

Returns a FastaRecord for every item (invokes parse_each).
#first ⇒ Object
#get(id) ⇒ Object

Return a record by its id, nil when not found.
#get_by_index(idx) ⇒ Object
#get_rec(fpos) ⇒ Object
#initialize(fn, regex = nil) ⇒ FastaReader constructor

Initialize the reader of FASTA file fn.
#parse_each ⇒ Object

Parse the FASTA file and yield id, descr, sequence.
#size ⇒ Object

Returns the size of the dataset - as read.

Constructor Details

#initialize(fn, regex = nil) ⇒ `FastaReader`

Initialize the reader of FASTA file fn. Options can be :regex and :index (true/false).

# File 'lib/bio-table/parsers/fastareader.rb', line 19

# Open the FASTA file +fn+ and set up the ID-extraction pattern.
#
# fn    - path of the FASTA file; it is opened immediately and the handle
#         is kept in @f until #close is called.
# regex - optional ID pattern, given as a String. Defaults to '^(\S+)'.
#         A pattern that contains no '(' is wrapped in parentheses so that
#         it always carries a capture group.
def initialize fn, regex = nil
  @logger = Bio::Log::LoggerPlus['bio-table']
  @f = File.open(fn)
  @fread_once = false
  @regex = regex || '^(\S+)'
  # Inspect the effective pattern (@regex), not the raw argument: the raw
  # argument is nil when the default is used, and concatenating nil into a
  # String raises TypeError.
  @regex = '(' + @regex + ')' if @regex !~ /\(/
  @logger.info "Parsing FASTA with ID regex '" + @regex + "'"
end

Instance Method Details

#close ⇒ `Object`



126
127
128

# File 'lib/bio-table/parsers/fastareader.rb', line 126

# Close the file handle held in @f.
def close()
  handle = @f
  handle.close
end

#digest_tag(tag) ⇒ `Object`

# File 'lib/bio-table/parsers/fastareader.rb', line 106

# Split a FASTA header line into an id and a description.
#
# tag - the raw header line, expected to start with '>'.
#
# Returns a two-element array [id, descr]: descr is the header text after
# the '>' with surrounding whitespace stripped, and id is the capture
# groups of @regex applied to descr, joined with tabs.
# Raises RuntimeError when tag does not start with '>' or when @regex does
# not yield any capture from the description.
def digest_tag tag
  if tag =~ /^>/
    descr = Regexp.last_match.post_match.strip
    found = /#{@regex}/.match(descr)
    # Regexp#match returns nil when nothing matches; treat that the same as
    # "no captures" so the descriptive error below is raised instead of a
    # NoMethodError on nil.
    captures = found ? found.captures : []
    unless captures.empty?
      return captures.join("\t"), descr
    end
    p descr  # do not remove these
    p @regex
  end
  raise "Can not digest '#{tag}' using '"+@regex+"'"
end

#each ⇒ `Object`

Returns a FastaRecord for every item (invokes parse_each).



64
65
66

# File 'lib/bio-table/parsers/fastareader.rb', line 64

# Yield one FastaRecord per record produced by parse_each.
def each
  parse_each do |id, descr, seq|
    record = FastaRecord.new(id, descr, seq)
    yield record
  end
end

#first ⇒ `Object`

# File 'lib/bio-table/parsers/fastareader.rb', line 68

# Return a FastaRecord for the first record yielded by parse_each.
def first
  parse_each do |id, descr, seq|
    # Returning from inside the block leaves parse_each after one record.
    return FastaRecord.new(id, descr, seq)
  end
end

#get(id) ⇒ `Object`

Return a record by its id, nil when not found

# File 'lib/bio-table/parsers/fastareader.rb', line 75

# Look up a record by its id.
#
# Calls indexed? first, then asks indexer_get for the file position of id.
# Returns the result of get_rec for that position, or nil when indexer_get
# returns nothing.
def get id
  indexed?
  fpos = indexer_get(id)
  return nil unless fpos
  get_rec(fpos)
end

#get_by_index(idx) ⇒ `Object`

# File 'lib/bio-table/parsers/fastareader.rb', line 97

# Look up a record by its index entry.
#
# Calls indexed? first; the file position is taken from element [1] of
# what indexer_get_by_index returns for idx. Returns the result of get_rec
# for that position, or nil when no position is present.
def get_by_index idx
  indexed?
  fpos = indexer_get_by_index(idx)[1]
  fpos ? get_rec(fpos) : nil
end

#get_rec(fpos) ⇒ `Object`

# File 'lib/bio-table/parsers/fastareader.rb', line 84

# Read the single record whose header line starts at byte offset fpos.
#
# fpos - file position of the record's '>' header line in @f.
#
# Returns a FastaRecord built from the digested header (see digest_tag)
# and the stripped sequence lines concatenated, up to the next header or
# the end of the file.
def get_rec fpos
  @f.seek fpos
  tag = @f.gets
  seq = "".dup
  # gets returns nil at end of file, which ends the loop; this also covers
  # a record that consists of a header line only.
  while (line = @f.gets)
    break if line =~ /^>/
    seq << line.strip
  end
  id, descr = digest_tag(tag)
  FastaRecord.new(id, descr, seq)
end

#parse_each ⇒ `Object`

Parse the FASTA file and yield id, descr, sequence. When the indexer is on it will index the records the first time. Note that, with indexing, when you don’t complete parsing there will be an error the second time. This is a trade-off, otherwise one would always have to index the file and read it twice.

# File 'lib/bio-table/parsers/fastareader.rb', line 34

# Parse the FASTA file from the start and yield id, descr, seq for every
# record.
#
# The file is rewound first, so every call re-reads the whole file. @count
# is reset to 0 and incremented before each yield. While iterating,
# @rec_line holds the header line of the next record (nil once the file is
# exhausted) and @rec_fpos the file position where that line starts.
# @fread_once is set to true after the last record has been yielded.
#
# An empty file yields nothing. A record made up of a header line only is
# yielded with an empty sequence.
# Raises whatever digest_tag raises for a header it cannot digest.
def parse_each
  @f.seek 0    # force file rewind
  @rec_fpos = 0
  @rec_line = @f.gets
  @count = 0
  # Loop on the pending header rather than on @f.eof: the header of the
  # final record may already have been read when the file hits EOF.
  while @rec_line
    # digest id from record description
    id, descr = digest_tag(@rec_line)
    # parse the sequence
    seq = "".dup
    fpos = 0
    line = nil
    loop do
      fpos = @f.tell
      line = @f.gets
      # nil means end of file; a '>' line is the next record's header
      break if line.nil? || line =~ /^>/
      seq << line.strip
    end
    # new record
    @count += 1
    @rec_fpos = fpos
    @rec_line = line
    # indexer_set(id, id_fpos) if @indexer and not @fread_once
    yield id, descr, seq
  end
  @fread_once = true
end

#size ⇒ `Object`

Returns the size of the dataset - as read. After the final record the size represents the number of items in the FASTA file



122
123
124

# File 'lib/bio-table/parsers/fastareader.rb', line 122

# Number of records counted so far by parse_each (the value of @count).
def size()
  return @count
end

Class: FastaReader

Instance Method Summary collapse

Constructor Details

#initialize(fn, regex = nil) ⇒ FastaReader

Instance Method Details

#close ⇒ Object

#digest_tag(tag) ⇒ Object

#each ⇒ Object

#first ⇒ Object

#get(id) ⇒ Object

#get_by_index(idx) ⇒ Object

#get_rec(fpos) ⇒ Object

#parse_each ⇒ Object

#size ⇒ Object