Class: Bio::SQL::Sequence

Inherits:
Object show all
Defined in:
lib/bio/db/biosql/sequence.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Sequence

Returns a new instance of Sequence.



89
90
91
92
93
94
95
# File 'lib/bio/db/biosql/sequence.rb', line 89

# Builds a Sequence wrapper.
#
# Recognised option keys are :entry, :biodatabase_id and :biosequence; the
# hash is checked with assert_valid_keys (presumably ActiveSupport's, which
# rejects unknown keys -- confirm).
#
# * When :entry is not nil it is stored in @entry and nothing else happens.
# * Otherwise, when both :biosequence and :biodatabase_id are not nil, they
#   are handed to to_biosql.
# * Otherwise the object is left without an entry.
def initialize(options={})
  options.assert_valid_keys(:entry, :biodatabase_id, :biosequence)

  if !options[:entry].nil?
    @entry = options[:entry]
  elsif !options[:biosequence].nil? && !options[:biodatabase_id].nil?
    to_biosql(options[:biosequence], options[:biodatabase_id])
  end
end

Instance Attribute Details

#entryObject (readonly)

Returns the value of attribute entry.



70
71
72
# File 'lib/bio/db/biosql/sequence.rb', line 70

# Reader for @entry, the object every other method in this class delegates to.
def entry
  @entry
end

Instance Method Details

#cdsfeaturesObject

Returns the seqfeatures mapped from BioSQL with a type_term like ‘CDS’.



294
295
296
# File 'lib/bio/db/biosql/sequence.rb', line 294

# Delegates to cdsfeatures on the wrapped entry and returns its result.
def cdsfeatures
  @entry.cdsfeatures
end

#comment=(value) ⇒ Object



392
393
394
395
# File 'lib/bio/db/biosql/sequence.rb', line 392

# Adds +value+ as a new comment on the wrapped entry.
#
# A Comment is built with the entry as :bioentry, +value+ as :comment_text
# and a :rank one greater than the entry's current comment count, and is then
# persisted with save!.
def comment=(value)
  next_rank = @entry.comments.count.succ
  Comment.new(:bioentry => @entry, :comment_text => value, :rank => next_rank).save!
end

#commentsObject



362
363
364
365
366
# File 'lib/bio/db/biosql/sequence.rb', line 362

# Returns an array holding the comment_text of each of the entry's comments,
# in the order the entry yields them.
def comments
  @entry.comments.map { |stored| stored.comment_text }
end

#databaseObject



217
218
219
# File 'lib/bio/db/biosql/sequence.rb', line 217

# Returns the name of the biodatabase the wrapped entry belongs to.
def database
  @entry.biodatabase.name
end

#database_descObject



221
222
223
# File 'lib/bio/db/biosql/sequence.rb', line 221

# Returns the description of the biodatabase the wrapped entry belongs to.
def database_desc
  @entry.biodatabase.description
end

#deleteObject



72
73
74
# File 'lib/bio/db/biosql/sequence.rb', line 72

# Calls destroy on the wrapped entry and returns whatever destroy returns.
def delete
  @entry.destroy
end

#descriptionObject Also known as: definition



242
243
244
# File 'lib/bio/db/biosql/sequence.rb', line 242

# Returns the description stored on the wrapped entry.
def description
  @entry.description
end

#description=(value) ⇒ Object Also known as: definition=



247
248
249
# File 'lib/bio/db/biosql/sequence.rb', line 247

# Assigns +value+ to the wrapped entry's description. The entry is not saved
# here.
def description=(value)
  @entry.description = value
end

#divisionObject



235
236
237
# File 'lib/bio/db/biosql/sequence.rb', line 235

# Returns the division stored on the wrapped entry.
def division
  @entry.division
end

#division=(value) ⇒ Object



238
239
240
# File 'lib/bio/db/biosql/sequence.rb', line 238

# Assigns +value+ to the wrapped entry's division. The entry is not saved
# here.
def division=(value)
  @entry.division = value
end

#feature=(feat) ⇒ Object



273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
# File 'lib/bio/db/biosql/sequence.rb', line 273

# Stores +feat+ as a seqfeature of the wrapped entry, together with its
# locations and qualifiers.
#
# +feat+ must respond to feature, locations and each (yielding qualifiers
# that respond to qualifier and value); each location must respond to from,
# to and strand.
#
# Steps, in order:
# 1. look up (or create) the type term named feat.feature in the
#    'SeqFeature Keys' ontology and the fixed source term in
#    'SeqFeature Sources';
# 2. create the Seqfeature, ranked one past the entry's current seqfeature
#    count;
# 3. save one Location per feat.locations element, each ranked one past the
#    seqfeature's location count at that moment;
# 4. save one SeqfeatureQualifierValue per qualifier, with its term taken
#    from the 'Annotation Tags' ontology and its value converted with to_s.
def feature=(feat)
  # ToDo: avoid the Ontology lookups here; caching them would probably be
  # more efficient.
  key_ontology = Ontology.find_or_create_by_name('SeqFeature Keys')
  key_term = Term.find_or_create_by_name(:name => feat.feature, :ontology => key_ontology)
  src_ontology = Ontology.find_or_create_by_name('SeqFeature Sources')
  src_term = Term.find_or_create_by_name(:name => 'EMBLGenBankSwit', :ontology => src_ontology)

  stored = Seqfeature.create(
    :bioentry => @entry,
    :source_term => src_term,
    :type_term => key_term,
    :rank => @entry.seqfeatures.count.succ,
    :display_name => ''
  )

  feat.locations.each do |loc|
    Location.new(
      :seqfeature => stored,
      :start_pos => loc.from,
      :end_pos => loc.to,
      :strand => loc.strand,
      :rank => stored.locations.count.succ
    ).save!
  end

  tag_ontology = Ontology.find_or_create_by_name('Annotation Tags')
  feat.each do |qualifier|
    tag_term = Term.find_or_create_by_name(:name => qualifier.qualifier, :ontology => tag_ontology)
    SeqfeatureQualifierValue.new(
      :seqfeature => stored,
      :term => tag_term,
      :value => qualifier.value.to_s,
      :rank => stored.seqfeature_qualifier_values.count.succ
    ).save!
  end
end

#featuresObject



268
269
270
271
# File 'lib/bio/db/biosql/sequence.rb', line 268

# Returns one converted feature per seqfeature of the wrapped entry, each
# produced by get_seqfeature.
def features
  @entry.seqfeatures.map { |seqfeature| get_seqfeature(seqfeature) }
end

#get_seqfeature(sf) ⇒ Object



76
77
78
79
80
81
82
83
# File 'lib/bio/db/biosql/sequence.rb', line 76

# Converts the stored seqfeature +sf+ into a Bio::Feature.
#
# The position string is the comma-separated to_s of every location, wrapped
# in "join(...)" when there is more than one location. Each stored qualifier
# value becomes a Bio::Feature::Qualifier built from its term name and value.
def get_seqfeature(sf)
  position = sf.locations.map { |loc| loc.to_s }.join(',')
  position = "join(#{position})" if sf.locations.count > 1

  feature_key = sf.type_term.name
  qualifiers = sf.seqfeature_qualifier_values.map do |stored|
    Bio::Feature::Qualifier.new(stored.term.name, stored.value)
  end

  Bio::Feature.new(feature_key, position, qualifiers)
end

#identifierObject



252
253
254
# File 'lib/bio/db/biosql/sequence.rb', line 252

# Returns the identifier stored on the wrapped entry.
def identifier
  @entry.identifier
end

#identifier=(value) ⇒ Object



256
257
258
# File 'lib/bio/db/biosql/sequence.rb', line 256

# Assigns +value+ to the wrapped entry's identifier. The entry is not saved
# here.
def identifier=(value)
  @entry.identifier = value
end

#lengthObject



334
335
336
# File 'lib/bio/db/biosql/sequence.rb', line 334

# Returns the length recorded on the entry's biosequence.
# Calls length directly on @entry.biosequence, so the entry must have one.
def length 
  @entry.biosequence.length
end

#length=(len) ⇒ Object



85
86
87
# File 'lib/bio/db/biosql/sequence.rb', line 85

# Assigns +len+ to the length of the entry's biosequence. Nothing is saved
# here, and the entry must already have a biosequence.
def length=(len)
  @entry.biosequence.length = len
end

#nameObject Also known as: entry_id



179
180
181
# File 'lib/bio/db/biosql/sequence.rb', line 179

# Returns the name stored on the wrapped entry.
def name 
  @entry.name
end

#name=(value) ⇒ Object Also known as: entry_id=



184
185
186
# File 'lib/bio/db/biosql/sequence.rb', line 184

# Assigns +value+ to the wrapped entry's name. The entry is not saved here.
def name=(value)
  @entry.name = value
end

#organismObject Also known as: species

TODO def secondary_accession

  @entry.bioentry_qualifier_values
end


201
202
203
# File 'lib/bio/db/biosql/sequence.rb', line 201

# Returns the organism label of the wrapped entry.
#
# * "" when the entry has no taxon.
# * Otherwise the taxon's scientific name, immediately followed by
#   "(common name)" when the taxon has a GenBank common name.
def organism
  taxon = @entry.taxon
  return "" if taxon.nil?

  label = "#{taxon.taxon_scientific_name.name}"
  common = taxon.taxon_genbank_common_name
  common ? label + "(#{common.name})" : label + ''
end

#organism=(value) ⇒ Object Also known as: species=



206
207
208
209
210
211
212
213
214
# File 'lib/bio/db/biosql/sequence.rb', line 206

# Links the wrapped entry to the taxon whose scientific name matches +value+.
#
# Any whitespace-preceded parenthesised suffix (for example a common name in
# "Arabidopsis thaliana (thale cress)") is removed before the lookup. When a
# TaxonName with name class 'scientific name' is found, its taxon_id is
# assigned to the entry and the entry is saved; otherwise a message is
# printed to stdout and the entry is left untouched.
def organism=(value)
  scientific = value.gsub(/\s+\(.+\)/, '')
  match = TaxonName.find_by_name_and_name_class(scientific, 'scientific name')

  if match.nil?
    puts "Error value doesn't exists in taxon_name table with scientific name constraint."
    return
  end

  @entry.taxon_id = match.taxon_id
  @entry.save!
end

#primary_accessionObject



189
190
191
# File 'lib/bio/db/biosql/sequence.rb', line 189

# Returns the accession stored on the wrapped entry.
def primary_accession
  @entry.accession
end

#primary_accession=(value) ⇒ Object



193
194
195
# File 'lib/bio/db/biosql/sequence.rb', line 193

# Assigns +value+ to the wrapped entry's accession. The entry is not saved
# here.
def primary_accession=(value)
  @entry.accession = value
end

#reference=(value) ⇒ Object



369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
# File 'lib/bio/db/biosql/sequence.rb', line 369

# Stores the reference +value+ and links it to the wrapped entry.
#
# The bibliographic details are packed into a single location string made of
# "key=value" pairs joined with '|'. A field is left out when it is empty
# (journal, volume, issue, pages, year, pubmed, medline, abstract, mesh,
# affiliations) or nil (doi, url, comments); comments are joined with '~'.
# The references method splits this string back apart.
#
# value.sequence_position, when present, has its whitespace removed and is
# split on '-' into start and end positions. The Reference is found or
# created by title, and a BioentryReference ranked by
# value.embl_gb_record_number is saved to link it to the entry.
def reference=(value)
  # [attribute, predicate that means "leave this field out"], in output order.
  optional_fields = [
    [:journal, :empty?], [:volume, :empty?], [:issue, :empty?],
    [:pages, :empty?], [:year, :empty?], [:pubmed, :empty?],
    [:medline, :empty?], [:doi, :nil?], [:abstract, :empty?],
    [:url, :nil?], [:mesh, :empty?], [:affiliations, :empty?]
  ]

  parts = []
  optional_fields.each do |field, skip_if|
    content = value.send(field)
    parts << "#{field}=#{content}" unless content.send(skip_if)
  end
  parts << "comments=#{value.comments.join('~')}" unless value.comments.nil?

  if value.sequence_position
    start_pos, end_pos = value.sequence_position.gsub(/\s*/, '').split('-')
  else
    start_pos = end_pos = nil
  end

  stored = Reference.find_or_create_by_title(
    :title => value.title,
    :authors => value.authors.join(' '),
    :location => parts.join('|')
  )

  BioentryReference.new(
    :bioentry => @entry,
    :reference => stored,
    :rank => value.embl_gb_record_number,
    :start_pos => start_pos,
    :end_pos => end_pos
  ).save!
end

#referencesObject



338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
# File 'lib/bio/db/biosql/sequence.rb', line 338

# Returns one Bio::Reference per bioentry reference of the wrapped entry.
#
# Each Bio::Reference is built from a hash with these keys:
# 'authors':: the stored author string cut after every ". " (each piece
#             except the last keeps its trailing ". ")
# 'sequence_position':: "start-end", only when both positions are set
# 'title':: the stored title
# 'embl_gb_record_number':: the rank of the bioentry reference
# 'xrefs':: "dbname; accession." when the reference has a dbxref, else ''
# plus one key per "key=value" pair found in the stored location string
# (pairs are separated by '|', as written by reference=).
#
# Only the first '=' of a pair separates key from value, so values that
# contain '=' themselves (URLs with query strings, abstracts) are returned
# intact. A pair with no value yields nil.
#
# TODO: solve the problem with specific comment per reference.
# TODO: get dbxref
def references
  @entry.bioentry_references.map do |bio_ref|
    stored = bio_ref.reference
    attributes = {}

    attributes['authors'] = stored.authors.gsub(/\.\s/, ". |").split(/\|/)
    if bio_ref.start_pos && bio_ref.end_pos
      attributes['sequence_position'] = "#{bio_ref.start_pos}-#{bio_ref.end_pos}"
    end
    attributes['title'] = stored.title
    attributes['embl_gb_record_number'] = bio_ref.rank

    unless stored.location.nil?
      stored.location.split('|').each do |element|
        # Limit of 2: everything after the first '=' belongs to the value.
        key, content = element.split('=', 2)
        # An empty value stays nil, as it was before the limit was added.
        attributes[key] = (content.nil? || content.empty?) ? nil : content
      end
    end

    dbxref = stored.dbxref
    attributes['xrefs'] = dbxref ? "#{dbxref.dbname}; #{dbxref.accession}." : ''

    Bio::Reference.new(attributes)
  end
end

#saveObject



397
398
399
400
401
# File 'lib/bio/db/biosql/sequence.rb', line 397

# Persists the wrapped entry, saving its biosequence first when it has one.
#
# An entry without a biosequence is a state the sequence accessors already
# allow for, so it is saved on its own instead of failing on nil.
# Both saves use save!, so errors from the persistence layer propagate to
# the caller.
#
# Returns the result of @entry.save!.
def save
  biosequence = @entry.biosequence
  biosequence.save! unless biosequence.nil?
  @entry.save!
end

#seqObject

Returns the sequence. Returns a Bio::Sequence::Generic object.



301
302
303
304
# File 'lib/bio/db/biosql/sequence.rb', line 301

# Returns the entry's sequence wrapped in a Bio::Sequence::Generic.
# An entry without a biosequence yields a Generic built from ''.
def seq
  stored = @entry.biosequence
  residues = stored ? stored.seq : ''
  Bio::Sequence::Generic.new(residues)
end

#seq=(value) ⇒ Object



306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# File 'lib/bio/db/biosql/sequence.rb', line 306

# Stores +value+ as the entry's sequence.
#
# When the entry has no biosequence yet, a Biosequence holding +value+ is
# created, attached to the entry and saved; otherwise the existing
# biosequence's seq is overwritten. The length is then set to value.length
# through length= and the entry is saved.
#
# TODO (from the original): check which type of alphabet it is, NU/NA/nil.
def seq=(value)
  if @entry.biosequence.nil?
    fresh = Biosequence.new(:seq => value)
    @entry.biosequence = fresh
    @entry.biosequence.save!
  else
    @entry.biosequence.seq = value
  end

  self.length = value.length
  @entry.save!
end

#taxonomyObject



323
324
325
326
327
328
329
330
331
332
# File 'lib/bio/db/biosql/sequence.rb', line 323

# Returns the scientific names of the entry's taxon and its ancestors,
# ordered from the most distant ancestor down to the entry's own taxon.
#
# The walk follows parent_taxon_id upwards and stops, without adding a name,
# at a taxon that is its own parent. A taxon with no parent (nil
# parent_taxon_id) is added and ends the walk, rather than being looked up
# with a nil id. Returns [] when the entry has no taxon.
def taxonomy
  lineage = []
  taxon = @entry.taxon
  while taxon && taxon.taxon_id != taxon.parent_taxon_id
    lineage << taxon.taxon_scientific_name.name
    parent_id = taxon.parent_taxon_id
    break if parent_id.nil?
    # Note: a parent association on the model would avoid this explicit find.
    taxon = Taxon.find(parent_id)
  end
  lineage.reverse
end

#to_biosequenceObject



415
416
417
# File 'lib/bio/db/biosql/sequence.rb', line 415

# Returns the result of Bio::Sequence.adapter called with this object and the
# Bio::Sequence::Adapter::BioSQL adapter module.
def to_biosequence
	 Bio::Sequence.adapter(self,Bio::Sequence::Adapter::BioSQL)
end

#to_biosql(bs, biodatabase_id) ⇒ Object



97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/bio/db/biosql/sequence.rb', line 97

# Creates a new Bioentry in the database +biodatabase_id+ from the sequence
# object +bs+ and stores it in @entry.
#
# All work happens inside Bioentry.transaction. The entry is created with
# bs.entry_id as its name, then filled through this object's own setters:
# primary accession, definition, sequence version (0 when bs has none),
# division, secondary accessions, sequence, molecule type, data class,
# topology, creation and modification dates, keywords, species, features,
# references and comments. Apart from the primary accession and the sequence
# version, a field is skipped when bs returns nil for it. Several of these
# setters are not defined in this part of the file; they are presumably
# provided elsewhere in the class.
#
# Progress markers are printed to stdout when $DEBUG is set.
#
# Returns self on success. On a StandardError the message is printed to
# stdout and nil is returned. Non-standard exceptions (Interrupt, SystemExit,
# NoMemoryError, ...) are deliberately not rescued and propagate to the
# caller.
def to_biosql(bs, biodatabase_id)
  Bioentry.transaction do
    @entry = Bioentry.new(:biodatabase_id=>biodatabase_id, :name=>bs.entry_id)

    puts "primary" if $DEBUG
    self.primary_accession = bs.primary_accession

    puts "def" if $DEBUG
    self.definition = bs.definition unless bs.definition.nil?

    puts "seqver" if $DEBUG
    self.sequence_version = bs.sequence_version || 0

    puts "divi" if $DEBUG
    self.division = bs.division unless bs.division.nil?

    @entry.save!
    puts "secacc" if $DEBUG

    bs.secondary_accessions.each do |sa|
      # every secondary accession is written as a qualifier
      self.secondary_accessions = sa
    end unless bs.secondary_accessions.nil?

    # the entry has to exist before the sequence can be created
    puts "seq" if $DEBUG
    puts bs.seq if $DEBUG
    self.seq = bs.seq unless bs.seq.nil?
    puts "mol" if $DEBUG

    self.molecule_type = bs.molecule_type unless bs.molecule_type.nil?
    puts "dc" if $DEBUG

    self.data_class = bs.data_class unless bs.data_class.nil?
    puts "top" if $DEBUG
    self.topology = bs.topology unless bs.topology.nil?
    puts "datec" if $DEBUG
    self.date_created = bs.date_created unless bs.date_created.nil?
    puts "datemod" if $DEBUG
    self.date_modified = bs.date_modified unless bs.date_modified.nil?
    puts "key" if $DEBUG

    bs.keywords.each do |kw|
      # every keyword is written as a qualifier
      self.keywords = kw
    end unless bs.keywords.nil?

    # FIX: problem setting taxon_name: EMBL has "Arabidopsis thaliana (thale cress)"
    # but the taxon_name table does not contain this name. Check whether there
    # is a newer version of the table.
    puts "spec" if $DEBUG
    self.species = bs.species unless bs.species.nil?
    puts "Debug: #{bs.species}" if $DEBUG
    puts "Debug: feat..start" if $DEBUG

    bs.features.each do |feat|
      self.feature = feat
    end unless bs.features.nil?
    puts "Debug: feat...end" if $DEBUG

    bs.references.each do |reference|
      self.reference = reference
    end unless bs.references.nil?

    bs.comments.each do |comment|
      self.comment = comment
    end unless bs.comments.nil?
  end # transaction
  self
rescue StandardError => e
  # Only StandardError: rescuing Exception would also swallow Interrupt and
  # SystemExit and make the process impossible to stop cleanly.
  puts "to_biosql exception: #{e}"
  puts e
  nil
end

#to_fastaObject



402
403
404
405
406
407
# File 'lib/bio/db/biosql/sequence.rb', line 402

# Returns the sequence as a FASTA-formatted string: a ">accession" header
# line followed by the sequence wrapped at 60 characters per line, every line
# (including the last) ending in a newline.
#
# This used to print two lines to stdout; returning a string lets the caller
# decide what to do with it.
#
# NOTE(review): accession is not defined in the part of the class shown here
# (only primary_accession is) -- confirm it exists elsewhere.
def to_fasta
  header = ">" + accession + "\n"
  header + seq.gsub(/.{1,60}/, "\\0\n")
end

#to_fasta_reverse_complememtObject



409
410
411
# File 'lib/bio/db/biosql/sequence.rb', line 409

# Returns the reverse complement of the sequence as a FASTA-formatted string:
# a ">accession" header line followed by seq.reverse_complement wrapped at 60
# characters per line, every line ending in a newline.
#
# NOTE(review): accession is not defined in the part of the class shown here
# (only primary_accession is) -- confirm it exists elsewhere.
def to_fasta_reverse_complememt
  header = ">" + accession + "\n"
  header + seq.reverse_complement.gsub(/.{1,60}/, "\\0\n")
end

#versionObject Also known as: sequence_version



225
226
227
# File 'lib/bio/db/biosql/sequence.rb', line 225

# Returns the version stored on the wrapped entry.
def version
  @entry.version
end

#version=(value) ⇒ Object Also known as: sequence_version=



230
231
232
# File 'lib/bio/db/biosql/sequence.rb', line 230

# Assigns +value+ to the wrapped entry's version. The entry is not saved
# here.
def version=(value)
  @entry.version = value
end