Class: Exodb::Generef
- Includes:
- GenomeLocationField
- Defined in:
- lib/exodb/datamodel/reference.rb
Instance Method Summary collapse
-
#can_translated? ⇒ Boolean
Check whether this Generef can be translated.
-
#dl_occurrent! ⇒ Object
Download incident data from TCGA.
-
#dl_seq! ⇒ Object
Download the sequence from a web service; please use with caution.
-
#dl_symbol! ⇒ Object
Download gene symbol from HGNC service.
-
#has_sequence? ⇒ Boolean
Check if Generef has sequence.
-
#has_splices? ⇒ Boolean
Check that this gene has any splice variant.
-
#longest_splice ⇒ Object
return longest splice of this gene.
-
#symbol ⇒ String
Get gene symbol.
-
#to_seq ⇒ Bio::Sequence
return sequence as Bio::Sequence object.
Methods included from GenomeLocationField
#begin, #chromosome, #end, #location_str, #parse_location, #start, #stop
Methods included from XrefsField
Instance Method Details
#can_translated? ⇒ Boolean
Check whether this Generef can be translated
212 213 214 |
# File 'lib/exodb/datamodel/reference.rb', line 212

# Check whether this Generef can be translated.
#
# The gene counts as translatable when it has a sequence (#has_sequence?),
# has at least one splice variant (#has_splices?), and #longest_splice
# returns something other than nil.
#
# @return [Boolean] true only when all three conditions hold
def can_translated?
  # Short-circuit so #longest_splice is only evaluated when it can matter.
  return false unless has_sequence? && has_splices?

  !longest_splice.nil?
end
#dl_occurrent! ⇒ Object
Download incident data from TCGA
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/exodb/datamodel/reference.rb', line 116

# Download incident data from TCGA through the cbioportal.org web service.
#
# Does nothing unless the gene carries a 'urn:miriam:hgnc.symbol' xref.
# Otherwise, for every study id listed by `getCancerStudies`:
# * sums the number of space-separated entries in column 5 of every
#   "Sequenced Tumors" row returned by `getCaseLists`, and
# * collects column 3 of every 'Missense_Mutation' row returned by
#   `getMutationData`, grouped by the first run of digits found in column 8
#   (presumably the amino-acid position — verify against the service docs).
#
# The stored occurrents are then cleared, one Occurrent is appended per
# (study, position) pair, and the record is saved.
#
# Rows that lack column 8 or contain no digits there are skipped, and blank
# study lines are ignored.
#
# NOTE(review): URLs are fetched with Kernel#open, which needs open-uri and
# stopped accepting URLs in Ruby 3.0 — confirm the target Ruby version or
# migrate to URI.open.
#
# @return [Object, nil] the result of save!, or nil when the gene has no
#   HGNC symbol xref
def dl_occurrent!
  symbol_xref = get_xref('urn:miriam:hgnc.symbol')
  return unless symbol_xref

  # Resolved once up front instead of once per study.
  gene = symbol_xref.id
  service = 'http://www.cbioportal.org/public-portal/webservice.do'

  studies = []
  open("#{service}?cmd=getCancerStudies") do |f|
    f.each_line do |line|
      study = line.chomp.split("\t")[0]
      # A blank line yields no id; skip it rather than query an empty study.
      studies.push(study) unless study.nil? || study.empty?
    end
  end

  totalcase = Hash.new(0)
  found = Hash.new do |by_study, study|
    by_study[study] = Hash.new { |by_position, position| by_position[position] = [] }
  end

  studies.each do |study|
    open("#{service}?cmd=getCaseLists&cancer_study_id=#{study}") do |f|
      f.each_line do |line|
        next unless line =~ /\tSequenced Tumors\t/

        # String#split drops trailing empty fields, so column 5 may be absent.
        totalcase[study] += line.chomp.split(/\t/)[4].to_s.split(' ').length
      end
    end

    open("#{service}?cmd=getMutationData&genetic_profile_id=#{study}_mutations&gene_list=#{gene}") do |f|
      f.each_line do |line|
        dat = line.chomp.split(/\t/)
        next unless dat[5] == 'Missense_Mutation'

        # First run of digits in column 8; nil when the column is missing
        # or holds no digits.
        position = dat[7].to_s[/\d+/]
        next unless position

        found[study][position].push(dat[2])
      end
    end
  end

  self.occurrents.clear if self.occurrents
  found.each_pair do |cancertype, by_position|
    by_position.each_pair do |position, occur|
      self.occurrents << Occurrent.new({cancertype: cancertype, position: position, occur: occur.uniq.sort, casenumber: totalcase[cancertype]})
    end
  end
  self.save!
end
#dl_seq! ⇒ Object
Download the sequence from a web service; please use with caution. NCBI will block spamming requests.
71 72 73 74 75 76 77 78 79 80 |
# File 'lib/exodb/datamodel/reference.rb', line 71

# Download the sequence for this location from NCBI and store it.
#
# Only acts on a chrrefseq of the form 'urn:miriam:refseq:<accession>': the
# accession is fetched from the "nucleotide" database as FASTA, restricted
# to start..end, and the parsed sequence is assigned to `sequence`. The
# record is saved in every case, even when nothing was downloaded.
#
# Use with caution: per the original note, NCBI blocks clients that send
# too many requests.
#
# @return [Object] the result of save!
def dl_seq!
  refseq = self.chrrefseq
  case refseq
  when /\Aurn:miriam:refseq:/
    # With a limit of 4 the last field is everything after
    # 'urn:miriam:refseq:'. Pass only that accession: handing efetch the
    # whole split array would also send 'urn', 'miriam' and 'refseq' as ids.
    accession = refseq.split(':', 4).last
    fasta = Bio::NCBI::REST.efetch(accession, {"db"=>"nucleotide", "rettype"=>"fasta", "retmode"=>"text", "seq_start"=>self.start, "seq_stop"=>self.end})
    self.sequence = Bio::FastaFormat.new(fasta).seq
  end
  self.save!
end
#dl_symbol! ⇒ Object
Download gene symbol from HGNC service
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/exodb/datamodel/reference.rb', line 84

# Download the gene symbol from the HGNC (genenames.org) REST service.
#
# Only a RefSeq-based lookup is implemented: when the gene has a
# 'urn:miriam:refseq' xref, the service is searched by the un-versioned
# accession (the part of chrrefseq.id before the first '.'). Genes without
# that xref, including those that only have an 'urn:miriam:ncbigene' xref,
# are left untouched.
#
# Every returned doc adds an 'urn:miriam:hgnc:<hgnc_id>' and an
# 'urn:miriam:hgnc.symbol:<symbol>' entry to :xrefs; the record is saved
# only when at least one doc came back. A reply without a 'response' or
# 'docs' member is treated as "no result".
#
# NOTE(review): URLs are fetched with Kernel#open, which needs open-uri and
# stopped accepting URLs in Ruby 3.0 — confirm the target Ruby version.
#
# @return [Object, nil] the result of save!, or nil when nothing was added
def dl_symbol!
  return unless get_xref('urn:miriam:refseq')

  # NOTE(review): the accession is taken from chrrefseq, not from the
  # 'urn:miriam:refseq' xref checked above — confirm this is intended.
  accession = self.chrrefseq.id.split('.')[0]
  query = "http://rest.genenames.org/search/refseq_accession/#{accession}"

  response = JSON.parse(open(query, 'Accept' => 'application/json').read)['response']
  docs = response && response['docs']
  return if docs.nil? || docs.empty?

  docs.each do |e|
    add_to_set(:xrefs, "urn:miriam:hgnc:#{e["hgnc_id"]}")
    add_to_set(:xrefs, "urn:miriam:hgnc.symbol:#{e["symbol"]}")
  end
  self.save!
end
#has_sequence? ⇒ Boolean
Check if Generef has sequence
205 206 207 |
# File 'lib/exodb/datamodel/reference.rb', line 205

# Check whether this Generef has a sequence.
#
# @return [Boolean] true when the :sequence attribute is neither nil nor
#   false (an empty string still counts as present)
def has_sequence?()
  !!self[:sequence]
end
#has_splices? ⇒ Boolean
Check that this gene has any splice variant
198 199 200 |
# File 'lib/exodb/datamodel/reference.rb', line 198

# Check that this gene has any splice variant.
#
# @return [Boolean] whatever `exists?` reports for the isoforms relation
def has_splices?
  splice_variants = isoforms
  splice_variants.exists?
end
#longest_splice ⇒ Object
return longest splice of this gene
180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/exodb/datamodel/reference.rb', line 180

# Return the longest splice of this gene.
#
# Walks the isoforms and keeps the one with the greatest prot_len. Only a
# strictly greater length replaces the current pick, so the first isoform
# wins a tie. Isoforms whose prot_len is nil or not above zero are never
# selected.
#
# @return [Object, nil] the isoform with the largest positive prot_len, or
#   nil when there is none
def longest_splice
  longest = nil
  longest_len = 0
  isoforms.each do |isoform|
    len = isoform.prot_len
    # A missing length cannot be compared; skip it instead of raising.
    next if len.nil? || len <= longest_len

    longest = isoform
    longest_len = len
  end
  longest
end
#symbol ⇒ String
Get gene symbol
219 220 221 222 223 224 225 226 227 228 229 |
# File 'lib/exodb/datamodel/reference.rb', line 219

# Get the gene symbol.
#
# Prefers the id of the 'urn:miriam:hgnc.symbol' xref. Without one, falls
# back to the id of the smallest entry in xrefs (by <=>), and finally to the
# literal 'nosymbol' when xrefs is nil or empty.
#
# @return [String] the symbol, a fallback xref id, or 'nosymbol'
def symbol
  # Looked up once; the result is reused for both the check and the value.
  hgnc = get_xref('urn:miriam:hgnc.symbol')
  return hgnc.id if hgnc

  refs = xrefs
  return 'nosymbol' if refs.nil? || refs.empty?

  # min picks the same element a full sort would put first.
  refs.min.id
end
#to_seq ⇒ Bio::Sequence
return sequence as Bio::Sequence object
175 176 177 |
# File 'lib/exodb/datamodel/reference.rb', line 175

# Return the sequence as a Bio::Sequence object.
#
# A missing sequence is converted as the empty string.
#
# @return [Bio::Sequence] the result of Bio::Sequence.auto
def to_seq
  raw = self.sequence || ""
  Bio::Sequence.auto(raw)
end