Class: Bio::Ucsc::File::Twobit
- Inherits:
-
Object
- Object
- Bio::Ucsc::File::Twobit
show all
- Defined in:
- lib/bio-ucsc/file/twobit.rb
Defined Under Namespace
Classes: TwoBitHeader, TwoBitRecord
Constant Summary
collapse
- BINCODE =
{0b00 => "T", 0b01 => "C", 0b10 => "A", 0b11 => "G"}
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
Constructor Details
#initialize(twobit_queue, filename, header, offsets) ⇒ Twobit
Returns a new instance of Twobit.
21
22
23
24
25
26
27
|
# File 'lib/bio-ucsc/file/twobit.rb', line 21
def initialize(twobit_queue, filename, , offsets)
@tbq = twobit_queue
@filename = filename
@header =
@offsets = offsets
@records = Hash.new
end
|
Instance Attribute Details
#filename ⇒ Object
Returns the value of attribute filename.
29
30
31
|
# File 'lib/bio-ucsc/file/twobit.rb', line 29
def filename
@filename
end
|
Returns the value of attribute header.
29
30
31
|
# File 'lib/bio-ucsc/file/twobit.rb', line 29
def
@header
end
|
#offsets ⇒ Object
Returns the value of attribute offsets.
29
30
31
|
# File 'lib/bio-ucsc/file/twobit.rb', line 29
def offsets
@offsets
end
|
Class Method Details
.load(filename) ⇒ Object
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
# File 'lib/bio-ucsc/file/twobit.rb', line 35
def self.load(filename)
two_bit = nil
::File.open(filename, 'rb') {|f|two_bit = f.read}
tbq = Bio::Ucsc::File::ByteQueue.new(two_bit)
= TwoBitHeader.new
.signature = tbq.next(4).unpack('L').first
.version = tbq.next(4).unpack('L').first
.sequence_count = tbq.next(4).unpack('L').first
.reserved = tbq.next(4).unpack('L').first
offsets = Hash.new
.sequence_count.times do
name_length = tbq.next(1).unpack('C').first
offsets[tbq.next(name_length).unpack('a*').first] =
tbq.next(4).unpack('L').first
end
new(tbq, filename, , offsets)
end
|
.open(filename) ⇒ Object
55
56
57
58
59
60
61
|
# File 'lib/bio-ucsc/file/twobit.rb', line 55
def self.open(filename)
if block_given?
yield self.load(filename)
else
return self.load(filename)
end
end
|
Instance Method Details
#find_by_interval(interval) ⇒ Object
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
|
# File 'lib/bio-ucsc/file/twobit.rb', line 114
def find_by_interval(interval)
interval = Bio::Ucsc::Gi.wrap(interval)
seq = self.find_by_interval_raw(interval)
@records[interval.chrom].n_block_intervals.map do |nb|
if interval.overlapped?(nb)
case interval.compare(nb)
when :equal,:contains
seq = 'N' * interval.overlap(nb)
when :contained_by
left_len = nb.chr_start - interval.chr_start
right_len = interval.chr_end - nb.chr_end
case
when left_len > 0 && right_len > 0
seq =
seq[0 .. (left_len-1)] +
'N' * (seq.length - left_len - right_len) +
seq[-right_len .. -1]
when left_len == 0 && right_len > 0
seq =
'N' * (seq.length - left_len - right_len) +
seq[-right_len .. -1]
when left_len > 0 && right_len == 0
seq =
seq[0 .. (left_len-1)] +
'N' * (seq.length - left_len - right_len)
else
raise "This should not happen! (Bio::Ucsc::File::Twobit#find_by_interval)"
end
when :left_overlapped
left_len = nb.chr_end - interval.chr_start + 1
seq[0, left_len] = 'N' * left_len
when :right_overlapped
right_len = interval.chr_end - nb.chr_start + 1
seq[-right_len, right_len] = 'N' * right_len
when :right_adjacent, :right_off
seq
end
end
end
seq
end
|
#find_by_interval_raw(interval) ⇒ Object
161
162
163
164
165
166
167
168
169
170
171
172
173
174
|
# File 'lib/bio-ucsc/file/twobit.rb', line 161
def find_by_interval_raw(interval)
interval = Bio::Ucsc::Gi.wrap(interval)
byte_count, byte_mod = interval.zero_start.divmod 4
chrom_top = self.records(interval.chrom).packed_dna_offset
div_start, mod_start = interval.zero_start.divmod 4
div_end, mod_end = interval.zero_end.divmod 4
div_len, mod_len = interval.length.divmod 4
byte_length = div_end - div_start + 1
@tbq.index = chrom_top + div_start
bytes = @tbq.next(byte_length).unpack('C*')
seq = bytes_to_nucleotides(bytes)
seq[mod_start..(-1-(4-mod_end))]
end
|
#inspect ⇒ Object
31
32
33
|
# File 'lib/bio-ucsc/file/twobit.rb', line 31
def inspect
"#<#{self.class}:0x#{self.__id__.to_s(16)} @filename=#{@filename} ...>"
end
|
#records(chrom) ⇒ Object
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
# File 'lib/bio-ucsc/file/twobit.rb', line 63
def records(chrom)
return @records[chrom] if @records[chrom]
@tbq.index = @offsets[chrom]
@records[chrom] = TwoBitRecord.new
@records[chrom].dna_size = @tbq.next(4).unpack('L').first
n_block_count = @tbq.next(4).unpack('L').first
n_block_starts = Array.new
n_block_count.times do
n_block_starts << @tbq.next(4).unpack('L').first
end
n_block_sizes = Array.new
n_block_count.times do
n_block_sizes << @tbq.next(4).unpack('L').first
end
@records[chrom].n_block_intervals = Array.new
n_block_count.times do |idx|
@records[chrom].n_block_intervals <<
Bio::GenomicInterval.zero_based(chrom,
n_block_starts[idx],
n_block_starts[idx]+n_block_sizes[idx])
end
mask_block_count = @tbq.next(4).unpack('L').first
mask_block_starts = Array.new
mask_block_count.times do
mask_block_starts << @tbq.next(4).unpack('L').first
end
mask_block_sizes = Array.new
mask_block_count.times do
mask_block_sizes << @tbq.next(4).unpack('L').first
end
@records[chrom].mask_block_intervals = Array.new
mask_block_count.times do |idx|
@records[chrom].mask_block_intervals <<
Bio::GenomicInterval.zero_based(chrom,
mask_block_starts[idx],
mask_block_starts[idx]+mask_block_sizes[idx])
end
@records[chrom].reserved = @tbq.next(4).unpack('L').first
@records[chrom].packed_dna_offset = @tbq.index
@records[chrom]
end
|
#subseq(interval) ⇒ Object
110
111
112
|
# File 'lib/bio-ucsc/file/twobit.rb', line 110
def subseq(interval)
find_by_interval(interval)
end
|