Class: Cassava::Document

Inherits:
Object
  • Object
show all
Defined in:
lib/cassava/document.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = nil) ⇒ Document

Returns a new instance of Document.



9
10
11
12
13
14
15
16
17
18
# File 'lib/cassava/document.rb', line 9

# Builds a document from an options Hash.
#
# Options read here: :name (required), :columns (optional, defaults to an
# empty list) and :rows (optional initial rows). The Hash itself is kept in
# @opts; parse! later reads :col_sep from it.
#
# @param opts [Hash, nil] document options; nil is treated as an empty Hash
# @raise [ArgumentError] if :name is missing or nil
def initialize opts = nil
  # The signature allows nil, so normalise it first; otherwise opts[:name]
  # fails with NoMethodError instead of the intended ArgumentError.
  opts ||= { }
  @opts = opts
  @name = opts[:name] or raise ArgumentError, "name not specified"
  @rows = [ ]
  @index = { }
  self.columns = opts[:columns] || [ ]
  if x = opts[:rows]
    # Initial rows go through append_rows!, which accepts an Array of rows
    # or another Document.
    append_rows! x
  end
end

Instance Attribute Details

#column_offset ⇒ Object

Returns the value of attribute column_offset.



5
6
7
# File 'lib/cassava/document.rb', line 5

# Reader for @column_offset, the Hash that update_columns! rebuilds to map each
# column name (Symbol) to its zero-based position in the column list.
def column_offset
  @column_offset
end

#columns ⇒ Object

Returns the value of attribute columns.



5
6
7
# File 'lib/cassava/document.rb', line 5

# Reader for @columns, the ordered column list. update_columns! normalises its
# entries in place to Symbols and replaces empty names with nil.
def columns
  @columns
end

#debug ⇒ Object

Returns the value of attribute debug.



7
8
9
# File 'lib/cassava/document.rb', line 7

# Reader for @debug. When it is truthy, infer_column_types pretty-prints its
# intermediate state for the column named :float.
def debug
  @debug
end

#name ⇒ Object

Returns the value of attribute name.



5
6
7
# File 'lib/cassava/document.rb', line 5

# Reader for @name, taken from opts[:name] in the constructor. parse! passes it
# to CSV.open as the file to read.
def name
  @name
end

#offset_column ⇒ Object

Returns the value of attribute offset_column.



5
6
7
# File 'lib/cassava/document.rb', line 5

# Reader for @offset_column, the Array that update_columns! rebuilds so that
# index i holds the column Symbol at position i (nil where the name was empty).
def offset_column
  @offset_column
end

#rows ⇒ Object

Returns the value of attribute rows.



6
7
8
# File 'lib/cassava/document.rb', line 6

# Reader for @rows. After parse! or append_rows! each row is a Hash keyed by
# column Symbol, plus a :_row_i entry holding the row's sequence number.
def rows
  @rows
end

Instance Method Details

#_emit!(file) ⇒ Object Also known as: emit!



181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/cassava/document.rb', line 181

# Writes the document to +file+ as CSV (also available as emit!).
#
# The first record is the column names as Strings; every following record is
# one row, with values taken in column-position order. Positions that have no
# column (nil in @offset_column) are written as nil fields.
#
# @param file [String] path handed to CSV.open in "wb" mode
# @return [self]
def _emit! file
  CSV.open(file, "wb") do | csv |
    csv << @offset_column.map { | name | name && name.to_s }
    @rows.each do | row |
      csv << @offset_column.map { | name | name && row[name] }
    end
  end
  self
end

#add_column!(c) ⇒ Object



44
45
46
47
48
49
50
51
52
# File 'lib/cassava/document.rb', line 44

# Ensures a column named +c+ exists and reports its position.
#
# An already-known column is left alone. A new one is appended to @columns
# and the column lookup tables are rebuilt with update_columns!.
#
# @param c [#to_sym] column name
# @return [Integer] zero-based position of the column
def add_column! c
  key = c.to_sym
  offset = @column_offset[key]
  return offset if offset

  offset = @columns.size
  @columns << key
  update_columns!
  offset
end

#append_rows!(rows) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/cassava/document.rb', line 87

# Appends rows to the document.
#
# +rows+ may be nil (no-op), another Document (its columns are added here
# first and its rows are then appended), or an Array whose elements are
# passed through array_to_row. The given Array is converted in place, and
# each appended row Hash gets a :_row_i entry counting up from the number of
# rows already present. The value index and cached column types are dropped.
#
# NOTE(review): numbering here starts at the current row count, whereas
# parse! numbers rows from 1 - confirm which convention callers expect.
#
# @param rows [Array, Document, nil]
# @return [self]
def append_rows! rows
  return self if !rows

  if Document === rows
    rows.columns.each { | c | add_column!(c) }
    rows = rows.rows
  end

  rows.map! { | r | array_to_row(r) }
  first_i = @rows.size
  @rows.concat(rows)
  rows.each_with_index do | row, k |
    # Width is measured before :_row_i is written into the row.
    @ncols = row.size if @ncols < row.size
    row[:_row_i] = first_i + k
  end

  @index.clear
  @column_types = nil
  self
end

#array_to_row(a, columns = nil) ⇒ Object



198
199
200
201
202
203
204
205
206
207
208
# File 'lib/cassava/document.rb', line 198

# Converts a positional row into a Hash keyed by column.
#
# Anything that is not an Array is returned untouched. For an Array, the
# i-th column is paired with the i-th value; missing values become nil and
# values beyond the last column are ignored.
#
# @param a [Array, Object] the row
# @param columns [Array, nil] column keys; defaults to this document's columns
# @return [Hash, Object]
def array_to_row a, columns = nil
  return a unless Array === a

  columns ||= self.columns
  columns.zip(a).to_h
end

#cast_strings(rows) ⇒ Object



228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/cassava/document.rb', line 228

# Rewrites row values in place, turning numeric-looking text into numbers.
#
# nil values are skipped. Every other value is first converted with to_s;
# text that looks like an integer becomes an Integer, text that looks like a
# decimal or exponent-form number becomes a Float, and anything else is
# stored back as its String form.
#
# @param rows [Array<Hash>] rows to modify
# @return [Array<Hash>] the same +rows+ object
def cast_strings rows
  integer_like = /\A[-+]?\d+\Z/
  float_like = /\A([-+]?([0-9]+\.[0-9]+|\.[0-9]+|[0-9]+\.)(e[-+]?\d+)?|[-+]?\d+e[-+]?\d+)\Z/i
  rows.each do | row |
    row.each do | key, value |
      next if value.nil?
      text = value.to_s
      row[key] =
        if !(String === text)
          text
        elsif integer_like === text
          text.to_i
        elsif float_like === text
          text.to_f
        else
          text
        end
    end
  end
  rows
end

#cast_strings! ⇒ Object



249
250
251
252
253
# File 'lib/cassava/document.rb', line 249

# Applies cast_strings to this document's rows in place.
#
# Because values may change class (e.g. "5" becomes 5), both derived caches
# are dropped: the inferred column types and the per-column value index
# built by index!, whose keys are the old values.
#
# @return [self]
def cast_strings!
  cast_strings @rows
  @column_types = nil
  # The index is keyed by row values; after casting, those keys are stale.
  @index.clear
  self
end

#clone_rows(rows = self.rows) ⇒ Object



331
332
333
# File 'lib/cassava/document.rb', line 331

# Returns a new Array holding a shallow copy (dup) of every row, so the
# copies can be modified without touching the originals. Values inside the
# rows are not copied.
#
# @param rows [Array] rows to copy; defaults to this document's rows
# @return [Array]
def clone_rows rows = self.rows
  rows.map(&:dup)
end

#coerce_to_strings! ⇒ Object



219
220
221
222
223
224
225
226
# File 'lib/cassava/document.rb', line 219

# Converts every non-String value of every row to a String in place, using
# to_s (so nil becomes "").
#
# Because values change class, the cached column types and the per-column
# value index are dropped, as cast_strings! does for the column types.
#
# @return [self]
def coerce_to_strings!
  rows.each do | r |
    r.each do | k, v |
      r[k] = v.to_s unless String === v
    end
  end
  # Both caches were derived from the pre-coercion values.
  @column_types = nil
  @index.clear
  self
end

#column_types ⇒ Object



324
325
326
327
328
329
# File 'lib/cassava/document.rb', line 324

# Returns the inferred class of each column, computing it with
# infer_column_types on first use and reusing the cached result afterwards.
# Methods that change rows or columns reset the cache by setting
# @column_types to nil.
#
# @return [Array] one entry per column, as produced by infer_column_types
def column_types
  @column_types ||= infer_column_types
end

#empty_rows! ⇒ Object



80
81
82
83
84
85
# File 'lib/cassava/document.rb', line 80

# Discards all rows. @rows is replaced by a fresh empty Array (the previous
# Array object is not modified), and the value index and cached column types
# are reset. Columns are kept.
#
# @return [self]
def empty_rows!
  @rows, @column_types = [ ], nil
  @index.clear
  self
end

#get(c, v) ⇒ Object



151
152
153
# File 'lib/cassava/document.rb', line 151

# Looks up the rows whose column +c+ holds the value +v+, using the
# per-column index built (and cached) by index!.
#
# @param c column, as accepted by index!
# @param v the value to look for
# @return [Array, nil] the matching rows, or nil when no row has that value
def get c, v
  by_value = index!(c)
  by_value[v]
end

#index!(c) ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
# File 'lib/cassava/document.rb', line 139

# Returns the value index for column +c+, building and caching it on first
# use. The index is a Hash mapping each distinct value found in that column
# to the Array of rows holding it, in row order.
#
# @param c [Symbol, Integer] column name, or its zero-based position in the
#   column list
# @return [Hash] value => Array of rows
def index! c
  # Positions are resolved through @columns, the column list every other
  # method uses; there is no @column instance variable.
  c = @columns[c] if Integer === c
  @index[c] ||= @rows.group_by { | r | r[c] }
end

#infer_column_types(rows = self.rows) ⇒ Object



286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# File 'lib/cassava/document.rb', line 286

# Infers one class per column from the values present in +rows+.
#
# For each column the result starts as nil and becomes the class of the
# first non-nil value seen. Each later non-nil value replaces it with the
# nearest superclass shared by the current type and the value's class
# (mixin modules are ignored), e.g. Integer and Float give Numeric.
# Columns with only nil values stay nil.
#
# @param rows [Array<Hash>] rows to inspect; defaults to this document's rows
# @return [Array] one class (or nil) per entry of @columns
# @raise [RuntimeError] if a row is not a Hash
def infer_column_types rows = self.rows
  inferred = Array.new(@columns.size)
  class_chains = { }  # class => its ancestors with plain modules removed
  shared_cache = { }  # [current type, value class] => shared superclass
  rows.each do | row |
    raise unless Hash === row
    @columns.each_with_index do | col, idx |
      value = row[col]
      next if value.nil?
      current = inferred[idx]
      klass = value.class
      if current.nil?
        inferred[idx] = klass
        next
      end
      # The chains are only computed on a cache miss; the debug output below
      # shows nil for them otherwise.
      current_chain = klass_chain = nil
      shared = shared_cache[[current, klass]]
      unless shared
        current_chain =
          (class_chains[current] ||= current.ancestors.reject { | x | x.class == Module })
        klass_chain =
          (class_chains[klass] ||= klass.ancestors.reject { | x | x.class == Module })
        shared = shared_cache[[current, klass]] = (current_chain & klass_chain).first || Object
      end
      if @debug && col == :float
        pp [ :k, col, :v, value, :ct, current, :vt, klass, :ca, current_chain, :va, klass_chain, :common_ancestor, shared ]
      end
      inferred[idx] = shared
    end
  end
  inferred
end

#nrows ⇒ Object



76
77
78
# File 'lib/cassava/document.rb', line 76

# Number of rows currently held by the document.
#
# @return [Integer]
def nrows
  @rows.length
end

#parse! ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/cassava/document.rb', line 106

# Reads the CSV file named by +name+ and replaces @rows with its records.
#
# If no columns are configured yet, the first record is consumed as the
# column header. Every remaining record becomes a Hash keyed by column
# Symbol, plus a :_row_i entry numbering the rows from 1. @ncols is raised
# to the widest record seen. opts[:col_sep], when given, is passed to CSV as
# the field separator.
#
# @return [self]
def parse!
  csv_opts = { }
  csv_opts[:col_sep] = @opts[:col_sep] if @opts[:col_sep]
  # CSV.open takes its options as keywords; a positional Hash raises
  # ArgumentError on Ruby 3, so the Hash is splatted. The splat needs
  # Ruby >= 2.0, which is why the old Ruby 1.8 code path is gone. The block
  # form closes the file even if reading fails.
  @rows = CSV.open(name, "rb", **csv_opts) { | csv | csv.read }

  # With no columns configured the first record is the header. An empty file
  # has no first record, so fall back to an empty column list, not nil.
  @columns = (@rows.shift || [ ]) if @columns.empty?
  update_columns!

  row_i = 0
  @rows.map! do | r |
    @ncols = r.size if @ncols < r.size
    h = { :_row_i => (row_i += 1) }
    @column_offset.each do | c, i |
      h[c] = r[i] if c && i
    end
    h
  end
  self
end

#row_to_array(r) ⇒ Object



210
211
212
213
214
215
216
217
# File 'lib/cassava/document.rb', line 210

# Converts a row into a positional Array.
#
# An Array is returned as-is (the same object). Any other row is read with
# [] once per column position; positions without a column yield nil.
#
# @param r [Hash, Array] the row
# @return [Array]
def row_to_array r
  return r if Array === r

  @offset_column.map { | name | name && r[name] }
end

#sort!(by = nil) ⇒ Object



255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# File 'lib/cassava/document.rb', line 255

# Sorts the rows in place by the given columns.
#
# Values are first normalised with cast_strings! so numeric text compares as
# numbers. Rows are compared column by column in the order given; the first
# column that differs decides. nil sorts before any other value, and values
# that cannot be compared with each other are treated as equal.
#
# @param by [Array, nil] column keys to sort by; defaults to all columns
# @return [self]
def sort! by = nil
  by ||= @columns
  cast_strings!
  @rows.sort! do | a, b |
    r = 0
    by.each do | c |
      av = a[c]
      bv = b[c]
      r =
        if av.nil? && bv.nil?
          0
        elsif av.nil?
          -1
        elsif bv.nil?
          1
        else
          # <=> yields nil for incomparable values; count those as equal.
          (av <=> bv rescue nil) || 0
        end
      break if r != 0
    end
    r
  end
  self
end

#thousands(x, sep = '_') ⇒ Object



382
383
384
# File 'lib/cassava/document.rb', line 382

# Formats +x+ with a separator between groups of three digits, counted from
# the right of each digit run: thousands(1234567) gives "1_234_567".
#
# A leading sign or other non-digit prefix is kept ("-123456" becomes
# "-123_456"), the separator is treated as literal text, and a String
# argument is not modified.
#
# @param x [#to_s, nil] the value to format; nil or false is returned as-is
# @param sep [String] separator inserted between digit groups
# @return [String, nil, false]
def thousands x, sep = '_'
  return x unless x

  sep = sep.to_s
  # Grouping works on the reversed text, so the separator is inserted
  # reversed and comes out the right way round after the final reverse.
  reversed_sep = sep.reverse
  grouped = x.to_s.reverse.gsub(/\d{3}/) { | digits | digits + reversed_sep }.reverse
  # A digit run whose length is a multiple of three picks up one separator
  # in front of it; drop that separator but keep any sign before it.
  grouped.sub(/^(\D|\A)#{Regexp.escape(sep)}/, '\1')
end

#to_column_names!(a) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/cassava/document.rb', line 54

# Replaces each element of +a+, in place, with a column name.
#
# * Integer: the column at that position in @columns.
# * String: if it is exactly the decimal form of an integer, the column at
#   that position; otherwise the String converted to a Symbol.
# * Symbol: kept as-is.
#
# @param a [Array] column references
# @return [Array] the same +a+ object
# @raise [TypeError] for any other element type, or when a position does not
#   resolve to a column
def to_column_names! a
  a.map! do | item |
    resolved =
      if Integer === item
        @columns[item]
      elsif String === item
        number = item.to_i
        item == number.to_s ? @columns[number] : item.to_sym
      elsif Symbol === item
        item
      else
        raise TypeError, "#{item.inspect}"
      end
    raise TypeError, "#{item.inspect} => #{resolved.inspect}" unless resolved
    resolved
  end
  a
end

#to_text(opts = { }) ⇒ Object

Format as ASCII table.



336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
# File 'lib/cassava/document.rb', line 336

# Renders the document as a borderless text table using the terminal-table
# gem, which is activated and loaded on each call.
#
# The first table row holds the column names, centred. Each data row follows,
# with nil shown as an empty string and Integers formatted by thousands.
# Columns whose inferred type is Numeric are right-aligned.
#
# +opts+ is accepted but not read by this method.
#
# @param opts [Hash] currently unused
# @return [String] the formatted table
def to_text opts = { }
  gem 'terminal-table'
  require 'terminal-table'

  table = Terminal::Table.new() do | table |
    # t.title = self.name
    # Blank out all border characters so only padding separates the cells.
    s = table.style
    s.border_x = s.border_y = s.border_i = ''
    # s.border_i = '|'
    s.padding_left = 0
    s.padding_right = 1

    # Header row: column names, centred.
    table << self.columns.map{|c| { :value => c.to_s, :alignment => :center }}

    # Convert rows to Arrays and handle nil, etc.
    # NOTE(review): row_to_array returns an Array row unchanged, so map! below
    # would modify such a row in place - confirm rows are always Hashes here.
    self.rows.each do | r |
      r = self.row_to_array(r)
      r.map! do | c |
        c = case c
            when nil
              ''
            when Integer
              thousands(c)
            else
              c
            end
        # c = "#{c} |"
      end
      table << r
    end

    # Align numeric columns to the right. Types are inferred from a cast copy
    # of the rows (clone_rows), so the document's own values are not changed.
    column_types = infer_column_types(cast_strings(clone_rows))
    column_types.each_with_index do | type, ci |
      if type && type.ancestors.include?(Numeric)
        # puts "  column #{ci} #{columns[ci]} #{t}"
        table.align_column(ci, :right)
      end
    end
  end


  # Return formatted table.
  table.to_s
end

#update_columns! ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/cassava/document.rb', line 25

# Rebuilds the column lookup tables from @columns.
#
# @columns is normalised in place: every entry is converted to a Symbol, and
# entries whose text is empty become nil. @column_offset (name => position)
# and @offset_column (position => name) are rebuilt to match, @ncols is
# reset to the number of columns, the cached column types are dropped, and
# index entries for columns that no longer exist are removed.
#
# @return [self]
def update_columns!
  @ncols = @columns.size
  @column_offset = { }
  @offset_column = [ ]
  @columns.map!.with_index do | raw, pos |
    label = raw.to_s
    if label.empty?
      # Unnamed column: keeps its position but gets no lookup entry.
      nil
    else
      sym = label.to_sym
      @column_offset[sym] = pos
      @offset_column[pos] = sym
      sym
    end
  end
  @column_types = nil
  @index.delete_if { | col, _ | @column_offset[col].nil? }
  self
end