Class: Datasets::CaliforniaHousing

Inherits:
Dataset
  • Object
show all
Defined in:
lib/datasets/california-housing.rb

Defined Under Namespace

Classes: Record

Instance Attribute Summary

Attributes inherited from Dataset

#metadata

Instance Method Summary collapse

Methods inherited from Dataset

#clear_cache!, #to_table

Constructor Details

#initialize ⇒ CaliforniaHousing

Returns a new instance of CaliforniaHousing.



16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/datasets/california-housing.rb', line 16

# Creates the dataset object and fills in its descriptive metadata.
#
# After calling the parent constructor, this method only assigns fields on
# @metadata (id, name, url, licenses, description).
# NOTE(review): relies on the parent constructor having set @metadata.
def initialize
  super()
  @metadata.id = "california-housing"
  @metadata.name = "California Housing"
  @metadata.url = "http://lib.stat.cmu.edu/datasets/"
  # "CC0-1.0" is the SPDX identifier for CC0; the earlier value "CCO"
  # (capital letter O instead of zero) matched no known license.
  @metadata.licenses = ["CC0-1.0"]
  @metadata.description = <<-DESCRIPTION
Housing information from the 1990 census used in
Pace, R. Kelley and Ronald Barry,
"Sparse Spatial Autoregressions",
Statistics and Probability Letters, 33 (1997) 291-297.
Available from http://lib.stat.cmu.edu/datasets/.
  DESCRIPTION
end

Instance Method Details

#each ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/datasets/california-housing.rb', line 31

# Yields one Record per data row of the California Housing dataset.
#
# Calls +download+ with the cache path and URL of "houses.zip", then
# +open_data+ to read "cadata.txt" from it. Only lines that begin with a
# space are treated as data rows; every other line is skipped. Each data row
# is turned into comma-separated text and parsed by CSV with the :numeric
# converter, and the resulting columns are splatted into Record.new.
#
# @yieldparam record [Record] one row of the dataset
# @return [Enumerator] when called without a block
def each
  return to_enum(__method__) unless block_given?

  data_path = cache_dir_path + "houses.zip"
  data_url = "http://lib.stat.cmu.edu/datasets/houses.zip"
  file_name = "cadata.txt"
  download(data_path, data_url)
  open_data(data_path, file_name) do |input|
    # Unary plus gives a mutable buffer even when string literals are frozen
    # (e.g. under `# frozen_string_literal: true`), so `<<` below cannot raise
    # FrozenError. With unfrozen literals it is a no-op.
    data = +""
    input.each_line do |line|
      # Lines not starting with a space are not data rows.
      next unless line.start_with?(" ")
      # Drop the leading padding and turn each run of spaces into one comma.
      data << line.lstrip.gsub(/ +/, ",")
    end
    CSV.parse(data, converters: [:numeric]) do |row|
      yield(Record.new(*row))
    end
  end
end