Class: Nhgis::AggDataVar
- Inherits:
-
NhgisActiveRecord::Base
- Object
- NhgisActiveRecord::Base
- Nhgis::AggDataVar
- Defined in:
- app/models/nhgis/agg_data_var.rb
Constant Summary collapse
- PAGE_SIZE =
1000
Class Method Summary collapse
- .all_variables(limit_dataset_ids = nil) ⇒ Object
- .variables(dataset_id, page = -1)) ⇒ Object
- .variables_count(dataset_id) ⇒ Object
Class Method Details
.all_variables(limit_dataset_ids = nil) ⇒ Object
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
# File 'app/models/nhgis/agg_data_var.rb', line 67 def self.all_variables(limit_dataset_ids = nil) dataset_ids = limit_dataset_ids.nil? ? Dataset.where({}).select("id") : Dataset.where({id: (limit_dataset_ids.is_a?(Array) ? limit_dataset_ids : [limit_dataset_ids])}).select("id") file = File.join(TerrapopConfiguration['application']['environments'][Rails.env.to_s]['source_data']['nhgis_metadata'], 'nhgis-agg-data-vars.csv.gz') File.open(file, 'w') do |f| gz = Zlib::GzipWriter.new(f) csv = CSV.new(gz) variables = Nhgis::AggDataVar.variables(dataset_ids.first, 0) columns = variables.first.keys csv << columns csv.close gz.close end dataset_ids.each{|dataset| $stderr.print "===> Working on dataset[#{dataset.id}]..." count = AggDataVar.variables_count(dataset.id) pages = (count.to_f/PAGE_SIZE.to_f).ceil (0...pages).each{|page| variables = AggDataVar.variables(dataset.id, page) File.open(file, 'a+') do |f| gz = Zlib::GzipWriter.new(f) csv = CSV.new(gz) variables.each {|row| csv << row.values } csv.close gz.close end } $stderr.puts "done" } $stderr.puts "==> Done with datasets..." end |
.variables(dataset_id, page = -1)) ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'app/models/nhgis/agg_data_var.rb', line 30 def self.variables(dataset_id, page = -1) sql = "select ds.code as ds_code, dt.label as dt_label, adv.label as adv_label, adv.id as adv_id, ts.id as ts_id, ts.label as ts_label, tst.id as tst_id, tst.label as tst_label, gl.id as gl_id, gl.istads_id as gl_istads_id, dg.id as dg_id, dg.relative_pathname as dg_relative_pathname, df.id as df_id, df.filename as df_filename, ds.id as ds_id from agg_data_vars adv join data_tables dt on dt.id = adv.data_table_id join datasets ds on ds.id = dt.dataset_id and ds.id = ? join data_groups dg on dg.dataset_id = dt.dataset_id AND dg.relative_pathname IS NOT NULL join geotimes gt on gt.id = dg.geotime_id join geog_levels gl on gl.id = gt.geog_level_id and gl.istads_id IN ('state', 'county', 'nation') join data_files df on df.data_group_id = dg.id join time_series_components tsc on tsc.agg_data_var_id = adv.id join time_series ts on ts.id= tsc.time_series_id join time_series_tables_x_time_series x on x.time_series_id = ts.id join time_series_tables tst on tst.id = x.time_series_table_id LIMIT #{page},#{PAGE_SIZE}" #order by adv.istads_seq' #connection.execute([sanitize(sql), dataset_id, geog_level_id]) sql = send(:sanitize_sql_array, [sql, dataset_id]) connection.execute(sql) end |
.variables_count(dataset_id) ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'app/models/nhgis/agg_data_var.rb', line 8 def self.variables_count(dataset_id) sql = 'select COUNT(ds.code) AS count from agg_data_vars adv join data_tables dt on dt.id = adv.data_table_id join datasets ds on ds.id = dt.dataset_id and ds.id = ? join data_groups dg on dg.dataset_id = dt.dataset_id AND dg.relative_pathname IS NOT NULL join geotimes gt on gt.id = dg.geotime_id join geog_levels gl on gl.id = gt.geog_level_id and gl.istads_id IN ("state", "county", "nation") join data_files df on df.data_group_id = dg.id join time_series_components tsc on tsc.agg_data_var_id = adv.id join time_series ts on ts.id= tsc.time_series_id join time_series_tables_x_time_series x on x.time_series_id = ts.id join time_series_tables tst on tst.id = x.time_series_table_id' sql = send(:sanitize_sql_array, [sql, dataset_id]) result = connection.execute(sql) count = result[0]['count'].to_i end |