Class: TerrapopSample

Inherits:
ActiveRecord::Base
  • Object
show all
Defined in:
app/models/terrapop_sample.rb

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.from_nhgis(nhgis_dataset, year = nil) ⇒ Object

NHGIS Ingest related



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'app/models/terrapop_sample.rb', line 121

# Builds a new, UNSAVED terrapop sample from an NHGIS dataset.
#
# The sample is linked to the dataset, tagged with source project 'NHGIS',
# and given the US country (Country looked up by short_name :us) with the
# short country name "US". Label and short label are copied from the dataset.
#
# Year handling: the dataset's terrapop_years decides which attributes are set.
#   * two entries -> begin_year / end_year
#   * one entry   -> year
#   * otherwise   -> the +year+ argument is used instead (a two-element Array
#                    becomes begin_year / end_year, anything else becomes year)
#
# @param nhgis_dataset [Object] responds to id, terrapop_label,
#   terrapop_short_label and terrapop_years
# @param year [Object, Array, nil] fallback year (or [begin, end] pair), only
#   consulted when terrapop_years has neither one nor two entries
# @return [Object] the new sample; the caller is responsible for saving it
# @raise [RuntimeError] when terrapop_years is unusable and +year+ is nil
def self.from_nhgis(nhgis_dataset, year = nil)
  built = new

  # Link the sample back to its NHGIS dataset.
  built.nhgis_dataset_id = nhgis_dataset.id

  # NHGIS data is always filed under the US country record.
  built.country_id = Country.find_by(short_name: :us).id
  built.short_country_name = "US"

  # Labels come straight from the dataset.
  built.label = nhgis_dataset.terrapop_label
  built.short_label = nhgis_dataset.terrapop_short_label
  built.source_project = 'NHGIS'

  dataset_years = nhgis_dataset.terrapop_years
  entry_count = dataset_years.size

  if entry_count == 2
    built.begin_year, built.end_year = dataset_years.first, dataset_years.last
  elsif entry_count == 1
    built.year = dataset_years.first
  elsif year.nil?
    raise "invalid value for terrapop_years: #{dataset_years}"
  elsif year.is_a?(Array) && year.size == 2
    built.begin_year, built.end_year = year.first, year.last
  else
    built.year = year
  end

  # Intentionally not saved here.
  built
end

.long_citation(terrapop_samples) ⇒ Object



279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'app/models/terrapop_sample.rb', line 279

# Joins the long citations of several samples into one text block.
#
# Each sample's citation is followed by an empty line, and one more empty
# entry is appended at the end before joining with newlines.
#
# @param terrapop_samples [#count, #each] samples responding to long_citation
# @return [String] the combined citations, or "" when there are no samples
def self.long_citation(terrapop_samples)
  return "" unless terrapop_samples.count > 0

  lines = []
  terrapop_samples.each { |sample| lines.push(sample.long_citation, "") }
  lines.push("").join("\n")
end

.long_description(terrapop_samples) ⇒ Object



218
219
220
221
222
223
224
225
226
227
228
# File 'app/models/terrapop_sample.rb', line 218

# Joins the long descriptions of several samples, one per line.
#
# @param terrapop_samples [#count, #map] samples responding to long_description
# @return [String] newline-separated descriptions, or
#   "No Area-level Datasets" when there are no samples
def self.long_description(terrapop_samples)
  return "No Area-level Datasets" unless terrapop_samples.count > 0

  terrapop_samples.map { |sample| sample.long_description }.join("\n")
end

Instance Method Details

#country_name_long_year(pieces = false) ⇒ Object



242
243
244
245
246
247
248
249
250
251
252
253
254
# File 'app/models/terrapop_sample.rb', line 242

# Combines the country's full name with the sample's year in long form.
#
# The long year is +year+ when it is set, otherwise "begin_year-end_year".
#
# @param pieces [Boolean] when truthy, return the parts as a hash instead of
#   a single string
# @return [String, Hash] "<full name> <year>", or
#   {country_name: ..., long_year: ...} when +pieces+ is truthy
def country_name_long_year(pieces = false)
  year_text = (year.nil? ? "#{begin_year}-#{end_year}" : year).to_s
  return {country_name: country.full_name, long_year: year_text} if pieces

  country.full_name + " " + year_text
end

#country_name_year ⇒ Object



231
232
233
234
235
236
237
238
239
# File 'app/models/terrapop_sample.rb', line 231

# Combines the country's full name with the sample's year in short form.
#
# When +year+ is set it is used as-is; otherwise the last two characters of
# begin_year and of end_year are concatenated (e.g. 2009 and 2013 -> "0913").
#
# @return [String] "<full name> <year or two-digit range>"
def country_name_year
  suffix =
    if year.nil?
      begin_year.to_s[-2, 2] + end_year.to_s[-2, 2]
    else
      year.to_s
    end

  country.full_name + " " + suffix
end

#dataset_identifier ⇒ Object

Generate/Retrieve Short Dataset Identifier



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'app/models/terrapop_sample.rb', line 78

# Generates or retrieves the short dataset identifier.
#
# A sample with a sample_id uses the name of its associated +sample+.
# Otherwise the identifier is assembled from:
#   * short_country_name
#   * a year part: the last two characters of +year+, or of begin_year and
#     end_year concatenated when +year+ is nil, or nothing when all are nil
#   * "_NHGIS" when nhgis_dataset_id is set, "_TP" when it is not
#
# @return [String] e.g. "US90_NHGIS", "US0913_NHGIS", "BR_TP"
def dataset_identifier
  return sample.name unless sample_id.nil?

  year_part =
    if !year.nil?
      year.to_s[-2, 2]
    elsif begin_year.nil? && end_year.nil?
      ''
    else
      begin_year.to_s[-2, 2] + end_year.to_s[-2, 2]
    end

  # No NHGIS dataset means this is a Terrapop-exclusive dataset.
  origin = nhgis_dataset_id.nil? ? "_TP" : "_NHGIS"

  short_country_name + year_part + origin
end

#is_tabulated? ⇒ Boolean

Returns:

  • (Boolean)


178
179
180
# File 'app/models/terrapop_sample.rb', line 178

# Reports whether this sample is tabulated, i.e. whether it has a sample_id.
#
# @return [Boolean] false when sample_id is nil, true otherwise
def is_tabulated?
  return false if sample_id.nil?

  true
end

#long_citation ⇒ Object



257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# File 'app/models/terrapop_sample.rb', line 257

# Builds the multi-line long citation for this sample.
#
# The first line is "<short name> <country name> <long year>". For a
# tabulated sample it is followed by the IPUMS-I attribution line and the
# "stats_office" value from sample_detail_values_fields; otherwise a
# placeholder citation line is emitted.
#
# @return [String] the citation lines joined with newlines
def long_citation
  short = short_name_long_year
  # Passing true asks for the country/year pieces as a hash.
  parts = country_name_long_year(true)

  country_label = parts[:country_name]
  long_year = short[:long_year].to_s

  lines = [short[:short_name] + " " + country_label + " " + long_year]

  if is_tabulated?
    lines << "Tabulated from IPUMS-I microdata (https://international.ipums.org/international/)."
    lines << sample_detail_values_fields["stats_office"][:value]
  else
    lines << "<citation for " + country_label + " " + long_year + " Census>.  Source data downloaded from <URL> on <date>"
  end

  lines.join("\n")
end

#long_description ⇒ Object



191
192
193
194
195
196
197
# File 'app/models/terrapop_sample.rb', line 191

# Builds the one-line long description for this sample:
# "<short name> <country full name> <long year> <tabulated or published text>".
#
# @return [String]
def long_description
  names = short_name_long_year
  heading = names[:short_name] + " " + country.full_name

  heading + " " + names[:long_year].to_s + " " + tabulated_or_published
end

#lowest_sample_geog_level ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'app/models/terrapop_sample.rb', line 21

# Finds this sample's SampleGeogLevel for the first geographic unit code it
# has, checked in this order: HSLAD, SLAD, HFLAD, FLAD, NAT.
#
# @return [Object] the result of SampleGeogLevel.find for the matching level
# @raise [RuntimeError] when none of those codes is present; this indicates
#   a metadata error
def lowest_sample_geog_level
  joined_levels = sample_geog_levels
    .joins(:country_level)
    .joins("JOIN geog_units gu ON gu.id = country_levels.geog_unit_id")

  # Map each geog unit code to its sample geog level id (later rows win).
  level_id_by_code = joined_levels.map { |level| [level.country_level.geog_unit.code, level.id] }.to_h

  %w[HSLAD SLAD HFLAD FLAD NAT].each do |code|
    return SampleGeogLevel.find(level_id_by_code[code]) if level_id_by_code.key?(code)
  end

  # empty -- this is an error with metadata
  raise "No GeogUnits for TerrapopSample[#{id}]"
end

#map_for_level(lev_name) ⇒ Object

Call with the code given in GeogUnits (NAT, FLAD, etc.). Returns nil if that code isn’t found in GeogUnits.



43
44
45
46
47
48
49
50
# File 'app/models/terrapop_sample.rb', line 43

# Looks up this sample's map for a geographic level.
#
# @param lev_name [String] level code passed to get_country_level
#   (e.g. NAT, FLAD)
# @return [Object, nil] the country level's map whose terrapop_sample_id is
#   this sample's id, or nil when get_country_level returns nil
def map_for_level(lev_name)
  level = get_country_level(lev_name)
  return nil if level.nil?

  level.maps.find_by(terrapop_sample_id: id)
end

#redact_from_small_regions(region_size) ⇒ Object



109
110
111
112
113
114
115
116
117
# File 'app/models/terrapop_sample.rb', line 109

# Redacts data for regions whose total population count is at or below
# +region_size+ by setting the affected AreaDataValue values to -1.0.
#
# Steps:
#   1. collect ids of this sample's TOTPOP variables with measurement type
#      label 'Count'
#   2. find their values (via AreaDataValue.all_data) that are <= region_size
#   3. update every AreaDataValue belonging to those regions' geog instances
#
# NOTE(review): step 3 filters on geog_instance_id only, so it is not limited
# to this sample's variables -- confirm that is intended.
#
# @param region_size [Numeric] population threshold (inclusive)
# @return [Object] whatever update_all returns
# @raise [RuntimeError] unless the underlying sample's density is 100.0
def redact_from_small_regions(region_size)
  raise "Can only redact data from full count (100% sample)." unless sample.density == 100.0

  count_variable_ids = area_data_variables
    .joins(:measurement_type)
    .where(mnemonic: :TOTPOP, measurement_types: {label: 'Count'})
    .pluck(:id)
    .uniq

  small_region_counts = AreaDataValue.all_data
    .where(area_data_variable_id: count_variable_ids)
    .where(["value <= ?", region_size])

  small_region_ids = small_region_counts.all.map { |count_row| count_row.geog_instance.id }

  AreaDataValue.where(geog_instance_id: small_region_ids).update_all(value: -1.0)
end

#sample_geog_level_for_code(lev_name) ⇒ Object

Call with the code given in GeogUnits (NAT, FLAD, etc.).



60
61
62
63
64
65
66
67
# File 'app/models/terrapop_sample.rb', line 60

# Resolves a geographic level code to this sample's sample geog level.
#
# @param lev_name [String] level code passed to get_country_level
#   (e.g. NAT, FLAD)
# @return [Object, nil] the result of sample_geog_level_for_country_level,
#   or nil when get_country_level returns nil
def sample_geog_level_for_code(lev_name)
  level = get_country_level(lev_name)
  return nil if level.nil?

  sample_geog_level_for_country_level(level)
end

#sample_geog_level_for_country_level(country_level) ⇒ Object

Call with a country_level instance.



54
55
56
# File 'app/models/terrapop_sample.rb', line 54

# Fetches the first SampleGeogLevel matching both the given country level
# and this sample.
#
# @param country_level [Object] a country level instance (responds to id)
# @return [Object, nil] the first match from SampleGeogLevel.where
def sample_geog_level_for_country_level(country_level)
  matches = SampleGeogLevel.where(country_level_id: country_level.id, terrapop_sample_id: id)
  matches.first
end

#short_country_name_long_year ⇒ Object



70
71
72
# File 'app/models/terrapop_sample.rb', line 70

# Concatenates the short country name and the year with no separator.
# When +year+ is nil only the short country name is returned.
#
# @return [String] e.g. "US2000"
def short_country_name_long_year
  year_text = year.to_s
  short_country_name + year_text
end

#short_name_long_year ⇒ Object



199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# File 'app/models/terrapop_sample.rb', line 199

# Returns the sample's short name together with its year in long form.
#
# Tabulated samples (sample_id set) use the associated sample's name and
# +year+. For other samples the short name is the lower-cased
# short_country_name followed by the year, or by the last two characters of
# begin_year and end_year when +year+ is nil.
#
# The long year is +year+ when it is set and "begin_year-end_year" only when
# it is not. Previously the range form was always used for untabulated
# samples, which produced "-" for single-year samples whose begin_year and
# end_year are nil.
#
# @return [Hash] {long_year: Integer or String, short_name: String}
def short_name_long_year
  return {long_year: year, short_name: sample.name} unless sample_id.nil?

  country_code = short_country_name.downcase

  if year.nil?
    # Multi-year sample: two-digit pair for the short name, full range for the long year.
    short_yr = begin_year.to_s[-2..-1] + end_year.to_s[-2..-1]
    long_year = begin_year.to_s + "-" + end_year.to_s
  else
    short_yr = year
    long_year = year
  end

  {long_year: long_year, short_name: country_code + short_yr.to_s}
end

#smooth_small_counts(threshold, new_value) ⇒ Object

Privacy Controls for Small Samples

Should only apply to samples with 100% sampling / complete count data



100
101
102
103
104
105
106
# File 'app/models/terrapop_sample.rb', line 100

# Privacy control for small counts: replaces every non-negative value at or
# below +threshold+ with +new_value+, for this sample's variables whose
# measurement type label is 'Count'.
#
# NOTE(review): the error message says "redact" although this method
# smooths; it is reproduced unchanged.
#
# @param threshold [Numeric] values in 0.0..threshold are replaced
# @param new_value [Numeric] the replacement value
# @return [Object] whatever update_all returns
# @raise [RuntimeError] unless the underlying sample's density is 100.0
def smooth_small_counts(threshold, new_value)
  raise "Can only redact data from full count (100% sample)." unless sample.density == 100.0

  count_variable_ids = area_data_variables
    .joins(:measurement_type)
    .where(measurement_types: {label: 'Count'})
    .pluck(:id)
    .uniq

  AreaDataValue.all_data
    .where(area_data_variable_id: count_variable_ids)
    .where(['value <= ? and value >=0.0', threshold])
    .update_all(value: new_value)
end

#tabulated_or_published ⇒ Object



169
170
171
172
173
174
175
# File 'app/models/terrapop_sample.rb', line 169

# Describes where this sample's data comes from.
#
# @return [String] "published area-level data" for untabulated samples,
#   otherwise "tabulated from <sample size value> microdata sample"
def tabulated_or_published
  return "published area-level data" unless is_tabulated?

  "tabulated from " + tabulated_sample_size[:value] + " microdata sample"
end

#tabulated_sample_size ⇒ Object



183
184
185
186
187
188
189
# File 'app/models/terrapop_sample.rb', line 183

# Returns the sample-fraction detail for this sample.
#
# When the sample has detail values, this is the 'sample_fraction' entry of
# sample_detail_values_fields (presumably a hash with :value and :label --
# confirm against that helper). Otherwise a placeholder hash with value
# 'N/A' is returned.
#
# The placeholder used to be written as `[value: ..., label: ...]`, which is
# an Array wrapping a Hash; indexing it with [:value], as
# tabulated_or_published does, raises TypeError. It is now a plain Hash.
#
# @return [Hash] responds to [:value]
def tabulated_sample_size
  return {value: 'N/A', label: "Sample fraction"} unless sample_detail_values.size > 0

  sample_detail_values_fields['sample_fraction']
end