Class: Statsample::StratifiedSample

Inherits:
Object
  • Object
show all
Defined in:
lib/statsample/multiset.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ms, strata_sizes) ⇒ StratifiedSample

Returns a new instance of StratifiedSample.

Raises:

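  • (TypeError) — if ms is not a Statsample::Multiset
  • (ArgumentError) — if strata_sizes does not have one entry per dataset name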


203
204
205
206
207
208
209
210
211
# File 'lib/statsample/multiset.rb', line 203

# Builds a stratified sample on top of a multiset whose datasets are the
# strata.
#
# @param ms [Statsample::Multiset] source of the per-stratum datasets
# @param strata_sizes [Hash] population size of each stratum, keyed by
#   dataset name
# @raise [TypeError] if +ms+ is not a Statsample::Multiset
# @raise [ArgumentError] if the sorted keys of +strata_sizes+ differ from
#   +ms.datasets_names+
def initialize(ms, strata_sizes)
  unless ms.is_a?(Statsample::Multiset)
    raise TypeError, "ms should be a Multiset"
  end
  @ms = ms
  if strata_sizes.keys.sort != ms.datasets_names
    raise ArgumentError, "You should put a strata size for each dataset"
  end
  @strata_sizes = strata_sizes
  # Population size: sum of the declared stratum sizes.
  @population_size = @strata_sizes.inject(0) { |total, pair| total + pair[1] }
  @strata_number = @ms.n_datasets
  # Sample size: sum of the row counts of every dataset.
  @sample_size = @ms.datasets.inject(0) { |total, pair| total + pair[1].nrows }
end

Class Method Details

.calculate_n_total(es) ⇒ Object



121
122
123
# File 'lib/statsample/multiset.rb', line 121

# Adds up the 'N' entry of every stratum hash.
#
# @param es [Enumerable<Hash>] stratum hashes, each holding an 'N' key
# @return [Numeric] sum of the 'N' values (0 for an empty collection)
def calculate_n_total(es)
  total = 0
  es.each { |stratum| total += stratum['N'] }
  total
end

.mean(*vectors) ⇒ Object

mean for an array of vectors



99
100
101
102
103
104
105
106
# File 'lib/statsample/multiset.rb', line 99

# Pooled mean of several vectors: the sum of all their values divided by
# the total number of values.
#
# @param vectors [Array] objects responding to #sum and #size
# @return [Float] pooled mean; NaN when there are no values (0.0 / 0)
def mean(*vectors)
  n_total = 0
  grand_total = 0
  vectors.each do |vector|
    n_total += vector.size
    grand_total += vector.sum
  end
  grand_total.to_f / n_total
end

.proportion_sd_esd_wor(es) ⇒ Object



198
199
200
# File 'lib/statsample/multiset.rb', line 198

# Standard deviation of a stratified proportion: the square root of the
# value returned by proportion_variance_ksd_wor.
#
# NOTE(review): despite the "esd" in its name this delegates to the "ksd"
# variance, so it returns the same value as proportion_sd_ksd_wor.
# Confirm whether proportion_variance_esd_wor was intended.
#
# @param es [Enumerable<Hash>] stratum hashes, handed unchanged to
#   proportion_variance_ksd_wor
# @return [Float]
def proportion_sd_esd_wor(es)
  variance = proportion_variance_ksd_wor(es)
  Math.sqrt(variance)
end

.proportion_sd_ksd_wor(es) ⇒ Object



172
173
174
# File 'lib/statsample/multiset.rb', line 172

# Standard deviation of a stratified proportion: the square root of
# proportion_variance_ksd_wor.
#
# @param es [Enumerable<Hash>] stratum hashes, handed unchanged to
#   proportion_variance_ksd_wor
# @return [Float]
def proportion_sd_ksd_wor(es)
  variance = proportion_variance_ksd_wor(es)
  Math.sqrt(variance)
end

.proportion_sd_ksd_wr(es) ⇒ Object



177
178
179
180
181
182
183
184
# File 'lib/statsample/multiset.rb', line 177

# Standard deviation of a stratified proportion, computed as
# sqrt(sum of N**2 * p * (1 - p) / n over the strata) / N_total.
#
# @param es [Enumerable<Hash>] stratum hashes with keys 'N', 'n' and 'p'
# @return [Float]
def proportion_sd_ksd_wr(es)
  n_total = calculate_n_total(es)
  weighted = 0
  es.each do |stratum|
    weighted += (stratum['N']**2 * stratum['p'] * (1 - stratum['p'])) / stratum['n'].to_f
  end
  Math.sqrt(weighted) * (1.0 / n_total)
end

.proportion_variance_esd_wor(es) ⇒ Object



189
190
191
192
193
194
195
196
197
# File 'lib/statsample/multiset.rb', line 189

# Variance of a stratified proportion:
#
#   (1 / N_total**2) * sum of N**2 * (N - n) * p * (1 - p) / ((n - 1) * (N - 1))
#
# This is the same per-stratum term that the instance method variance_pst
# accumulates.
#
# Fixes relative to the previous body: the per-stratum term was assigned to
# the accumulator while an undefined +val+ was added (NameError on any
# non-empty input), and the result applied Math.sqrt to the sum while
# dividing by N_total**2, which is neither a variance nor a standard
# deviation.
#
# @param es [Enumerable<Hash>] stratum hashes with keys 'N', 'n' and 'p'
# @return [Float] the variance; non-finite when a stratum has n == 1 or
#   N == 1, because the denominator is then zero
def proportion_variance_esd_wor(es)
  n_total = calculate_n_total(es)
  sum = es.inject(0) do |a, h|
    # 1.0 forces Float arithmetic even when every entry is an Integer.
    val = (h['N']**2 * (h['N'] - h['n']) * h['p'] * (1.0 - h['p'])) / ((h['n'] - 1) * (h['N'] - 1))
    a + val
  end
  (1.0 / (n_total**2)) * sum
end

.proportion_variance_ksd_wor(es) ⇒ Object



165
166
167
168
169
170
171
# File 'lib/statsample/multiset.rb', line 165

# Variance of a stratified proportion: for every stratum,
# (N / N_total)**2 * p * (1 - p) / n * (1 - n / N), summed over the strata.
#
# @param es [Enumerable<Hash>] stratum hashes with keys 'N', 'n' and 'p'
# @return [Numeric] the summed variance (0 for an empty collection)
def proportion_variance_ksd_wor(es)
  n_total = calculate_n_total(es)
  variance = 0
  es.each do |stratum|
    weight_sq = (stratum['N'].to_f / n_total)**2
    per_unit = (weight_sq * stratum['p'] * (1 - stratum['p'])) / (stratum['n'])
    variance += per_unit * (1 - (stratum['n'].to_f / stratum['N']))
  end
  variance
end

.proportion_variance_ksd_wr(es) ⇒ Object



185
186
187
# File 'lib/statsample/multiset.rb', line 185

# Variance of a stratified proportion, obtained by squaring
# proportion_sd_ksd_wr. This mirrors variance_ksd_wr, which squares
# standard_error_ksd_wr.
#
# The previous body squared proportion_variance_ksd_wor instead, i.e. it
# returned the square of a different variance rather than the square of
# the matching standard deviation.
#
# @param es [Enumerable<Hash>] stratum hashes, handed unchanged to
#   proportion_sd_ksd_wr
# @return [Float]
def proportion_variance_ksd_wr(es)
  proportion_sd_ksd_wr(es)**2
end

.standard_error_esd_wor(es) ⇒ Object



149
150
151
# File 'lib/statsample/multiset.rb', line 149

# Standard error: the square root of variance_ksd_wor.
#
# NOTE(review): the name says "esd" but the variance used is
# variance_ksd_wor. Its per-stratum term,
# (N / N_total)**2 * s**2 / n * (1 - n / N), is algebraically equal to the
# term in variance_esd_wor, N * (N - n) * s**2 / n / N_total**2, so only
# rounding can differ.
#
# @param es [Enumerable<Hash>] stratum hashes, handed unchanged to
#   variance_ksd_wor
# @return [Float]
def standard_error_esd_wor(es)
  variance = variance_ksd_wor(es)
  Math.sqrt(variance)
end

.standard_error_esd_wr(es) ⇒ Object



161
162
163
# File 'lib/statsample/multiset.rb', line 161

# Standard error: the square root of variance_esd_wr.
#
# @param es [Enumerable<Hash>] stratum hashes, handed unchanged to
#   variance_esd_wr
# @return [Float]
def standard_error_esd_wr(es)
  variance = variance_esd_wr(es)
  Math.sqrt(variance)
end

.standard_error_ksd_wor(es) ⇒ Object



133
134
135
# File 'lib/statsample/multiset.rb', line 133

# Standard error: the square root of variance_ksd_wor.
#
# @param es [Enumerable<Hash>] stratum hashes, handed unchanged to
#   variance_ksd_wor
# @return [Float]
def standard_error_ksd_wor(es)
  variance = variance_ksd_wor(es)
  Math.sqrt(variance)
end

.standard_error_ksd_wr(es) ⇒ Object



108
109
110
111
112
113
114
115
# File 'lib/statsample/multiset.rb', line 108

# Standard error computed as
# (1 / N_total) * sqrt(sum of N**2 * s**2 / n over the strata),
# where N_total is accumulated from the 'N' entries while iterating.
#
# @param es [Enumerable<Hash>] stratum hashes with keys 'N', 'n' and 's'
# @return [Float]
def standard_error_ksd_wr(es)
  n_total = 0
  weighted = 0
  es.each do |stratum|
    n_total += stratum['N']
    weighted += (stratum['N']**2 * stratum['s']**2) / stratum['n'].to_f
  end
  (1.to_f / n_total) * Math.sqrt(weighted)
end

.variance_esd_wor(es) ⇒ Object



139
140
141
142
143
144
145
146
# File 'lib/statsample/multiset.rb', line 139

# Variance computed as
# (1 / N_total**2) * sum of N * (N - n) * s**2 / n over the strata.
#
# @param es [Enumerable<Hash>] stratum hashes with keys 'N', 'n' and 's'
# @return [Float]
def variance_esd_wor(es)
  n_total = calculate_n_total(es)
  weighted = 0
  es.each do |stratum|
    weighted += stratum['N'] * (stratum['N'] - stratum['n']) * (stratum['s']**2 / stratum['n'].to_f)
  end
  (1.0 / (n_total**2)) * weighted
end

.variance_esd_wr(es) ⇒ Object



153
154
155
156
157
158
159
160
# File 'lib/statsample/multiset.rb', line 153

# Variance computed as
# (1 / N_total**2) * sum of s**2 * N**2 / n over the strata.
#
# @param es [Enumerable<Hash>] stratum hashes with keys 'N', 'n' and 's'
# @return [Float]
def variance_esd_wr(es)
  n_total = calculate_n_total(es)
  weighted = 0
  es.each do |stratum|
    weighted += (stratum['s']**2 * stratum['N']**2) / stratum['n'].to_f
  end
  (1.0 / (n_total**2)) * weighted
end

.variance_ksd_wor(es) ⇒ Object

Source : Cochran (1972)



126
127
128
129
130
131
132
# File 'lib/statsample/multiset.rb', line 126

# Variance computed, for every stratum, as
# (N / N_total)**2 * (s**2 / n) * (1 - n / N), summed over the strata.
#
# @param es [Enumerable<Hash>] stratum hashes with keys 'N', 'n' and 's'
# @return [Numeric] the summed variance (0 for an empty collection)
def variance_ksd_wor(es)
  n_total = calculate_n_total(es)
  variance = 0
  es.each do |stratum|
    weight_sq = (stratum['N'].to_f / n_total)**2
    variance += weight_sq * (stratum['s']**2 / stratum['n'].to_f) * (1 - (stratum['n'].to_f / stratum['N']))
  end
  variance
end

.variance_ksd_wr(es) ⇒ Object



118
119
120
# File 'lib/statsample/multiset.rb', line 118

# Variance obtained by squaring standard_error_ksd_wr.
#
# @param es [Enumerable<Hash>] stratum hashes, handed unchanged to
#   standard_error_ksd_wr
# @return [Float]
def variance_ksd_wr(es)
  standard_error = standard_error_ksd_wr(es)
  standard_error**2
end

Instance Method Details

#mean(field) ⇒ Object

Population mean based on strata



248
249
250
251
252
# File 'lib/statsample/multiset.rb', line 248

# Population mean based on strata: each stratum's vector mean is multiplied
# by stratum_ponderation for that stratum, and the products are aggregated
# by @ms.sum_field.
#
# @param field [Object] field identifier forwarded to @ms.sum_field
# @return [Object] whatever @ms.sum_field returns for the block
def mean(field)
  @ms.sum_field(field) do |stratum_name, vector|
    stratum_ponderation(stratum_name) * vector.mean
  end
end

#population_size ⇒ Object

Population size. Equal to the sum of the strata sizes. Symbol: N<sub>h</sub>



218
219
220
# File 'lib/statsample/multiset.rb', line 218

# Reader for @population_size, which the constructor sets to the sum of
# the values in strata_sizes.
#
# @return [Numeric]
def population_size
  @population_size
end

#proportion(field, v = 1) ⇒ Object

Population proportion based on strata



235
236
237
238
239
# File 'lib/statsample/multiset.rb', line 235

# Population proportion based on strata: each stratum's
# vector.proportion(v) is multiplied by stratum_ponderation for that
# stratum, and the products are aggregated by @ms.sum_field.
#
# @param field [Object] field identifier forwarded to @ms.sum_field
# @param v [Object] value whose proportion is requested (default 1)
# @return [Object] whatever @ms.sum_field returns for the block
def proportion(field, v = 1)
  @ms.sum_field(field) do |stratum_name, vector|
    stratum_ponderation(stratum_name) * vector.proportion(v)
  end
end

#proportion_sd_esd_wor(field, v = 1) ⇒ Object



281
282
283
284
285
286
287
# File 'lib/statsample/multiset.rb', line 281

# Builds one hash per stratum ('N' from @strata_sizes, 'n' from the vector
# size, 'p' from vector.proportion(v)) through @ms.collect_vector and hands
# the result to StratifiedSample.proportion_sd_esd_wor.
#
# @param field [Object] field identifier forwarded to @ms.collect_vector
# @param v [Object] value whose proportion is requested (default 1)
# @return [Object] result of StratifiedSample.proportion_sd_esd_wor
def proportion_sd_esd_wor(field, v = 1)
  strata = @ms.collect_vector(field) do |stratum_name, vector|
    { 'N' => @strata_sizes[stratum_name], 'n' => vector.size, 'p' => vector.proportion(v) }
  end
  StratifiedSample.proportion_sd_esd_wor(strata)
end

#proportion_standard_error(field, v = 1) ⇒ Object



289
290
291
292
293
294
295
296
297
# File 'lib/statsample/multiset.rb', line 289

# Standard error of the stratified proportion:
#
#   (1 / population_size) *
#     sqrt(sum of N_h**2 * (1 - n_h / N_h) * p * (1 - p) / (n_h - 1))
#
# where p is the value returned by proportion(field, v).
#
# The sampling fraction n_h / N_h is now computed in floating point. With
# Integer sizes the previous Integer division truncated it to 0 whenever
# n_h < N_h, which silently removed the finite population correction.
#
# NOTE(review): p is the overall proportion, not the proportion of each
# stratum's own vector — confirm this is the intended estimator.
#
# @param field [Object] field identifier forwarded to @ms.sum_field
# @param v [Object] value whose proportion is requested (default 1)
# @return [Float] non-finite when a stratum has a single case (n_h - 1 == 0)
def proportion_standard_error(field, v = 1)
  prop = proportion(field, v)
  sum = @ms.sum_field(field) do |s_name, vector|
    nh = vector.size
    s_size = @strata_sizes[s_name]
    # to_f keeps the fraction fractional instead of truncating it.
    sampling_fraction = nh.to_f / s_size
    s_size**2 * (1 - sampling_fraction) * prop * (1 - prop) / (nh - 1)
  end
  (1.quo(@population_size)) * Math.sqrt(sum)
end

#sample_size ⇒ Object

Sample size. Equal to the sum of the sample sizes of all strata.



222
223
224
# File 'lib/statsample/multiset.rb', line 222

# Reader for @sample_size, which the constructor sets to the sum of the
# row counts (nrows) of every dataset.
#
# @return [Numeric]
def sample_size
  @sample_size
end

#standard_error_wor(field) ⇒ Object

Standard error with estimated population variance and without replacement. Source: Cochran (1972)



255
256
257
258
259
260
261
# File 'lib/statsample/multiset.rb', line 255

# Builds one hash per stratum ('N' from @strata_sizes, 'n' from the vector
# size, 's' from vector.sds) through @ms.collect_vector and hands the
# result to StratifiedSample.standard_error_esd_wor.
#
# @param field [Object] field identifier forwarded to @ms.collect_vector
# @return [Object] result of StratifiedSample.standard_error_esd_wor
def standard_error_wor(field)
  strata = @ms.collect_vector(field) do |stratum_name, vector|
    { 'N' => @strata_sizes[stratum_name], 'n' => vector.size, 's' => vector.sds }
  end
  StratifiedSample.standard_error_esd_wor(strata)
end

#standard_error_wor_2(field) ⇒ Object

Standard error with estimated population variance and without replacement. Source: stattrek.com/Lesson6/STRAnalysis.aspx



266
267
268
269
270
271
272
# File 'lib/statsample/multiset.rb', line 266

# Standard error computed as
#
#   (1 / population_size) *
#     sqrt(sum of N_h**2 * (1 - n_h / N_h) * variance_sample / n_h)
#
# with the per-stratum terms aggregated by @ms.sum_field.
#
# @param field [Object] field identifier forwarded to @ms.sum_field
# @return [Float]
def standard_error_wor_2(field)
  weighted = @ms.sum_field(field) do |s_name, vector|
    s_size = @strata_sizes[s_name]
    sampling_fraction = vector.size.to_f / s_size
    s_size**2 * (1 - sampling_fraction) * vector.variance_sample / vector.size.to_f
  end
  (1 / @population_size.to_f) * Math.sqrt(weighted)
end

#standard_error_wr(field) ⇒ Object



274
275
276
277
278
279
280
# File 'lib/statsample/multiset.rb', line 274

# Builds one hash per stratum ('N' from @strata_sizes, 'n' from the vector
# size, 's' from vector.sds) through @ms.collect_vector and hands the
# result to StratifiedSample.standard_error_esd_wr.
#
# @param field [Object] field identifier forwarded to @ms.collect_vector
# @return [Object] result of StratifiedSample.standard_error_esd_wr
def standard_error_wr(field)
  strata = @ms.collect_vector(field) do |stratum_name, vector|
    { 'N' => @strata_sizes[stratum_name], 'n' => vector.size, 's' => vector.sds }
  end
  StratifiedSample.standard_error_esd_wr(strata)
end

#strata_number ⇒ Object

Number of strata



213
214
215
# File 'lib/statsample/multiset.rb', line 213

# Reader for @strata_number, which the constructor sets from
# @ms.n_datasets.
#
# @return [Object]
def strata_number
  @strata_number
end

#stratum_ponderation(h) ⇒ Object Also known as: wh

Stratum ponderation. Symbol: W<sub>h</sub>



242
243
244
# File 'lib/statsample/multiset.rb', line 242

# Weight of stratum +h+: its declared size divided by the population size.
#
# @param h [Object] stratum key into @strata_sizes
# @return [Float] the weight; 0.0 for an unknown key, because nil.to_f
#   is 0.0
def stratum_ponderation(h)
  size_h = @strata_sizes[h].to_f
  size_h / @population_size
end

#stratum_size(h) ⇒ Object

Size of stratum h



226
227
228
# File 'lib/statsample/multiset.rb', line 226

# Declared size of stratum +h+, looked up in @strata_sizes.
#
# @param h [Object] stratum key
# @return [Object, nil] the stored size, or whatever @strata_sizes yields
#   for a missing key (nil for a plain Hash)
def stratum_size(h)
  @strata_sizes[h]
end

#variance_pst(field, v = 1) ⇒ Object

Cochran(1971), p. 150



299
300
301
302
303
304
305
306
307
308
309
# File 'lib/statsample/multiset.rb', line 299

# Variance of the stratified proportion:
#
#   (1 / population_size**2) *
#     sum of N_h**2 * (N_h - n_h) / (N_h - 1) * p_h * (1 - p_h) / (n_h - 1)
#
# where n_h is dataset.cases and p_h is dataset[field].proportion(v).
#
# @param field [Object] field looked up in every dataset
# @param v [Object] value whose proportion is requested (default 1)
# @return [Float]
def variance_pst(field, v = 1)
  total = 0
  @ms.datasets.each do |stratum_name, dataset|
    nh = dataset.cases.to_f
    s_size = @strata_sizes[stratum_name]
    prop = dataset[field].proportion(v)
    correction = (s_size**2 * (s_size - nh)) / (s_size - 1)
    total += correction * (prop * (1 - prop) / (nh - 1))
  end
  (1 / @population_size.to_f**2) * total
end

#vectors_by_field(field) ⇒ Object



229
230
231
232
233
# File 'lib/statsample/multiset.rb', line 229

# Collects, for every dataset in @ms.datasets, the entry stored under
# +field+.
#
# @param field [Object] key used to index each dataset
# @return [Array] one element per dataset, in iteration order
def vectors_by_field(field)
  @ms.datasets.map do |_name, dataset|
    dataset[field]
  end
end