Module: ForChrisLib

Includes:
ChrisLib, Math
Defined in:
lib/chris_lib/for_chris_lib.rb

Overview

Aggregated analytical helpers formerly housed in golf_lab.

Defined Under Namespace

Classes: ChiSquaredStdErr, Framed, PChiSquared

Constant Summary

Constants included from ChrisLib

ChrisLib::VERSION

Instance Method Summary

Instance Method Details

#acf(x_a, lag) ⇒ Float

Autocorrelation at a specific lag.

Parameters:

  • x_a (Array<Numeric>)
  • lag (Integer)

Returns:

  • (Float)

Raises:



292
293
294
295
296
297
298
299
300
301
302
303
304
# File 'lib/chris_lib/for_chris_lib.rb', line 292

# Autocorrelation of a series at one specific lag.
#
# Pairs each element with the element +lag+ positions later, averages the
# products of their deviations from the series mean over the n - lag pairs,
# and divides by +x_a.var+.
#
# @param x_a [Array<Numeric>] series; must also provide #mean and #var
# @param lag [Integer] non-negative offset between paired elements
# @return [Float]
# @raise [ForChrisLibError] if lag is not a non-negative Integer, or x_a
#   does not respond to #size and #[]
# @raise [RuntimeError] if the series has fewer than lag + 1 elements
def acf(x_a, lag)
  lag_ok = lag.is_a?(Integer) && lag >= 0
  raise ForChrisLibError, 'lag must be a non-negative Integer' unless lag_ok

  indexable = x_a.respond_to?(:size) && x_a.respond_to?(:[])
  raise ForChrisLibError, 'x_a must respond to #size and #[ ]' unless indexable

  n = x_a.size
  raise "Lag is too large, n = #{n}, lag = #{lag}" if n < lag + 1

  mu = x_a.mean
  leading = x_a[0..-(lag + 1)]
  # Plain left-to-right accumulation of the lagged cross products.
  cross = leading.each_with_index.reduce(0) { |acc, (x, i)| acc + (x - mu) * (x_a[i + lag] - mu) }
  cross.to_f / (n - lag) / x_a.var
end

#arbitrary_cdf_a(func, options, n_samples: 100) ⇒ Array<Array<Float, Float>>

Sample a cumulative distribution function for plotting.

Parameters:

  • func (Symbol)
  • options (Hash)
  • n_samples (Integer) (defaults to: 100)

Returns:

  • (Array<Array<Float, Float>>)

Raises:



505
506
507
508
509
510
511
512
513
514
# File 'lib/chris_lib/for_chris_lib.rb', line 505

# Sample a cumulative distribution function on an evenly spaced grid.
#
# The grid has +n_samples+ points spanning -4.0 to 4.0 (a fixed width of
# 8.0); each point is paired with +cdf_calc(x, func, options)+.
#
# @param func [Symbol] name of a method this object responds to
# @param options [Hash] passed through to cdf_calc
# @param n_samples [Integer] number of grid points, must exceed 1
# @return [Array<Array<Float, Float>>] [x, cdf] pairs in increasing x
# @raise [ForChrisLibError] if n_samples is not an Integer greater than 1,
#   or this object does not respond to func
def arbitrary_cdf_a(func, options, n_samples: 100)
  enough_samples = n_samples.is_a?(Integer) && n_samples > 1
  raise ForChrisLibError, 'n_samples must be greater than 1' unless enough_samples
  raise ForChrisLibError, 'function must be defined' unless respond_to?(func)

  width = 8.0
  h = width / (n_samples - 1)
  grid = Array.new(n_samples) { |k| -width / 2 + k * h }
  grid.map { |x| [x, cdf_calc(x, func, options)] }
end

#bias_estimate_by_min(store, win_loss_calculator: nil, minimizer_class: nil) ⇒ Float

Estimate bias in a histogram by minimising win/loss difference between players.

Parameters:

  • store (#histogram, #min, #max)
  • win_loss_calculator (#win_loss_graph, #win_loss_stats) (defaults to: nil)
  • minimizer_class (Class) (defaults to: nil)

Returns:

  • (Float)


207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/chris_lib/for_chris_lib.rb', line 207

# Estimate histogram bias by minimising a win/loss objective.
#
# For a candidate shift x the first histogram slot is shifted with
# +bin_shift(x)+, converted by +pdf_from_hist+, and passed to the win/loss
# calculator; the objective is the squared distance of the first
# +win_loss_stats+ value from 50.0. The minimiser searches between
# +store.min+ and +store.max+, and the negated minimising x is returned.
#
# @param store [#histogram, #min, #max]
# @param win_loss_calculator [#win_loss_graph, #win_loss_stats] defaults to
#   +default_win_loss_calculator+
# @param minimizer_class [Class] defaults to +default_minimizer_class+;
#   instances must provide #iterate and #x_minimum
# @return [Float] the negated +x_minimum+ reported by the minimiser
# @raise [ForChrisLibError] if store lacks the required readers, the first
#   histogram slot is missing, or minimizer_class does not respond to .new
def bias_estimate_by_min(store, win_loss_calculator: nil, minimizer_class: nil)
  win_loss = win_loss_calculator || default_win_loss_calculator

  store_ok = %i[histogram min max].all? { |reader| store.respond_to?(reader) }
  raise ForChrisLibError, 'store must respond to :histogram, :min, and :max' unless store_ok

  histogram_data = store.histogram
  has_counts = histogram_data.respond_to?(:[]) && histogram_data[0]
  raise ForChrisLibError, 'store.histogram must include counts in the first slot' unless has_counts

  # Objective handed to the minimiser: zero when the first win/loss
  # statistic is exactly 50.0.
  objective = lambda do |shift|
    shifted_bins = store.histogram[0].bin_shift(shift)
    pdf = pdf_from_hist(shifted_bins, min: store.min)
    graph = win_loss.win_loss_graph(nil, pdf: pdf)
    first_stat = win_loss.win_loss_stats(graph)[0]
    (first_stat - 50.0)**2
  end

  minimizer_class ||= default_minimizer_class
  raise ForChrisLibError, 'minimizer_class must respond to .new' unless minimizer_class.respond_to?(:new)

  minimizer = minimizer_class.new(store.min, store.max, objective)
  # Only minimisers exposing an expected= writer are given the target value.
  minimizer.expected = 0.0 if minimizer.respond_to?(:expected=)
  minimizer.iterate
  -minimizer.x_minimum
end

#cdf_calc(x, func, options, mu: 0, sigma: 1, n_pts: 100) ⇒ Float

Numerical integration helper for CDFs.

Parameters:

  • x (Numeric)
  • func (Symbol)
  • options (Hash)
  • mu (Numeric) (defaults to: 0)
  • sigma (Numeric) (defaults to: 1)
  • n_pts (Integer) (defaults to: 100)

Returns:

  • (Float)

Raises:



536
537
538
539
540
541
542
# File 'lib/chris_lib/for_chris_lib.rb', line 536

# Numerical integration helper for CDFs.
#
# Integrates +func+ up to +x+ with Simpson's rule. The lower limit is
# normally +mu - 5 * sigma+. When +x+ lies more than three sigma below +mu+
# the lower limit becomes +x - 2 * sigma+, so it always stays below +x+ and
# joins the normal limit continuously at x = mu - 3 * sigma.
#
# @param x [Numeric] upper integration limit
# @param func [Symbol] name of the density method to integrate
# @param options [Hash] forwarded to the density method via simpson
# @param mu [Numeric] centre used to place the lower limit
# @param sigma [Numeric] scale used to place the lower limit
# @param n_pts [Integer] number of Simpson intervals, must be even
# @return [Float]
# @raise [RuntimeError] if n_pts is odd
# @raise [ForChrisLibError] if this object does not respond to func
def cdf_calc(x, func, options, mu: 0, sigma: 1, n_pts: 100)
  raise "n_pts must be even (received n_pts=#{n_pts})" unless n_pts.even?
  raise ForChrisLibError, 'integration function must be defined' unless respond_to?(func)

  # x is already an absolute coordinate, so the far-tail limit must not have
  # mu added to it again.
  lower = x - mu < -3 * sigma ? x - 2 * sigma : -5 * sigma + mu
  simpson(func, lower, x, n_pts, options)
end

#cdf_from_bins(bins, min = 0, delta = 1) ⇒ Hash{Numeric=>Float}

Cumulative distribution function derived from histogram bins.

Returns:

  • (Hash{Numeric=>Float})


396
397
398
# File 'lib/chris_lib/for_chris_lib.rb', line 396

# Cumulative distribution derived from histogram bins.
#
# Builds the mass function with +pdf_from_bins+ and converts it by calling
# +cdf_from_pdf+ on the result.
#
# @param bins [#sum, #each_with_index] bin counts
# @param min [Numeric] passed to pdf_from_bins
# @param delta [Numeric] passed to pdf_from_bins
# @return [Hash{Numeric=>Float}]
def cdf_from_bins(bins, min = 0, delta = 1)
  mass = pdf_from_bins(bins, min, delta)
  mass.cdf_from_pdf
end

#computer_name_short ⇒ String

Returns hostname truncated to ten characters.

Returns:

  • (String)

    hostname truncated to ten characters



557
558
559
560
561
562
563
564
# File 'lib/chris_lib/for_chris_lib.rb', line 557

# Hostname truncated to ten characters.
#
# Runs the +hostname+ command and strips surrounding whitespace, so the
# trailing newline the command prints is never part of the result.
#
# @return [String, nil] at most ten characters of the hostname, or nil
#   (after a warning) when the hostname cannot be determined
def computer_name_short
  host = begin
    `hostname`.to_s.strip
  rescue Errno::ENOENT
    # No hostname executable available: treat it as "unknown".
    ''
  end

  if host.empty?
    warn 'computer_name_short could not determine hostname'
    return nil
  end
  host[0, 10]
end

#delimit(number, delimiter = ',', separator = '.') ⇒ String

Format a number with thousands delimiters.

Parameters:

  • number (Numeric, String)
  • delimiter (String) (defaults to: ',')
  • separator (String) (defaults to: '.')

Returns:

  • (String)

Raises:



549
550
551
552
553
554
# File 'lib/chris_lib/for_chris_lib.rb', line 549

# Format a number with thousands delimiters.
#
# The text form of +number+ is split on '.', the digits before the first
# '.' are grouped in threes with +delimiter+, and the pieces are joined
# again with +separator+.
#
# @param number [Numeric, String]
# @param delimiter [String] inserted between groups of three digits
# @param separator [String] replaces the '.' between the pieces
# @return [String] the formatted number; '' when number has no text
#   (for example nil or an empty string)
# @raise [ForChrisLibError] if number does not respond to #to_s
def delimit(number, delimiter = ',', separator = '.')
  raise ForChrisLibError, 'number must respond to #to_s' unless number.respond_to?(:to_s)

  whole, *fraction = number.to_s.split('.')
  # split yields no pieces at all for an empty string.
  return '' if whole.nil?

  grouped = whole.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1#{delimiter}")
  [grouped, *fraction].join(separator)
end

#fvu(y_hat_a:, y_a:) ⇒ Float

Fraction of variance unexplained given predictions and observations.

Parameters:

  • y_hat_a (Array<Numeric>)
  • y_a (Array<Numeric>)

Returns:

  • (Float)

Raises:



190
191
192
193
194
195
196
197
198
199
200
# File 'lib/chris_lib/for_chris_lib.rb', line 190

# Fraction of variance unexplained by a set of predictions.
#
# Divides the sum of squared residuals (observation minus prediction) by
# the sum of squared deviations of the observations from +y_a.mean+.
#
# @param y_hat_a [Array<Numeric>] predictions
# @param y_a [Array<Numeric>] observations; must also provide #mean and #sum
# @return [Float]
# @raise [ForChrisLibError] if either argument lacks the required methods,
#   fewer than two predictions are given, or the lengths differ
def fvu(y_hat_a:, y_a:)
  unless y_hat_a.respond_to?(:size) && y_hat_a.respond_to?(:zip)
    raise ForChrisLibError, 'y_hat_a must respond to #size and #zip'
  end
  raise ForChrisLibError, 'y_a must respond to #size' unless y_a.respond_to?(:size)
  raise ForChrisLibError, 'y_hat_a must contain at least two values' if y_hat_a.size < 2
  raise ForChrisLibError, 'y_hat_a and y_a must be the same length' if y_hat_a.size != y_a.size

  residual_ss = y_hat_a.zip(y_a).sum { |predicted, observed| (observed - predicted)**2 }
  centre = y_a.mean
  total_ss = y_a.sum { |observed| (observed - centre)**2 }
  residual_ss.to_f / total_ss.to_f
end

#inc_m2_var(x, accumulator) ⇒ Array<Numeric>

Incremental mean and second central moment accumulator.

Parameters:

  • x (Numeric)
  • accumulator (Array<Numeric>)
    mean, m2, n

Returns:

  • (Array<Numeric>)

Raises:



277
278
279
280
281
282
283
284
285
286
# File 'lib/chris_lib/for_chris_lib.rb', line 277

# Fold one observation into a running [mean, m2, n] accumulator.
#
# n is incremented, the mean moves by (x - mean) / n, and m2 grows by the
# product of the deviation of x from the old mean and from the new mean.
#
# @param x [Numeric] new observation
# @param accumulator [Array<Numeric>] current [mean, m2, n]
# @return [Array<Numeric>] updated [mean, m2, n]; the input is not modified
# @raise [ForChrisLibError] if accumulator is not a three-element Array
def inc_m2_var(x, accumulator)
  well_formed = accumulator.is_a?(Array) && accumulator.size == 3
  raise ForChrisLibError, 'accumulator must be an array of [mean, m2, n]' unless well_formed

  old_mean, old_m2, old_n = accumulator
  count = old_n + 1
  dev_before = x - old_mean
  new_mean = old_mean + dev_before.to_f / count
  dev_after = x - new_mean
  [new_mean, old_m2 + dev_before * dev_after, count]
end

#interpolate(x, x_L, x_U, y_L, y_U) ⇒ Float

Linear interpolation between two points.

Returns:

  • (Float)


495
496
497
498
# File 'lib/chris_lib/for_chris_lib.rb', line 495

# Linear interpolation between the points (x_L, y_L) and (x_U, y_U).
#
# The slope is always computed in floating point, so Integer arguments are
# not truncated.
#
# @param x [Numeric] position at which to evaluate the line
# @param x_L [Numeric] x coordinate of the first point
# @param x_U [Numeric] x coordinate of the second point
# @param y_L [Numeric] y coordinate of the first point
# @param y_U [Numeric] y coordinate of the second point
# @return [Float] value of the line at x; not finite (NaN or ±Infinity)
#   when x_L equals x_U
def interpolate(x, x_L, x_U, y_L, y_U)
  slope = (y_U - y_L).to_f / (x_U - x_L)
  (x - x_L) * slope + y_L
end

#inverse_transform_rand(cdf_a) ⇒ Float

Inverse transform sampling based on a discretised CDF array.

Parameters:

  • cdf_a (Array<Array(Float, Float)>)

Returns:

  • (Float)

Raises:



479
480
481
482
483
484
485
486
487
488
489
490
491
# File 'lib/chris_lib/for_chris_lib.rb', line 479

# Draw one sample by inverse transform sampling from a discretised CDF.
#
# A uniform random probability is drawn with +rand+. If it falls at or
# below the first tabulated probability the first x is returned; at or
# above the last tabulated probability the last x is returned. Otherwise
# the x value is linearly interpolated between the two surrounding entries.
#
# @param cdf_a [Array<Array(Float, Float)>] [x, probability] pairs
#   (assumed ordered by increasing probability)
# @return [Float] sampled x value
# @raise [ForChrisLibError] if cdf_a is not a non-empty collection of
#   arrays with at least two elements each
def inverse_transform_rand(cdf_a)
  pairs_ok = cdf_a.respond_to?(:map) && cdf_a.all? { |pair| pair.is_a?(Array) && pair.size >= 2 }
  raise ForChrisLibError, 'cdf_a must be an array of coordinate pairs' unless pairs_ok

  x_a = cdf_a.map { |pair| pair[0] }
  p_a = cdf_a.map { |pair| pair[1] }
  raise ForChrisLibError, 'cdf_a must not be empty' if p_a.empty?

  p_rand = rand
  # Outside the tabulated probabilities: clamp to the end x values (a
  # sample is an x, never a probability).
  return x_a.first if p_rand <= p_a.first
  return x_a.last if p_rand >= p_a.last

  i = p_a.find_index { |p| p > p_rand }
  interpolate(p_rand, p_a[i - 1], p_a[i], x_a[i - 1], x_a[i])
end

#normal_cdf(x) ⇒ Float

Standard normal cumulative distribution function.

Parameters:

  • x (Numeric)

Returns:

  • (Float)


415
416
417
# File 'lib/chris_lib/for_chris_lib.rb', line 415

# Standard normal cumulative distribution function, computed from the
# error function as 0.5 * (1 + erf(x / sqrt(2))).
#
# @param x [Numeric]
# @return [Float]
def normal_cdf(x)
  scaled = x / sqrt(2)
  0.5 * (1 + erf(scaled))
end

#normal_pdf(x, options = {}) ⇒ Float

Standard normal (or shifted) probability density function.

Parameters:

  • x (Numeric)
  • options (Hash) (defaults to: {})

    :mu and :sigma keys

Returns:

  • (Float)

Raises:



404
405
406
407
408
409
410
# File 'lib/chris_lib/for_chris_lib.rb', line 404

# Normal probability density function (standard by default).
#
# The exponent is computed in floating point, so Integer values of x, :mu
# and :sigma give the same result as their Float equivalents.
#
# @param x [Numeric]
# @param options [Hash] optional :mu (default 0) and :sigma (default 1)
# @return [Float]
# @raise [ForChrisLibError] if sigma is not a positive Numeric
def normal_pdf(x, options = {})
  params = { mu: 0, sigma: 1 }.merge(options)
  mu = params[:mu]
  sigma = params[:sigma]
  raise ForChrisLibError, 'sigma must be positive' unless sigma.is_a?(Numeric) && sigma.positive?

  # Standardise first; the to_f prevents Integer floor division.
  z = (x - mu).to_f / sigma
  Math.exp(-0.5 * z * z) / (Math.sqrt(2 * Math::PI) * sigma)
end

#outcome(results) ⇒ Array<Float>

Compute probabilities of winning given an array of scores.

Parameters:

  • results (Array<Numeric>)

Returns:

  • (Array<Float>)

    probability mass for each input

Raises:



14
15
16
17
18
19
20
21
22
# File 'lib/chris_lib/for_chris_lib.rb', line 14

# Share of a win for each entry in a list of scores.
#
# Every entry equal to the minimum score receives an equal share of 1.0;
# all other entries receive 0.0.
#
# @param results [Array<Numeric>] scores
# @return [Array<Float>] one share per input, in input order
# @raise [ForChrisLibError] if results does not respond to #each or is empty
def outcome(results)
  raise ForChrisLibError, 'results must respond to #each' unless results.respond_to?(:each)

  scores = results.to_a
  raise ForChrisLibError, 'results cannot be empty' if scores.empty?

  lowest = scores.min
  winners = scores.count { |score| score == lowest }
  divisor = winners.nonzero? || 1
  scores.map { |score| (score == lowest ? 1 : 0).to_f / divisor }
end

#parabola(x, options = {}) ⇒ Numeric

Evaluate quadratic polynomial with configurable coefficients.

Parameters:

  • x (Numeric)
  • options (Hash) (defaults to: {})

Returns:

  • (Numeric)


440
441
442
443
444
445
446
# File 'lib/chris_lib/for_chris_lib.rb', line 440

# Evaluate the quadratic a * x**2 + b * x + c.
#
# @param x [Numeric]
# @param options [Hash] optional :a, :b and :c coefficients
#   (defaults 2, 3 and 4)
# @return [Numeric]
def parabola(x, options = {})
  a, b, c = { a: 2, b: 3, c: 4 }.merge(options).values_at(:a, :b, :c)
  a * x**2 + b * x + c
end

#pdf_from_bins(bins, min = 0, delta = 1) ⇒ Hash{Numeric=>Float}

Probability mass function derived from histogram bins.

Returns:

  • (Hash{Numeric=>Float})

Raises:



388
389
390
391
392
# File 'lib/chris_lib/for_chris_lib.rb', line 388

# Probability mass function derived from histogram bins.
#
# Bin i is keyed by min * delta + i * delta and holds its count divided by
# the total count (a zero total is replaced by 1, giving all-zero masses).
#
# @param bins [#sum, #each_with_index] bin counts
# @param min [Numeric] offset of the first bin, in units of delta
# @param delta [Numeric] bin width
# @return [Hash{Numeric=>Float}]
# @raise [ForChrisLibError] if bins lacks #sum or #each_with_index
def pdf_from_bins(bins, min = 0, delta = 1)
  countable = bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless countable

  total = bins.sum.nonzero? || 1
  bins.each_with_index.each_with_object({}) do |(count, i), mass|
    mass[min * delta + i * delta] = count.to_f / total
  end
end

#pdf_from_hist(bins, min: 0) ⇒ Hash{Integer=>Float}

Convert integer bin counts into a probability mass function.

Parameters:

  • bins (Array<Integer>)
  • min (Integer) (defaults to: 0)

Returns:

  • (Hash{Integer=>Float})


239
240
241
242
243
244
245
# File 'lib/chris_lib/for_chris_lib.rb', line 239

# Convert bin counts into a probability mass function.
#
# Bin i is keyed by i + min and holds its count divided by the total count
# (a zero total is replaced by 1, giving all-zero masses).
#
# @param bins [Array<Integer>] bin counts
# @param min [Integer] key assigned to the first bin
# @return [Hash{Integer=>Float}]
# @raise [ForChrisLibError] if bins lacks #each_with_index or #sum
def pdf_from_hist(bins, min: 0)
  countable = bins.respond_to?(:each_with_index) && bins.respond_to?(:sum)
  raise ForChrisLibError, 'bins must respond to #each_with_index and #sum' unless countable

  total = bins.sum.nonzero? || 1
  keyed = bins.map.with_index { |count, i| [i + min, count.to_f / total] }
  keyed.to_h
end

#simpson(func, a, b, n, options = {}) ⇒ Float

Simpson’s rule numerical integration for functions referenced by symbol.

Parameters:

  • func (Symbol)
  • a (Numeric)
  • b (Numeric)
  • n (Integer)

    even number of intervals

  • options (Hash) (defaults to: {})

Returns:

  • (Float)

Raises:



455
456
457
458
459
460
461
462
463
464
# File 'lib/chris_lib/for_chris_lib.rb', line 455

# Simpson's rule integration of a method referenced by symbol.
#
# The method is invoked as +send(func, x, options)+ at the two end points
# (weight 1), at the odd interior nodes (weight 4) and at the even
# interior nodes (weight 2).
#
# @param func [Symbol] name of a method this object responds to
# @param a [Numeric] lower limit
# @param b [Numeric] upper limit
# @param n [Integer] even number of intervals
# @param options [Hash] forwarded to func on every call
# @return [Float]
# @raise [RuntimeError] if n is odd
# @raise [ForChrisLibError] if this object does not respond to func
def simpson(func, a, b, n, options = {})
  raise "n must be even (received n=#{n})" unless n.even?
  raise ForChrisLibError, 'integration function must be defined' unless respond_to?(func)

  width = (b - a).to_f / n
  acc = send(func, a, options) + send(func, b, options)
  1.step(n, 2) { |k| acc += 4 * send(func, a + k * width, options) }
  2.step(n - 1, 2) { |k| acc += 2 * send(func, a + k * width, options) }
  acc * width / 3
end

#skew_normal_cdf_a(options, n_samples: 100) ⇒ Array<Array<Float, Float>>

Discretised skew-normal cumulative distribution function.

Returns:

  • (Array<Array<Float, Float>>)

Raises:



518
519
520
521
522
523
524
525
526
# File 'lib/chris_lib/for_chris_lib.rb', line 518

# Discretised skew-normal cumulative distribution function.
#
# Evaluates +cdf_calc(x, :skew_normal_pdf, options)+ on +n_samples+ evenly
# spaced points spanning -4.0 to 4.0 (a fixed width of 8.0).
#
# @param options [Hash] passed through to cdf_calc
# @param n_samples [Integer] number of grid points, must exceed 1
# @return [Array<Array<Float, Float>>] [x, cdf] pairs in increasing x
# @raise [ForChrisLibError] if n_samples is not an Integer greater than 1
def skew_normal_cdf_a(options, n_samples: 100)
  enough_samples = n_samples.is_a?(Integer) && n_samples > 1
  raise ForChrisLibError, 'n_samples must be greater than 1' unless enough_samples

  width = 8.0
  h = width / (n_samples - 1)
  grid = Array.new(n_samples) { |k| -width / 2 + k * h }
  grid.map { |x| [x, cdf_calc(x, :skew_normal_pdf, options)] }
end

#skew_normal_pdf(x, options = { alpha: 0 }) ⇒ Float

Skew-normal probability density function using alpha parameterisation.

Parameters:

  • x (Numeric)
  • options (Hash) (defaults to: { alpha: 0 })

Returns:

  • (Float)

Raises:



423
424
425
426
427
428
# File 'lib/chris_lib/for_chris_lib.rb', line 423

# Skew-normal probability density using the alpha parameterisation:
# 2 * normal_pdf(x) * normal_cdf(alpha * x).
#
# @param x [Numeric]
# @param options [Hash] optional :alpha (default 0)
# @return [Float]
# @raise [ForChrisLibError] if alpha is not Numeric
def skew_normal_pdf(x, options = { alpha: 0 })
  alpha = { alpha: 0 }.merge(options)[:alpha]
  raise ForChrisLibError, 'alpha must be numeric' unless alpha.is_a?(Numeric)

  density = normal_pdf(x)
  skew_weight = normal_cdf(alpha * x)
  2 * density * skew_weight
end

#skew_normal_rand(_x, options = { alpha: 0 }) ⇒ Float

Placeholder skew-normal sampler backed by numerical integration.

Returns:

  • (Float)


432
433
434
# File 'lib/chris_lib/for_chris_lib.rb', line 432

# Placeholder skew-normal sampler.
#
# Ignores both arguments and returns +cdf_calc+ of :normal_pdf evaluated at
# a fresh +rand+ value, with mu 2 and sigma 4 supplied both as the density
# options and as the cdf_calc keywords, using 100 integration points.
# NOTE(review): the result is a CDF value rather than a draw from a
# skew-normal distribution — confirm the intended behaviour before relying
# on it.
#
# @param _x [Object] unused
# @param options [Hash] unused
# @return [Float]
def skew_normal_rand(_x, options = { alpha: 0 })
  density_options = { mu: 2, sigma: 4 }
  cdf_calc(rand, :normal_pdf, density_options, mu: 2, sigma: 4, n_pts: 100)
end

#skew_normal_rand_a(n, alpha) ⇒ Array<Float>

Generate random samples from the skew-normal distribution using inverse transform.

Parameters:

  • n (Integer)
  • alpha (Numeric)

Returns:

  • (Array<Float>)

Raises:



470
471
472
473
474
# File 'lib/chris_lib/for_chris_lib.rb', line 470

# Generate n random samples by inverse transform sampling.
#
# The CDF table is built once with
# +arbitrary_cdf_a(:skew_normal_pdf, { alpha: alpha })+ and then handed to
# +inverse_transform_rand+ for every sample.
#
# @param n [Integer] number of samples, must be positive
# @param alpha [Numeric] skew parameter forwarded to the density
# @return [Array<Float>]
# @raise [ForChrisLibError] if n is not a positive Integer
def skew_normal_rand_a(n, alpha)
  raise ForChrisLibError, 'n must be a positive Integer' unless n.is_a?(Integer) && n.positive?

  cdf_table = arbitrary_cdf_a(:skew_normal_pdf, { alpha: alpha })
  Array.new(n) { inverse_transform_rand(cdf_table) }
end

#summed_bins_histogram(x_y, n_bins) ⇒ Array<Array<Float, Numeric, Integer>>

Sum y values into equi-width x bins.

Parameters:

  • x_y (Array<Array(Float, Float)>)
  • n_bins (Integer)

Returns:

  • (Array<Array<Float, Numeric, Integer>>)

Raises:



251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
# File 'lib/chris_lib/for_chris_lib.rb', line 251

# Sum y values into equal-width bins over the range of the x values.
#
# The x range [min, max] is divided into +n_bins+ bins of equal width.
# Each pair adds its y to the sum of the bin containing its x and
# increments that bin's count; x == max falls into the last bin. When all
# x values are equal the bin width is zero and every pair is placed in the
# first bin.
#
# @param x_y [Array<Array(Float, Float)>] [x, y] pairs
# @param n_bins [Integer] number of bins, must be positive
# @return [Array<Array<Float, Numeric, Integer>>] one
#   [bin centre, summed y, count] triple per bin
# @raise [ForChrisLibError] if x_y does not respond to #transpose or holds
#   no pairs, or n_bins is not a positive Integer
def summed_bins_histogram(x_y, n_bins)
  raise ForChrisLibError, 'x_y must respond to #transpose' unless x_y.respond_to?(:transpose)
  raise ForChrisLibError, 'n_bins must be a positive Integer' unless n_bins.is_a?(Integer) && n_bins.positive?

  x_a, y_a = x_y.transpose
  raise ForChrisLibError, 'x_y must contain at least one [x, y] pair' if x_a.nil? || x_a.empty?

  min, max = x_a.minmax
  delta = (max - min).to_f / n_bins
  bin_sums = Array.new(n_bins, 0)
  bins = Array.new(n_bins, 0)

  x_a.each_with_index do |x, i|
    # Zero width means every x is identical; avoid dividing 0 by 0.
    j = delta.zero? ? 0 : [((x - min) / delta).floor, n_bins - 1].min
    bin_sums[j] += y_a[i]
    bins[j] += 1
  end

  Array.new(n_bins) { |i| [min + delta / 2 + i * delta, bin_sums[i], bins[i]] }
end

#test ⇒ String

Returns sentinel used in legacy tests.

Returns:

  • (String)

    sentinel used in legacy tests



182
183
184
# File 'lib/chris_lib/for_chris_lib.rb', line 182

# Sentinel used in legacy tests.
#
# @return [String] the fixed text 'here'
def test
  sentinel = 'here'
  sentinel
end

#weighted_m_3(bins, mu, min = 0, delta = 1) ⇒ Float?

Weighted third central moment.

Returns:

  • (Float, nil)

Raises:



358
359
360
361
362
363
364
365
366
367
368
369
# File 'lib/chris_lib/for_chris_lib.rb', line 358

# Weighted third central moment of histogram bins about +mu+.
#
# Bin i is located at min * delta + i * delta and weighted by its count.
# The division is always carried out in floating point.
#
# @param bins [Array<Numeric>] bin counts
# @param mu [Numeric] centre the moment is taken about
# @param min [Numeric] offset of the first bin, in units of delta
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the total count is below 1
# @raise [ForChrisLibError] if bins lacks #sum or #each_with_index, or mu
#   is not Numeric
def weighted_m_3(bins, mu, min = 0, delta = 1)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)

  n = bins.sum
  return if n < 1

  moment_sum = bins.each_with_index.sum do |w, i|
    v = min * delta + i * delta
    (v - mu)**3 * w
  end
  # to_f keeps all-Integer input from being floor-divided.
  moment_sum.to_f / n
end

#weighted_m_4(bins, mu, min = 0, delta = 1) ⇒ Float?

Weighted fourth central moment.

Returns:

  • (Float, nil)

Raises:



373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/chris_lib/for_chris_lib.rb', line 373

# Weighted fourth central moment of histogram bins about +mu+.
#
# Bin i is located at min * delta + i * delta and weighted by its count.
# The division is always carried out in floating point.
#
# @param bins [Array<Numeric>] bin counts
# @param mu [Numeric] centre the moment is taken about
# @param min [Numeric] offset of the first bin, in units of delta
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the total count is below 1
# @raise [ForChrisLibError] if bins lacks #sum or #each_with_index, or mu
#   is not Numeric
def weighted_m_4(bins, mu, min = 0, delta = 1)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)

  n = bins.sum
  return if n < 1

  moment_sum = bins.each_with_index.sum do |w, i|
    v = min * delta + i * delta
    (v - mu)**4 * w
  end
  # to_f keeps all-Integer input from being floor-divided.
  moment_sum.to_f / n
end

#weighted_mean(bins, min = 0, delta = 1) ⇒ Float?

Weighted mean based on histogram bins.

Parameters:

  • bins (Array<Numeric>)
  • min (Numeric) (defaults to: 0)
  • delta (Numeric) (defaults to: 1)

Returns:

  • (Float, nil)

Raises:



311
312
313
314
315
316
317
318
319
# File 'lib/chris_lib/for_chris_lib.rb', line 311

# Weighted mean of histogram bins.
#
# Bin i is located at min * delta + i * delta and weighted by its count.
#
# @param bins [Array<Numeric>] bin counts
# @param min [Numeric] offset of the first bin, in units of delta
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the counts sum to zero
# @raise [ForChrisLibError] if bins lacks #sum or #each_with_index
def weighted_mean(bins, min = 0, delta = 1)
  countable = bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless countable

  total = bins.sum
  return nil if total.zero?

  weighted = bins.each_with_index.sum { |w, i| (min * delta + i * delta) * w }
  weighted.to_f / total
end

#weighted_sd(bins, mu, min = 0, delta = 1) ⇒ Float?

Weighted sample standard deviation.

Parameters:

  • bins (Array<Numeric>)
  • mu (Numeric)
  • min (Numeric) (defaults to: 0)
  • delta (Numeric) (defaults to: 1)

Returns:

  • (Float, nil)

Raises:



327
328
329
330
331
332
333
334
335
336
337
# File 'lib/chris_lib/for_chris_lib.rb', line 327

# Weighted sample standard deviation of histogram bins about +mu+.
#
# Bin i is located at min * delta + i * delta and weighted by its count.
# The squared deviations are divided by (total count - 1) in floating
# point before the square root is taken.
#
# @param bins [Array<Numeric>] bin counts
# @param mu [Numeric] centre the deviations are taken about
# @param min [Numeric] offset of the first bin, in units of delta
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the total count is below 2
# @raise [ForChrisLibError] if bins lacks #sum or #each_with_index, or mu
#   is not Numeric
def weighted_sd(bins, mu, min = 0, delta = 1)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)

  n = bins.sum
  return nil if n < 2

  squares = bins.each_with_index.sum do |w, i|
    v = min * delta + i * delta
    (v - mu)**2 * w
  end
  # to_f keeps all-Integer input from being floor-divided before the root.
  Math.sqrt(squares.to_f / (n - 1))
end

#weighted_skewness(bins, mu, min = 0, delta = 1) ⇒ Float?

Weighted skewness using the third central moment.

Parameters:

  • bins (Array<Numeric>)
  • mu (Numeric)
  • min (Numeric) (defaults to: 0)
  • delta (Numeric) (defaults to: 1)

Returns:

  • (Float, nil)

Raises:



345
346
347
348
349
350
351
352
353
354
# File 'lib/chris_lib/for_chris_lib.rb', line 345

# Weighted skewness: the third central moment from +weighted_m_3+ divided
# by the cube of the standard deviation from +weighted_sd+.
#
# @param bins [Array<Numeric>] bin counts
# @param mu [Numeric] centre both moments are taken about
# @param min [Numeric] offset of the first bin, in units of delta
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the total count is below 2
# @raise [ForChrisLibError] if bins lacks #sum or #each_with_index, or mu
#   is not Numeric
def weighted_skewness(bins, mu, min = 0, delta = 1)
  countable = bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless countable
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)
  return nil if bins.sum < 2

  m3 = weighted_m_3(bins, mu, min, delta)
  spread = weighted_sd(bins, mu, min, delta)
  m3 / spread**3
end