Module: EverydayCliUtils::KmeansUtil
- Defined in:
- lib/everyday-cli-utils/safe/kmeans.rb
Class Method Summary
- .f_test(clusters, means, cnt, avg) ⇒ Object
- .f_test2(clusters, means, cnt) ⇒ Object
- .f_test2_calc(clusters, i, means, uv) ⇒ Object
- .f_test_ev(avg, clusters, cnt2, means) ⇒ Object
- .f_test_uv(clusters, cnt, cnt2, means) ⇒ Object
- .f_test_uvi(clusters, i, means, uv) ⇒ Object
- .find_outliers(avg, cs, i, sensitivity) ⇒ Object
- .get_clusters(collection, means) ⇒ Object
- .normal(x, avg, std) ⇒ Object
Class Method Details
.f_test(clusters, means, cnt, avg) ⇒ Object
10 11 12 13 14 15 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 10
#
# Returns the value produced by f_test_ev divided by the value produced by
# f_test_uv for the given clustering.
#
# @param clusters collection of clusters; each cluster must respond to empty?
# @param means cluster centers, indexed in step with clusters
# @param cnt forwarded to f_test_uv (presumably the total item count -- confirm against callers)
# @param avg forwarded to f_test_ev (presumably the overall average -- confirm against callers)
# @return the quotient ev / uv
def self.f_test(clusters, means, cnt, avg)
  # Only clusters that actually hold members are counted.
  populated = clusters.reject(&:empty?).size
  explained = f_test_ev(avg, clusters, populated, means)
  unexplained = f_test_uv(clusters, cnt, populated, means)
  explained / unexplained
end
.f_test2(clusters, means, cnt) ⇒ Object
34 35 36 37 38 39 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 34
#
# Adds up the f_test2_calc result of every non-empty cluster and divides the
# total by (cnt - number of non-empty clusters).
#
# @param clusters collection of clusters, indexed in step with means
# @param means cluster centers
# @param cnt minuend of the divisor (presumably the total item count -- confirm against callers)
# @return the accumulated total divided by (cnt - populated cluster count)
def self.f_test2(clusters, means, cnt)
  populated = clusters.reject(&:empty?).size
  total = 0.0
  means.count.times do |idx|
    next if clusters[idx].empty?

    # The running total is handed over as the fourth argument, as before.
    total += f_test2_calc(clusters, idx, means, total)
  end
  total / (cnt - populated)
end
.f_test2_calc(clusters, i, means, uv) ⇒ Object
41 42 43 44 45 46 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 41
#
# Square root of the mean squared deviation of the members of cluster +i+
# from means[i].
#
# @param clusters collection of clusters
# @param i index of the cluster (and of its mean) to evaluate
# @param means cluster centers
# @param uv not used by this method; kept so the signature stays unchanged
# @return [Float] NaN when the cluster is empty (0.0 / 0)
def self.f_test2_calc(clusters, i, means, uv)
  members = clusters[i]
  center = means[i]
  # Plain left-to-right accumulation starting from 0.0.
  squared_total = members.inject(0.0) { |acc, value| acc + (value - center) ** 2.0 }
  Math.sqrt(squared_total / members.count)
end
.f_test_ev(avg, clusters, cnt2, means) ⇒ Object
17 18 19 20 21 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 17
#
# Sums, over every non-empty cluster, the cluster size times the squared
# difference between its mean and +avg+, then divides by (cnt2 - 1.0).
#
# @param avg value each cluster mean is compared against
# @param clusters collection of clusters, indexed in step with means
# @param cnt2 divisor base; f_test passes the number of non-empty clusters
# @param means cluster centers
# @return [Float] the weighted sum divided by (cnt2 - 1.0)
def self.f_test_ev(avg, clusters, cnt2, means)
  weighted = 0.0
  means.count.times do |idx|
    members = clusters[idx]
    next if members.empty? # empty clusters contribute nothing

    weighted += members.count * ((means[idx] - avg) ** 2.0)
  end
  weighted / (cnt2 - 1.0)
end
.f_test_uv(clusters, cnt, cnt2, means) ⇒ Object
23 24 25 26 27 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 23
#
# Threads an accumulator (starting at 0.0) through f_test_uvi for every
# index of +means+ and divides the final value by (cnt - cnt2).
#
# @param clusters collection of clusters, indexed in step with means
# @param cnt minuend of the divisor (presumably the total item count -- confirm against callers)
# @param cnt2 subtrahend of the divisor; f_test passes the number of non-empty clusters
# @param means cluster centers
# @return the accumulated value divided by (cnt - cnt2)
def self.f_test_uv(clusters, cnt, cnt2, means)
  accumulated = means.count.times.inject(0.0) do |running, idx|
    f_test_uvi(clusters, idx, means, running)
  end
  accumulated / (cnt - cnt2)
end
.f_test_uvi(clusters, i, means, uv) ⇒ Object
29 30 31 32 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 29
#
# Adds the squared deviation of every member of cluster +i+ from means[i]
# to +uv+ and returns the result.
#
# @param clusters collection of clusters
# @param i index of the cluster (and of its mean) to process
# @param means cluster centers
# @param uv starting value of the accumulator
# @return +uv+ plus the squared deviations; +uv+ itself for an empty cluster
def self.f_test_uvi(clusters, i, means, uv)
  center = means[i]
  # Folding over an empty cluster simply yields the seed value.
  clusters[i].inject(uv) do |running, value|
    delta = value - center
    running + delta * delta
  end
end
.find_outliers(avg, cs, i, sensitivity) ⇒ Object
65 66 67 68 69 70 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 65
#
# Selects the members of cs[i] for which normal(member, avg, std) multiplied
# by the cluster size is below +sensitivity+, where std is obtained from
# EverydayCliUtils::MapUtil.std_dev for that cluster.
#
# @param avg center passed to normal for every member
# @param cs collection of clusters
# @param i index of the cluster to inspect
# @param sensitivity exclusive upper bound a member's score must stay under to be selected
# @return the selected members of cs[i]
def self.find_outliers(avg, cs, i, sensitivity)
  members = cs[i]
  spread = EverydayCliUtils::MapUtil.std_dev(members)
  size = members.count
  score = ->(value) { normal(value, avg, spread) * size }
  members.select { |value| score.call(value) < sensitivity }
end
.get_clusters(collection, means) ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 48
#
# Distributes every item of +collection+ into the bucket of the mean with
# the smallest squared difference to it. When several means are equally
# close, the one with the highest index wins.
#
# @param collection items to distribute; each must support means[i] - item
# @param means cluster centers
# @return [Array<Array>] one bucket per mean, in the order of +means+
def self.get_clusters(collection, means)
  buckets = Array.new(means.count) { [] }
  collection.each do |item|
    # Stays false when means is empty, so the bucket lookup below raises
    # a TypeError in that case.
    nearest = false
    best = nil
    means.count.times do |idx|
      gap = (means[idx] - item) ** 2
      # The first mean is always taken; later ones replace it on <=.
      next unless idx.zero? || gap <= best

      nearest = idx
      best = gap
    end
    buckets[nearest] << item
  end
  buckets
end
.normal(x, avg, std) ⇒ Object
5 6 7 8 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 5
#
# Evaluates exp(-((x - avg) / std)^2 / 2) / (std * sqrt(2 * PI)), i.e. the
# normal probability density with center +avg+ and spread +std+ at +x+.
#
# @param x [Numeric] point at which the density is evaluated
# @param avg [Numeric] center of the distribution
# @param std [Numeric] spread of the distribution
# @return [Float] the density value
def self.normal(x, avg, std)
  # Divide by a Float so all-Integer arguments are not truncated by
  # integer division (e.g. (1 - 0) / 2 would otherwise be 0).
  z = (x - avg) / std.to_f
  Math.exp(-(z ** 2.0) / 2.0) / (std * Math.sqrt(2.0 * Math::PI))
end