Module: EverydayCliUtils::Kmeans
- Defined in:
- lib/everyday-cli-utils/safe/kmeans.rb
Class Method Summary
- .kmeans(collection, k) ⇒ Object
- .nmeans(collection, max_k = 10, threshold = 0.05) ⇒ Object
- .nmeans_setup_1(collection) ⇒ Object
- .nmeans_setup_2(collection, avg, cnt, ks1) ⇒ Object
- .outliers(collection, sensitivity = 0.5, k = nil) ⇒ Object
- .run_kmean(collection, ks) ⇒ Object
- .run_nmean(collection, avg, cnt, ft, ft2, k, ks) ⇒ Object
- .run_nmeans(avg, cnt, collection, ft, ft1, ft2, ks, ks1, max_k, threshold) ⇒ Object
Class Method Details
.kmeans(collection, k) ⇒ Object
122 123 124 125 126 127 128 129 130 131 132 133 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 122
#
# Computes k cluster centers for a one-dimensional collection.
#
# The initial centers are spaced evenly strictly between the collection's
# minimum and maximum. They are then refined by calling run_kmean repeatedly
# until a pass returns centers equal to the ones it was given.
#
# @param collection [#min, #max] the values to cluster
# @param k [Integer] the number of centers to produce
# @return [Array] the converged centers, as returned by the last run_kmean pass
def self.kmeans(collection, k)
  lowest = collection.min
  span = collection.max - lowest
  # Dividing by (k + 1.0) keeps every seed off the extremes and forces float math.
  step = span / (k + 1.0)
  centers = (1..k).map { |i| lowest + (i * step) }

  # `false` never equals an Array, so at least one refinement pass always runs.
  previous = false
  previous, centers = run_kmean(collection, centers) until centers == previous
  centers
end
.nmeans(collection, max_k = 10, threshold = 0.05) ⇒ Object
135 136 137 138 139 140 141 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 135
#
# Clusters a collection without a fixed k, trying cluster counts up to max_k.
#
# The collection is first passed through EverydayCliUtils::MapUtil.floats
# (presumably a float conversion; that helper is defined elsewhere). When the
# collection holds exactly one element the single-cluster result is returned
# directly; otherwise the search is delegated to run_nmeans.
#
# @param collection the values to cluster
# @param max_k [Integer] upper bound on the number of clusters tried
# @param threshold [Numeric] cut-off forwarded unchanged to run_nmeans
# @return the centers chosen by run_nmeans, or the k = 1 centers for a
#   single-element collection
def self.nmeans(collection, max_k = 10, threshold = 0.05)
  values = EverydayCliUtils::MapUtil.floats(collection)
  mean, size, single_centers = nmeans_setup_1(values)

  if size == 1
    single_centers
  else
    pair_f, single_f2, pair_f2, pair_centers = nmeans_setup_2(values, mean, size, single_centers)
    run_nmeans(mean, size, values, pair_f, single_f2, pair_f2, pair_centers, single_centers, max_k, threshold)
  end
end
.nmeans_setup_1(collection) ⇒ Object
73 74 75 76 77 78 79 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 73
#
# First setup stage for nmeans: gathers the collection's average, its element
# count, and the centers produced by kmeans with k = 1.
#
# @param collection the values to summarise
# @return [Array] a three-element array: average, count, k = 1 centers
def self.nmeans_setup_1(collection)
  total = EverydayCliUtils::MapUtil.sum(collection)
  size = collection.count
  # Elements are evaluated left to right, so the division runs before kmeans.
  [total / size, size, kmeans(collection, 1)]
end
.nmeans_setup_2(collection, avg, cnt, ks1) ⇒ Object
81 82 83 84 85 86 87 88 89 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 81
#
# Second setup stage for nmeans: scores the k = 1 clustering with f_test2 and
# the k = 2 clustering with both f_test and f_test2.
#
# The scoring helpers live in EverydayCliUtils::KmeansUtil, which is not
# visible here; they are presumably F-test style statistics (TODO confirm).
#
# @param collection the values being clustered
# @param avg the collection average, forwarded to f_test
# @param cnt the collection element count
# @param ks1 the centers from kmeans with k = 1
# @return [Array] a four-element array: f_test for k = 2, f_test2 for k = 1,
#   f_test2 for k = 2, and the k = 2 centers
def self.nmeans_setup_2(collection, avg, cnt, ks1)
  util = EverydayCliUtils::KmeansUtil

  single_clusters = util.get_clusters(collection, ks1)
  single_f2 = util.f_test2(single_clusters, ks1, cnt)

  pair_centers = kmeans(collection, 2)
  pair_clusters = util.get_clusters(collection, pair_centers)
  pair_f = util.f_test(pair_clusters, pair_centers, cnt, avg)
  pair_f2 = util.f_test2(pair_clusters, pair_centers, cnt)

  [pair_f, single_f2, pair_f2, pair_centers]
end
.outliers(collection, sensitivity = 0.5, k = nil) ⇒ Object
143 144 145 146 147 148 149 150 151 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 143
#
# Collects the outliers reported for every cluster of the collection.
#
# Centers come from nmeans when k is nil and from kmeans(collection, k)
# otherwise. For each center, KmeansUtil.find_outliers is called with the
# center, all clusters, the center's index, and the sensitivity; the returned
# arrays are concatenated in center order.
#
# @param collection the values to inspect
# @param sensitivity [Numeric] forwarded unchanged to find_outliers
# @param k [Integer, nil] fixed cluster count, or nil to let nmeans choose
# @return [Array] the concatenated find_outliers results
def self.outliers(collection, sensitivity = 0.5, k = nil)
  centers =
    if k.nil?
      nmeans(collection)
    else
      kmeans(collection, k)
    end
  clusters = EverydayCliUtils::KmeansUtil.get_clusters(collection, centers)

  centers.each_with_index.reduce([]) do |found, (center, index)|
    found + EverydayCliUtils::KmeansUtil.find_outliers(center, clusters, index, sensitivity)
  end
end
.run_kmean(collection, ks) ⇒ Object
110 111 112 113 114 115 116 117 118 119 120 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 110
#
# Runs a single k-means refinement pass.
#
# The collection is partitioned with KmeansUtil.get_clusters (defined
# elsewhere; used here as returning one enumerable of members per center).
# Each non-empty cluster's new center is the mean of its members. An empty
# cluster is re-seeded at a random position between the collection's minimum
# and maximum so the number of centers never shrinks.
#
# The mean is computed with a Float divisor so that Integer collections are
# not truncated by integer division; Float collections are unaffected.
#
# @param collection the values being clustered
# @param ks [Array] the current centers
# @return [Array] a two-element array: the centers passed in (same object)
#   and the new centers sorted ascending
def self.run_kmean(collection, ks)
  clusters = EverydayCliUtils::KmeansUtil.get_clusters(collection, ks)
  lowest = collection.min
  highest = collection.max

  centers = clusters.map do |members|
    if members.empty?
      # No members to average: pick a fresh random center inside the data range.
      (Random.rand * (highest - lowest)) + lowest
    else
      # to_f on the divisor avoids Integer#/ truncation for Integer input.
      members.sum / members.count.to_f
    end
  end

  [ks, centers.sort]
end
.run_nmean(collection, avg, cnt, ft, ft2, k, ks) ⇒ Object
91 92 93 94 95 96 97 98 99 100 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 91
#
# Evaluates one candidate cluster count for nmeans.
#
# Runs kmeans with the given k, scores the resulting clustering with
# KmeansUtil.f_test and KmeansUtil.f_test2 (helpers defined elsewhere), and
# returns the new scores and centers alongside the previous ones passed in.
#
# @param collection the values being clustered
# @param avg the collection average, forwarded to f_test
# @param cnt the collection element count
# @param ft the previous f_test score
# @param ft2 the previous f_test2 score
# @param k [Integer] the cluster count to try
# @param ks the previous centers
# @return [Array] a six-element array: new f_test, new f_test2, previous
#   f_test, previous f_test2, new centers, previous centers
def self.run_nmean(collection, avg, cnt, ft, ft2, k, ks)
  centers = kmeans(collection, k)
  clusters = EverydayCliUtils::KmeansUtil.get_clusters(collection, centers)
  new_ft = EverydayCliUtils::KmeansUtil.f_test(clusters, centers, cnt, avg)
  new_ft2 = EverydayCliUtils::KmeansUtil.f_test2(clusters, centers, cnt)

  [new_ft, new_ft2, ft, ft2, centers, ks]
end
.run_nmeans(avg, cnt, collection, ft, ft1, ft2, ks, ks1, max_k, threshold) ⇒ Object
102 103 104 105 106 107 108 |
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 102
#
# Searches cluster counts from 3 up to the smaller of max_k and cnt.
#
# After each run_nmean step, the previous centers are returned as soon as the
# relative change in the f_test score is below threshold and the previous
# f_test2 score is below ft1. If no step triggers that early exit, the result
# is ks1 when the latest f_test2 score is at least ft1, otherwise the latest
# centers.
#
# @param avg the collection average
# @param cnt the collection element count
# @param collection the values being clustered
# @param ft the f_test score of the starting clustering
# @param ft1 the f_test2 score of the k = 1 clustering
# @param ft2 the f_test2 score of the starting clustering
# @param ks the starting centers
# @param ks1 the k = 1 centers
# @param max_k [Integer] upper bound on the cluster count tried
# @param threshold [Numeric] relative-change cut-off for the early exit
# @return the selected centers
def self.run_nmeans(avg, cnt, collection, ft, ft1, ft2, ks, ks1, max_k, threshold)
  upper = [max_k, cnt].min

  (3..upper).each do |k|
    ft, ft2, prev_ft, prev_ft2, ks, prev_ks = run_nmean(collection, avg, cnt, ft, ft2, k, ks)
    relative_gain = (ft - prev_ft) / prev_ft
    return prev_ks if relative_gain < threshold && prev_ft2 < ft1
  end

  return ks1 if ft2 >= ft1

  ks
end