Class: Zipfy::Zipf
- Inherits:
-
Object
- Object
- Zipfy::Zipf
- Defined in:
- lib/zipfy.rb
Constant Summary collapse
- TAB =
"\t"
Instance Attribute Summary collapse
-
#distribution ⇒ Object
readonly
Returns the value of attribute distribution.
-
#total ⇒ Object
readonly
Returns the value of attribute total.
-
#zipf_constant ⇒ Object
readonly
Returns the value of attribute zipf_constant.
Instance Method Summary collapse
-
#calculate_std_dev_from_reg ⇒ Object
must calculate zipfness first.
- #calculate_zipf_constant ⇒ Object
- #calculate_zipfness ⇒ Object
-
#create_distribution(words) ⇒ Object
Sets the instance variable distribution to the distribution of the set of words passed.
-
#load_file(file_path) ⇒ Object
Parses a file into an array of words.
- #puts_distribution ⇒ Object
- #save_dist_to_file(file_path, overwrite_file = false) ⇒ Object
- #sort_distribution ⇒ Object
Instance Attribute Details
#distribution ⇒ Object (readonly)
Returns the value of attribute distribution.
16 17 18 |
# File 'lib/zipfy.rb', line 16
# Reader for the distribution attribute: returns the current value of
# @distribution (an Array filled with WordData entries by
# #create_distribution).
def distribution
  @distribution
end
#total ⇒ Object (readonly)
Returns the value of attribute total.
17 18 19 |
# File 'lib/zipfy.rb', line 17
# Reader for the total attribute: returns the current value of @total
# (the number of words counted by #create_distribution).
def total
  @total
end
#zipf_constant ⇒ Object (readonly)
Returns the value of attribute zipf_constant.
18 19 20 |
# File 'lib/zipfy.rb', line 18
# Reader for the zipf_constant attribute: returns the current value of
# @zipf_constant (set by #calculate_zipf_constant, reset to nil by
# #create_distribution).
def zipf_constant
  @zipf_constant
end
Instance Method Details
#calculate_std_dev_from_reg ⇒ Object
must calculate zipfness first
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/zipfy.rb', line 99
# Mean absolute deviation of each entry's zip_number from the ideal Zipf
# value 1/rank.
#
# Rank comes from position: the last element of @distribution has rank 1,
# the one before it rank 2, and so on. Each entry's zip_number must already
# be set (see #calculate_zipfness).
#
# Returns a Float; the result is NaN for an empty distribution (0.0 / 0.0).
def calculate_std_dev_from_reg
  count = @distribution.length

  gaps = @distribution.each_with_index.map do |entry, index|
    rank = count - index
    ideal = 1 / rank.to_f
    (ideal - entry.zip_number).abs
  end

  # Plain left-to-right accumulation starting from 0.0.
  accumulated = gaps.inject(0.0) { |running, gap| running + gap }
  accumulated / count.to_f
end
#calculate_zipf_constant ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/zipfy.rb', line 85
# Computes and caches the scaling constant that makes the most frequent
# word's zipf number equal to 1. That word has rank 1, so its ideal value
# 1/rank is 1:
#
#   (max_frequency / total) * constant = 1  =>  constant = total / max_frequency
#
# The most frequent word is found by scanning for the highest frequency
# instead of taking the last element, so the result does not depend on
# #sort_distribution having been called first.
#
# Returns the constant as a Float and stores it in @zipf_constant.
# Raises NoMethodError when the distribution is empty.
def calculate_zipf_constant
  most_common = @distribution.max_by { |wd| wd.frequency.to_f }
  @zipf_constant = @total.to_f / most_common.frequency.to_f
end
#calculate_zipfness ⇒ Object
56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/zipfy.rb', line 56
# Assigns a zip_number to every entry of @distribution.
#
# zip_number = frequency * zipf_constant / total. The zipf constant is
# computed on demand via #calculate_zipf_constant when it has not been
# set yet.
#
# Returns an Array with the zip_number assigned to each entry, in
# distribution order.
def calculate_zipfness
  calculate_zipf_constant unless @zipf_constant

  # Per-occurrence weight, hoisted out of the loop.
  scale = @zipf_constant / @total.to_f

  @distribution.map do |entry|
    entry.zip_number = scale * entry.frequency
  end
end
#create_distribution(words) ⇒ Object
Sets the instance variable distribution to the distribution of the set of words passed.
37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/zipfy.rb', line 37
# Builds the word-frequency distribution for +words+.
#
# Resets @distribution, @total and @zipf_constant, then counts how many
# times each word occurs. @total ends up as the number of words seen and
# @distribution holds one WordData(word, count) per distinct word, in
# first-seen order.
#
# Returns the Array of distinct words.
def create_distribution(words)
  @distribution = []
  @total = 0
  @zipf_constant = nil

  counts = words.each_with_object(Hash.new(0)) do |word, tally|
    tally[word] += 1
  end
  @total = counts.values.inject(0) { |running, count| running + count }

  unique_words = counts.keys
  unique_words.each do |word|
    @distribution << WordData.new(word, counts[word])
  end
end
#load_file(file_path) ⇒ Object
Parses a file into an array of words
21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/zipfy.rb', line 21
# Parses a file into an array of words.
#
# The file content is split on whitespace, commas and hyphens; every
# token then has its non-word characters (\W) stripped and is downcased.
# Tokens that end up empty are dropped.
#
# Returns an Array of Strings.
def load_file(file_path)
  raw_text = File.open(file_path, 'r') { |file| file.read.to_s }

  normalized = raw_text.split(/[\s,-]/).map do |token|
    token.gsub(/\W+/, '').downcase
  end

  normalized.reject { |token| token == "" }
end
#puts_distribution ⇒ Object
69 70 71 72 73 74 75 76 |
# File 'lib/zipfy.rb', line 69
# Prints the distribution as a tab-separated table to standard output.
#
# A heading is emitted through the +underline+ helper (defined outside this
# listing), followed by one row per entry: word, frequency, rank and
# zip_number. Rank is derived from position (the last entry has rank 1).
#
# Returns @distribution.
def puts_distribution
  underline " " * 6 + "Word Distribution" + " " * 6
  length = @distribution.length
  @distribution.each_with_index do |wd, i|
    # Shorter words get more padding tabs so the columns line up. Clamp to
    # one tab: words of 40+ characters would otherwise make the count
    # negative (String#* raises ArgumentError), and words of 32-39
    # characters would run straight into the frequency column.
    tabs = [4 - wd.word.length / 8, 1].max
    puts "#{wd.word}#{TAB * tabs}#{wd.frequency}#{TAB}#{length - i}#{TAB}#{wd.zip_number}"
  end
end
#save_dist_to_file(file_path, overwrite_file = false) ⇒ Object
78 79 80 81 82 83 |
# File 'lib/zipfy.rb', line 78
# Guards against clobbering an existing file.
#
# When +file_path+ already exists and +overwrite_file+ is false, prints a
# message asking for --force and terminates the process via +exit+.
# Otherwise returns nil. No file is written in this listing.
#
# file_path      - path of the target file.
# overwrite_file - true when an existing file may be replaced
#                  (default: false).
def save_dist_to_file(file_path, overwrite_file = false)
  # Refuse only when overwriting was NOT requested. File.exist? replaces
  # File.exists?, which is deprecated and removed in Ruby 3.2.
  return unless File.exist?(file_path) && !overwrite_file

  puts "Requires --force to overwrite existing file '#{file_path}'"
  exit
end
#sort_distribution ⇒ Object
52 53 54 |
# File 'lib/zipfy.rb', line 52
# Replaces @distribution with a copy ordered by ascending frequency
# (each frequency is compared as an Integer via to_i).
#
# Returns the newly sorted Array.
def sort_distribution
  ascending = @distribution.sort_by { |entry| entry.frequency.to_i }
  @distribution = ascending
end