Class: Birdwatcher::Modules::Statuses::Wordlist
- Inherits:
-
Birdwatcher::Module
- Object
- Birdwatcher::Module
- Birdwatcher::Modules::Statuses::Wordlist
- Defined in:
- lib/birdwatcher/modules/statuses/word_list.rb
Constant Summary
Constants inherited from Birdwatcher::Module
Birdwatcher::Module::MODULE_PATH
Constants included from Concerns::Concurrency
Concerns::Concurrency::DEFAULT_THREAD_POOL_SIZE
Constants included from Concerns::Core
Concerns::Core::DATA_DIRECTORY
Class Method Summary collapse
Instance Method Summary collapse
Methods inherited from Birdwatcher::Module
_file_path, _file_path=, descendants, #execute, inherited, meta, meta=, module_by_path, module_paths, modules, path
Methods included from Concerns::WordList
Methods included from Concerns::Concurrency
Methods included from Concerns::Persistence
included, #save_status, #save_user
Methods included from Concerns::Presentation
included, #make_status_summary_output, #make_url_summary_output, #make_user_details_output, #make_user_summary_output, #output_status_summary, #output_user_details, #output_user_summary, #page_text
Methods included from Concerns::Outputting
#confirm, #error, #fatal, included, #info, #line_separator, #newline, #output, #output_formatted, #task, #warn
Methods included from Concerns::Util
#escape_html, #excerpt, included, #number_to_human_size, #parse_time, #pluralize, #strip_control_characters, #strip_html, #suppress_output, #suppress_warnings, #time_ago_in_words, #unescape_html
Methods included from Concerns::Core
#console, #current_workspace, #current_workspace=, #database, included, #klout_client, #read_data_file, #twitter_client
Class Method Details
.info ⇒ Object
79 80 81 82 83 84 85 86 87 |
# File 'lib/birdwatcher/modules/statuses/word_list.rb', line 79 def self.info <<-INFO The Word List module can generate a simple word list or dictionary from words used in statuses across all or specific users. Since users Tweet about their hobbies, interests, work, etc. generating a word list from statuses can be very effective for password cracking. INFO end |
Instance Method Details
#run ⇒ Object
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
# File 'lib/birdwatcher/modules/statuses/word_list.rb', line 89

# Builds a word list from statuses in the current workspace and writes it to
# the file named by the DEST option.
#
# When the USERS option is set, only statuses belonging to the listed screen
# names are processed; otherwise every status in the workspace is used. When
# INCLUDE_PAGE_TITLES is set, titles of the URLs attached to each status are
# added to the corpus as well. When INCLUDE_COUNT is set, each output line is
# written as "word, count"; otherwise only the word is written.
#
# @return [false] if there are no statuses to process; otherwise the return
#   value of the final +info+ call
def run
  users_option = option_setting("USERS")
  if users_option
    screen_names = users_option.split(" ").map(&:strip)
    user_ids     = current_workspace.users_dataset.where("screen_name IN ?", screen_names).map(&:id)
    statuses     = current_workspace.statuses_dataset.where("user_id IN ?", user_ids)
  else
    statuses = current_workspace.statuses_dataset
  end

  # Count once and reuse for both the emptiness check and the progress
  # message (presumably each count on the dataset issues a query — confirm).
  status_count = statuses.count
  if status_count.zero?
    error("There are no statuses to process")
    return false
  end

  word_list = make_word_list(
    :min_word_count       => option_setting("MIN_WORD_COUNT"),
    :min_word_length      => option_setting("MIN_WORD_LENGTH"),
    :exclude_words        => option_setting("EXCLUDE_WORDS").to_s.split(" ").map(&:strip),
    :exclude_stopwords    => option_setting("EXCLUDE_STOPWORDS"),
    :exclude_common_words => option_setting("EXCLUDE_COMMON"),
    :exclude_hashtags     => option_setting("EXCLUDE_HASHTAGS"),
    :exclude_mentions     => option_setting("EXCLUDE_MENTIONS"),
    :word_cap             => option_setting("WORD_CAP"),
    :stopwords_file       => File.join(DATA_DIRECTORY, "english_stopwords.txt"),
    :common_words_file    => File.join(DATA_DIRECTORY, "top100Kenglishwords.txt")
  )

  # Loop-invariant option lookups, read once instead of per status / per word.
  include_page_titles = option_setting("INCLUDE_PAGE_TITLES")
  include_count       = option_setting("INCLUDE_COUNT")
  destination         = option_setting("DEST")

  task("Processing #{status_count.to_s.bold} statuses...") do
    statuses.each do |status|
      word_list.add_to_corpus(status.text)
      next unless include_page_titles

      # Skip URLs without a title and links that resolve back to twitter.com.
      status.urls_dataset
        .where("title IS NOT NULL")
        .where("final_url NOT LIKE 'https://twitter.com/%'")
        .each { |url| word_list.add_to_corpus(url.title) }
    end
    word_list.process
  end

  task("Writing #{pluralize(word_list.word_list.length, 'word', 'words')} to file...") do
    File.open(destination, "w") do |f|
      word_list.word_list.each do |word, count|
        if include_count
          f.puts("#{word}, #{count}")
        else
          f.puts(word)
        end
      end
    end
  end

  file_size = number_to_human_size(File.size(destination))
  info("Wrote #{file_size.bold} to #{destination.bold}")
end