Module: WikipediaWrapper
- Extended by:
- WikipediaWrapper
- Included in:
- WikipediaWrapper
- Defined in:
- lib/wikipedia_wrapper.rb,
lib/wikipedia_wrapper/page.rb,
lib/wikipedia_wrapper/util.rb,
lib/wikipedia_wrapper/image.rb,
lib/wikipedia_wrapper/version.rb,
lib/wikipedia_wrapper/exception.rb,
lib/wikipedia_wrapper/configuration.rb
Defined Under Namespace
Classes: Configuration, ConfigurationError, DisambiguationError, FormatError, HTTPTimeoutError, Image, InvalidRequestError, MultiplePagesError, Page, PageError, RedirectError, WikiImage, WikipediaError
Constant Summary collapse
- VERSION =
"0.1.0"
Class Method Summary collapse
- .check_results(term, raw_results) ⇒ Object
-
.fetch(params) ⇒ Hash
Given the request parameters, params, fetch the response from the API URL and parse it as JSON.
Instance Method Summary collapse
-
#autocomplete(term, limit: 10, redirect: true) ⇒ Hash{String=>String}
A hash where the keys are the titles of the articles and the values are a short description of the page.
-
#cache ⇒ Cache
Retrieve the cache for this module if it is already defined, otherwise create a new Cache, defaulting to an in-memory cache.
-
#cache=(raw_client, timeout: config.default_ttl) ⇒ Object
Define the caching client.
-
#check_page(term) ⇒ String
Function to determine whether there is a page with that term.
- #config ⇒ Object
-
#configure {|config| ... } ⇒ Object
Set up configuration options.
-
#page(term, auto_suggest: true, redirect: true) ⇒ WikipediaWrapper::Page
Convenience function to retrieve a Wikipedia page.
-
#search(term, limit: 10, suggestion: false) ⇒ {String => String}, Array<{String => String}, <String, nil>>
Do a Wikipedia search for the given term.
-
#summary(term, html: false, sentences: 0, chars: 0) ⇒ String
Plain text or basic HTML summary of the page.
Class Method Details
.check_results(term, raw_results) ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/wikipedia_wrapper/util.rb', line 46

# Validate the parsed API response of a single-page query.
#
# @param term [String] the term that was looked up; handed to the raised errors
# @param raw_results [Hash] parsed API response. raw_results['query']['pages']
#   is treated as a Hash keyed by page id (see the destructuring below).
# @raise [WikipediaWrapper::MultiplePagesError] if more than one page is present
# @raise [WikipediaWrapper::PageError] if no page is present, or the page's
#   key is '-1' (presumably the API's marker for a missing page)
# @raise [WikipediaWrapper::DisambiguationError] if the page has a truthy
#   'disambiguation' entry in its 'pageprops'
# @return [nil] when the response describes exactly one usable page
def self.check_results(term, raw_results)
  pages = raw_results['query']['pages']

  if pages.length > 1
    # The titles live in the hash values; iterating the hash directly would
    # yield [id, info] pairs, which cannot be indexed with 'title'.
    titles = pages.values.map { |page| page['title'] }
    raise WikipediaWrapper::MultiplePagesError.new(titles, term)
  end
  raise WikipediaWrapper::PageError.new(term) if pages.empty?

  key, page_info = pages.first
  raise WikipediaWrapper::PageError.new(term) if key == '-1'

  # Check for disambiguation pages
  pageprops = page_info['pageprops']
  raise WikipediaWrapper::DisambiguationError.new(term) if pageprops && pageprops['disambiguation']

  nil
end
.fetch(params) ⇒ Hash
Given the request parameters, params, fetch the response from the API URL and parse it as JSON. Raise an InvalidRequestError if an error occurs.
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/wikipedia_wrapper/util.rb', line 18

# Given the request parameters, fetch the response from the API URL and parse
# it as JSON. Responses are looked up through +cache+, keyed by the full
# request URL; the HTTP request is only made inside the cache's block.
#
# @param params [Hash] request parameters. :action defaults to 'query' and
#   :format is always forced to 'json'. A parameter whose value is empty is
#   sent as a bare flag (name only). The hash itself is not modified.
# @raise [WikipediaWrapper::InvalidRequestError] if the action is not
#   'opensearch' and the response contains an 'error' key
# @return [Hash, Array] the parsed JSON response
def self.fetch(params)
  # Work on a copy so the caller's hash is left untouched.
  request = params.dup

  # if no action is defined, set it to 'query'
  request[:action] = 'query' unless request.key?(:action)
  request[:format] = 'json' # always return json format

  # FIXME: deal with continuation
  #request[:continue] = '' # does not work for autocomplete

  # Escape each name and value on its own so that characters such as '&',
  # '=' or '#' inside a value cannot change the structure of the query string.
  query_part = request.map { |k, v|
    name = URI.encode_www_form_component(k.to_s)
    value = v.to_s
    value.empty? ? name : "#{name}=#{URI.encode_www_form_component(value)}"
  }.join('&')

  # NOTE(review): api_url is used verbatim and assumed to be a valid URL already.
  endpoint_url = "#{WikipediaWrapper.config.api_url}?#{query_part}"

  raw_results = cache.fetch(endpoint_url) {
    # URI#open is provided by open-uri; the block form closes the stream.
    URI.parse(endpoint_url).open('User-Agent' => config.user_agent) { |f| JSON.parse(f.read) }
  }

  # The 'error' check is skipped for opensearch responses (autocomplete
  # expects an array from that action, which has no #key?).
  if request[:action] != 'opensearch' && raw_results.key?('error')
    raise WikipediaWrapper::InvalidRequestError.new(endpoint_url, raw_results['error']['info'])
  end

  raw_results
end
Instance Method Details
#autocomplete(term, limit: 10, redirect: true) ⇒ Hash{String=>String}
Returns a hash where the keys are the titles of the articles and the values are a short description of the page.
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
# File 'lib/wikipedia_wrapper.rb', line 168

# Query the 'opensearch' action for title suggestions matching +term+.
#
# @param term [String] the (partial) term to complete
# @param limit [Integer] maximum number of suggestions; values above 100 are
#   capped at 100
# @param redirect [Boolean] sends redirects=resolve when true, redirects=return
#   otherwise
# @raise [WikipediaWrapper::FormatError] if the response does not have exactly
#   4 elements
# @return [Hash{String=>String}] a hash where the keys are the titles of the
#   articles and the values are a short description of the page
def autocomplete(term, limit: 10, redirect: true)
  query_params = {
    action: 'opensearch',
    search: term,
    redirects: redirect ? 'resolve' : 'return',
    limit: [limit, 100].min.to_s
  }

  raw_results = fetch(query_params)

  unless raw_results.length == 4
    raise WikipediaWrapper::FormatError.new("autocomplete", "array had length of #{raw_results.length} instead of 4")
  end

  # Element 1 holds the titles and element 2 the matching descriptions;
  # pair them up position by position.
  raw_results[1].zip(raw_results[2]).to_h
end
#cache ⇒ Cache
Retrieve the cache for this module if it is already defined, otherwise create a new Cache, defaulting to an in-memory cache
35 36 37 38 39 40 41 |
# File 'lib/wikipedia_wrapper.rb', line 35

# Return the cache for this module. On first use a new Cache is created
# and its default TTL is copied from config.default_ttl; afterwards the same
# object is returned.
#
# @return [Cache] the memoized cache
def cache
  return @cache unless @cache.nil?

  @cache = Cache.new
  @cache.config.default_ttl = config.default_ttl
  @cache
end
#cache=(raw_client, timeout: config.default_ttl) ⇒ Object
Define the caching client
53 54 55 56 |
# File 'lib/wikipedia_wrapper.rb', line 53

# Define the caching client: wrap +raw_client+ with Cache.wrap, store it as
# the module's cache and set its default TTL.
#
# @param raw_client [Object] the client handed to Cache.wrap
# @param timeout [Object] value assigned to the cache's default_ttl;
#   defaults to config.default_ttl
def cache=(raw_client, timeout: config.default_ttl)
  @cache = Cache.wrap(raw_client)
  cache_settings = @cache.config
  cache_settings.default_ttl = timeout
end
#check_page(term) ⇒ String
Function to determine whether there is a page with that term. It uses the search and suggestion functionality to find a possible match and raises a PageError if no page could be found.
202 203 204 205 206 207 208 209 210 211 212 213 214 |
# File 'lib/wikipedia_wrapper.rb', line 202

# Determine whether there is a page for +term+, using a one-result search
# with suggestions enabled.
#
# @param term [String] the term to look up
# @raise [WikipediaWrapper::PageError] if there is no suggestion and the
#   search did not return exactly one result
# @return [String] the suggestion if the search provided one, otherwise the
#   title of the single search result
def check_page(term)
  hits, suggested = search(term, limit: 1, suggestion: true)

  # A suggestion takes precedence over the search hit.
  return suggested unless suggested.nil?
  raise WikipediaWrapper::PageError.new(term) unless hits.length == 1

  title, = hits.first
  title
end
#config ⇒ Object
14 15 16 |
# File 'lib/wikipedia_wrapper.rb', line 14

# Return the module's configuration, creating a new Configuration the first
# time it is requested.
#
# @return [Configuration] the memoized configuration object
def config
  @config = Configuration.new unless @config
  @config
end
#configure {|config| ... } ⇒ Object
Set up configuration options
27 28 29 30 |
# File 'lib/wikipedia_wrapper.rb', line 27

# Set up configuration options. Ensures a Configuration exists, then yields
# it to the given block.
#
# @yield [config] the configuration object to modify
# @return [Object] whatever the block returns
def configure
  @config = Configuration.new unless @config
  yield config
end
#page(term, auto_suggest: true, redirect: true) ⇒ WikipediaWrapper::Page
Convenience function to retrieve a Wikipedia page
65 66 67 68 69 70 71 72 73 |
# File 'lib/wikipedia_wrapper.rb', line 65

# Convenience function to retrieve a Wikipedia page.
#
# @param term [String] the title or term to look up
# @param auto_suggest [Boolean] when true, +term+ is first resolved through
#   check_page (which may raise a PageError)
# @param redirect [Boolean] passed through to WikipediaWrapper::Page.new
# @return [WikipediaWrapper::Page] the page built for the (resolved) term
def page(term, auto_suggest: true, redirect: true)
  title = auto_suggest ? check_page(term) : term
  WikipediaWrapper::Page.new(title, redirect: redirect)
end
#search(term, limit: 10, suggestion: false) ⇒ {String => String}, Array<{String => String}, <String, nil>>
Do a Wikipedia search for the given term
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/wikipedia_wrapper.rb', line 132

# Do a Wikipedia search for the given term.
#
# @param term [String] the search term
# @param limit [Integer] maximum number of results requested (sent as srlimit)
# @param suggestion [Boolean] when true, also return the search suggestion
# @return [Hash{String=>String}] title => snippet, when +suggestion+ is false
# @return [Array] [results, suggestion] when +suggestion+ is true; the second
#   element is nil if the response carries no suggestion
def search(term, limit: 10, suggestion: false)
  search_params = {
    list: 'search',
    srprop: 'snippet',
    srlimit: limit.to_s,
    srsearch: term
  }

  query = fetch(search_params)['query']

  results = query['search'].each_with_object({}) do |sr, found|
    # Unwrap <span ...>text</span> markup, keeping only the inner text.
    # [^>]* keeps each match inside a single opening tag; a greedy .* would
    # span from the first to the last <span> and drop the text in between.
    found[sr['title']] = sr['snippet'].gsub(%r{<span [^>]*>(?<term>[^<]*)</span>}, '\k<term>')
  end

  return results unless suggestion

  # A response without 'searchinfo' is treated like one without a suggestion.
  searchinfo = query['searchinfo'] || {}
  [results, searchinfo['suggestion']]
end
#summary(term, html: false, sentences: 0, chars: 0) ⇒ String
This is a convenience wrapper - auto_suggest and redirect are enabled by default
Plain text or basic HTML summary of the page. Redirects are always followed automatically.
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# File 'lib/wikipedia_wrapper.rb', line 87

# Plain text or basic HTML summary of the page. The term is always resolved
# through check_page first and the request always asks for redirects.
#
# Exactly one length option is sent: exsentences if +sentences+ is positive,
# otherwise exchars if +chars+ is positive, otherwise exintro.
#
# @param term [String] the term to summarize
# @param html [Boolean] when false, 'explaintext' is requested
# @param sentences [Integer] number of sentences to return; capped at 10,
#   ignored when 0
# @param chars [Integer] number of characters to return; ignored when 0 or
#   when +sentences+ is positive
# @raise [WikipediaWrapper::PageError] and the other errors raised by
#   check_page / check_results
# @return [String] the 'extract' of the resulting page
def summary(term, html: false, sentences: 0, chars: 0)
  # get auto_suggest
  term = check_page(term)

  query_params = {
    redirects: '',
    prop: 'extracts|pageprops',
    titles: term,
    ppprop: 'disambiguation'
  }
  query_params[:explaintext] = '' unless html

  # 0 is truthy in Ruby, so the "unset" defaults must be tested numerically.
  sentence_count = sentences.to_i
  char_count = chars.to_i
  if sentence_count > 0
    query_params[:exsentences] = [sentence_count, 10].min.to_s
  elsif char_count > 0
    query_params[:exchars] = char_count.to_s
  else
    query_params[:exintro] = ''
  end

  raw_results = fetch(query_params)
  check_results(term, raw_results)

  _id, info = raw_results['query']['pages'].first
  info['extract']
end