Module: JobParser
- Defined in:
- lib/jobparser.rb,
lib/jobparser/cache.rb,
lib/jobparser/error.rb,
lib/jobparser/regex.rb,
lib/jobparser/parser.rb,
lib/jobparser/scorer.rb,
lib/jobparser/cleaner.rb,
lib/jobparser/version.rb,
lib/jobparser/parsehtml.rb,
lib/jobparser/parseschema.rb,
lib/jobparser/facets/apply.rb,
lib/jobparser/facets/facet.rb,
lib/jobparser/facets/title.rb,
lib/jobparser/specialcases.rb,
lib/jobparser/facets/salary.rb,
lib/jobparser/cache/textfile.rb,
lib/jobparser/facets/deadline.rb,
lib/jobparser/facets/location.rb,
lib/jobparser/facets/postcode.rb,
lib/jobparser/cache/mongostore.rb,
lib/jobparser/facets/salarystring.rb
Defined Under Namespace
Modules: Error, Facets
Classes: Cache, Cleaner, Match, ParseHtml, ParseSchema, Parser, Scorer, SpecialCases
Constant Summary
collapse
- SALARY_REGEX =
/£[\d,]*(?:.+)£[\d,]*/
- SALARY_STRING_REGEX =
/£[\d,]*.+£[\d,]*(\s.*$)?/
- SALARY_UP_TO_REGEX =
/(up to)(.+)£([\d,]*)/
- SALARY_TITLE_REGEX =
/salary|\srate/i
- VACANCY_TITLE_REGEX =
/vacancy|job title/i
- JOB_TITLE_ID_REGEX =
/job(.?)title|title/i
- APPLY_LINK_REGEX =
/^apply|submit an application|application form/i
- NBSP =
Nokogiri::HTML("&nbsp;").text
- LOCATION_REGEX =
/(?:location: )([\D]*)$/i
- SALARY_GROUP_REGEX =
/£([\d,]*)(?:.+)£([\d,]*)/
- CLEAN_SALARY_REGEX =
/,|\s/
- POSTCODE_REGEX =
/([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]?\s?[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)/
- JOB_TITLE_WORDS =
Words commonly used in job listings. Not sure if this is a good way to go, but I think it's worth a try. Could scope this regex to headers only.
/representative|sales|nurse|manager|assistant/i
- VERSION =
"0.15.1"
Class Method Summary
collapse
Class Method Details
.cache ⇒ Object
53
54
55
|
# File 'lib/jobparser.rb', line 53
# Exposes the module-level @cache instance variable as JobParser.cache.
# Returns nil until something assigns @cache.
class << self
  attr_reader :cache
end
|
.config ⇒ Object
49
50
51
|
# File 'lib/jobparser.rb', line 49
# Exposes the module-level @config instance variable as JobParser.config.
# Returns nil until something assigns @config.
class << self
  attr_reader :config
end
|
.configure(opts = {}) ⇒ Object
57
58
59
60
61
|
# File 'lib/jobparser.rb', line 57
# Copies recognised options into the module-level @config hash.
#
# Each key in +opts+ is converted with #to_sym; the value is stored only when
# that symbol is already a key of @config. Unknown keys are ignored, so this
# method never adds new settings.
#
# @param opts [Hash] option names (String or Symbol) mapped to their new values
# @return [Object] whatever +opts.each+ returns (the +opts+ hash itself for a Hash)
def self.configure(opts = {})
  opts.each do |key, val|
    name = key.to_sym
    # Hash#key? is a direct lookup; no intermediate keys array is built per option.
    @config[name] = val if @config.key?(name)
  end
end
|
.parse(url) ⇒ Object
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
# File 'lib/jobparser.rb', line 31
# Builds a parser object for the job listing at +url+.
#
# When the cache reports a valid entry for +url+, the cached result decides
# which parser class is used and the parser is built with nil html. Otherwise
# the page is fetched, and ParseSchema is chosen when the body contains the
# string "http://schema.org/JobPosting"; ParseHtml is used in every other case.
#
# NOTE(review): +open+ below is called on a caller-supplied string. Plain
# Kernel#open treats a leading "|" as a command to run, and URI.encode is
# obsolete in newer Rubies -- confirm which Ruby/open-uri versions are
# targeted before changing either call.
#
# @param url [String] address of the job listing
# @return [ParseSchema, ParseHtml] parser built for the listing
# @raise [JobParser::Error::InvalidUrl] when fetching raises URI::InvalidURIError
def self.parse(url)
  store = JobParser.cache

  if store.valid_for_url?(url)
    cached = JobParser.cache.fetch_result_for_url(url)
    parser_class = cached[:schema] ? ParseSchema : ParseHtml
    return parser_class.new(nil, url)
  end

  # Only the fetch-and-build path is guarded; the cached path above is not.
  begin
    page = open(URI.encode(url), :allow_redirections => :safe).read
    parser_class = page.include?("http://schema.org/JobPosting") ? ParseSchema : ParseHtml
    parser_class.new(page, url)
  rescue URI::InvalidURIError
    raise JobParser::Error::InvalidUrl, "The URI given (\"#{url}\") was not valid"
  end
end
|
.parser(url) ⇒ Object
26
27
28
29
|
# File 'lib/jobparser.rb', line 26
# Legacy entry point kept for older callers.
#
# Prints a one-line notice on standard output and then hands +url+ straight
# to JobParser.parse, returning whatever that call returns.
#
# @deprecated Use JobParser.parse instead.
# @param url [String] address of the job listing
# @return [Object] the result of JobParser.parse(url)
def self.parser(url)
  $stdout.puts("Warning: JobParser.parser is old. Use JobParser.parse")
  JobParser.parse(url)
end
|