Module: Scrapifier::XPath

Included in:
Support
Defined in:
lib/scrapifier/xpath.rb

Overview

Collection of the XPath expressions used to locate nodes within the parsed HTML document.

Constant Summary

# XPath union selecting title candidates: the Open Graph `og:title` meta
# content, the `title`/`Title` meta content, the `<title>` element and any
# `<h1>` element.
#
# The heredoc is written with a leading `|` margin that the `gsub` strips
# from every line; the trailing `|` on each line is the XPath union operator.
# The result is frozen so the shared constant cannot be mutated in place.
TITLE = <<-XPATH.gsub(/^\s+\|/, '').freeze
  |//meta[@property="og:title"]/@content|
  |//meta[@name="title"]/@content|
  |//meta[@name="Title"]/@content|
  |//title|//h1
XPATH
# XPath union selecting description candidates: the Open Graph
# `og:description` meta content, the `description`/`Description` meta content,
# and any `<h1>`, `<h3>`, `<p>`, `<span>` or `<font>` element.
#
# The heredoc is written with a leading `|` margin that the `gsub` strips
# from every line; the trailing `|` on each line is the XPath union operator.
# The result is frozen so the shared constant cannot be mutated in place.
DESC = <<-XPATH.gsub(/^\s+\|/, '').freeze
  |//meta[@property="og:description"]/@content|
  |//meta[@name="description"]/@content|
  |//meta[@name="Description"]/@content|
  |//h1|//h3|//p|//span|//font
XPATH
# XPath union selecting keyword candidates: the `keywords`/`Keywords` meta
# content and the Open Graph `og:type` meta content.
#
# The heredoc is written with a leading `|` margin that the `gsub` strips
# from every line; the trailing `|` on each line is the XPath union operator.
# The result is frozen so the shared constant cannot be mutated in place.
KEYWORDS = <<-XPATH.gsub(/^\s+\|/, '').freeze
  |//meta[@name="keywords"]/@content|
  |//meta[@name="Keywords"]/@content|
  |//meta[@property="og:type"]/@content
XPATH
# XPath union selecting language candidates: the `lang` attribute of
# `<html>`, the Open Graph `og:locale` meta content and the
# `content-language` http-equiv meta content.
#
# NOTE(review): XPath string comparison is case-sensitive, so a page using
# `http-equiv="Content-Language"` would not be matched by the last branch --
# confirm whether that is intended.
#
# The heredoc is written with a leading `|` margin that the `gsub` strips
# from every line; the trailing `|` on each line is the XPath union operator.
# The result is frozen so the shared constant cannot be mutated in place.
LANG = <<-XPATH.gsub(/^\s+\|/, '').freeze
  |//html/@lang|
  |//meta[@property="og:locale"]/@content|
  |//meta[@http-equiv="content-language"]/@content
XPATH
# XPath union selecting encoding candidates: the `charset` attribute of any
# `<meta>` element and the `content-type` http-equiv meta content.
#
# NOTE(review): XPath string comparison is case-sensitive, so a page using
# `http-equiv="Content-Type"` would not be matched by the last branch --
# confirm whether that is intended.
#
# The heredoc is written with a leading `|` margin that the `gsub` strips
# from every line; the trailing `|` on each line is the XPath union operator.
# The result is frozen so the shared constant cannot be mutated in place.
ENCODE = <<-XPATH.gsub(/^\s+\|/, '').freeze
  |//meta/@charset|
  |//meta[@http-equiv="content-type"]/@content
XPATH
# XPath union selecting the content of the `reply_to`/`Reply_to` meta tags.
#
# The heredoc is written with a leading `|` margin that the `gsub` strips
# from every line; the trailing `|` on each line is the XPath union operator.
# The result is frozen so the shared constant cannot be mutated in place.
REPLY_TO = <<-XPATH.gsub(/^\s+\|/, '').freeze
  |//meta[@name="reply_to"]/@content|
  |//meta[@name="Reply_to"]/@content
XPATH
# XPath union selecting the content of the `author`/`Author` meta tags.
#
# The heredoc is written with a leading `|` margin that the `gsub` strips
# from every line; the trailing `|` on each line is the XPath union operator.
# The result is frozen so the shared constant cannot be mutated in place.
AUTHOR = <<-XPATH.gsub(/^\s+\|/, '').freeze
  |//meta[@name="author"]/@content|
  |//meta[@name="Author"]/@content
XPATH
# XPath union selecting image URL candidates, listed from the most specific
# source to the most generic one:
# - Open Graph `og:image` meta content, `image_src` link href and the
#   `itemprop="image"` meta content;
# - `<img>` sources directly inside a `<div>` or `<a>` whose id or class is
#   exactly `logo`;
# - `<img>` sources that carry a `width` or `height` attribute;
# - any remaining `<img>` source.
#
# The heredoc is written with a leading `|` margin that the `gsub` strips
# from every line; the trailing `|` on each line is the XPath union operator.
# The result is frozen so the shared constant cannot be mutated in place.
IMG = <<-XPATH.gsub(/^\s+\|/, '').freeze
  |//meta[@property="og:image"]/@content|
  |//link[@rel="image_src"]/@href|
  |//meta[@itemprop="image"]/@content|
  |//div[@id="logo"]/img/@src|//a[@id="logo"]/img/@src|
  |//div[@class="logo"]/img/@src|//a[@class="logo"]/img/@src|
  |//a//img[@width]/@src|//img[@width]/@src|
  |//a//img[@height]/@src|//img[@height]/@src|
  |//a//img/@src|//span//img/@src|//img/@src
XPATH