Class: Meteor::Core::Html::ParserImpl
- Defined in:
- lib/meteor.rb
Overview
HTMLパーサ
Constant Summary collapse
- KAIGYO_CODE =
KAIGYO_CODE = "\r?\n|\r" KAIGYO_CODE = "\r\n|\n|\r"
["\r\n","\n","\r"]
- NBSP_2 =
'&nbsp;'
- NBSP_3 =
'nbsp'
- BR_1 =
"\r?\n|\r"
- BR_2 =
'<br>'
- META =
'META'
- META_S =
'meta'
- MATCH_TAG =
MATCH_TAG = “br|hr|img|input|meta|base”
['br','hr','img','input','meta','base']
- MATCH_TAG_2 =
MATCH_TAG_2 = “textarea|option|pre”
['textarea','option','pre']
- MATCH_TAG_SNG =
['textarea','option','form']
- HTTP_EQUIV =
'http-equiv'
- CONTENT_TYPE =
'Content-Type'
- CONTENT =
'content'
- ATTR_LOGIC =
['disabled','readonly','checked','selected','multiple']
- OPTION =
'option'
- SELECTED =
'selected'
- INPUT =
'input'
- CHECKED =
'checked'
- RADIO =
'radio'
- DISABLE_ELEMENT =
DISABLE_ELEMENT = “input|textarea|select|optgroup”
['input','textarea','select','optgroup']
- DISABLED =
'disabled'
- READONLY_TYPE =
READONLY_TYPE = “text|password”
['text','password']
- TEXTAREA =
'textarea'
- READONLY =
'readonly'
- SELECT =
'select'
- MULTIPLE =
'multiple'
- SELECTED_M =
@@pattern_option = Regexp.new(OPTION) @@pattern_selected = Regexp.new(SELECTED) @@pattern_input = Regexp.new(INPUT) @@pattern_checked = Regexp.new(CHECKED) @@pattern_radio = Regexp.new(RADIO) @@pattern_disable_element = Regexp.new(DISABLE_ELEMENT) @@pattern_disabled = Regexp.new(DISABLED) @@pattern_readonly_type = Regexp.new(READONLY_TYPE) @@pattern_textarea = Regexp.new(TEXTAREA) @@pattern_readonly = Regexp.new(READONLY) @@pattern_select = Regexp.new(SELECT) @@pattern_multiple = Regexp.new(MULTIPLE)
'\\sselected\\s|\\sselected$|\\sSELECTED\\s|\\sSELECTED$'
- SELECTED_R =
SELECTED_M = [‘ selected ’,‘ selected’,‘ SELECTED ’,‘ SELECTED’]
'selected\\s|selected$|SELECTED\\s|SELECTED$'
- CHECKED_M =
'\\schecked\\s|\\schecked$|\\sCHECKED\\s|\\sCHECKED$'
- CHECKED_R =
CHECKED_M = [‘ checked ’,‘ checked’,‘ CHECKED ’,‘ CHECKED’]
'checked\\s|checked$|CHECKED\\s|CHECKED$'
- DISABLED_M =
'\\sdisabled\\s|\\sdisabled$|\\sDISABLED\\s|\\sDISABLED$'
- DISABLED_R =
DISABLED_M = [‘ disabled ’,‘ disabled’,‘ DISABLED ’,‘ DISABLED’]
'disabled\\s|disabled$|DISABLED\\s|DISABLED$'
- READONLY_M =
'\\sreadonly\\s|\\sreadonly$|\\sREADONLY\\s|\\sREADONLY$'
- READONLY_R =
READONLY_M = [‘ readonly ’,‘ readonly’,‘ READONLY ’,‘ READONLY’]
'readonly\\s|readonly$|READONLY\\s|READONLY$'
- MULTIPLE_M =
'\\smultiple\\s|\\smultiple$|\\sMULTIPLE\\s|\\sMULTIPLE$'
- MULTIPLE_R =
MULTIPLE_M = [‘ multiple ’,‘ multiple’,‘ MULTIPLE ’,‘ MULTIPLE’]
'multiple\\s|multiple$|MULTIPLE\\s|MULTIPLE$'
- TRUE =
'true'
- FALSE =
'false'
- TYPE_L =
@@pattern_true = Regexp.new(TRUE) @@pattern_false = Regexp.new(FALSE)
'type'
- TYPE_U =
'TYPE'
- PATTERN_UNESCAPE =
'&(amp|quot|apos|gt|lt|nbsp);'
- GET_ATTRS_MAP2 =
'\\s(disabled|readonly|checked|selected|multiple)'
- TABLE_FOR_ESCAPE_ =
{ '&' => '&amp;', '"' => '&quot;', '\'' => '&apos;', '<' => '&lt;', '>' => '&gt;', ' ' => '&nbsp;', }
- TABLE_FOR_ESCAPE_CONTENT_ =
{ '&' => '&amp;', '"' => '&quot;', '\'' => '&apos;', '<' => '&lt;', '>' => '&gt;', ' ' => '&nbsp;', "\r\n" => '<br>', "\r" => '<br>', "\n" => '<br>', }
- PATTERN_ESCAPE =
"[&\"'<> ]"
- PATTERN_ESCAPE_CONTENT =
"[&\"'<> \\n]"
- @@pattern_selected_m =
@@pattern_match_tag = Regexp.new(MATCH_TAG) @@pattern_match_tag2 = Regexp.new(MATCH_TAG_2)
Regexp.new(SELECTED_M)
- @@pattern_selected_r =
Regexp.new(SELECTED_R)
- @@pattern_checked_m =
Regexp.new(CHECKED_M)
- @@pattern_checked_r =
Regexp.new(CHECKED_R)
- @@pattern_disabled_m =
Regexp.new(DISABLED_M)
- @@pattern_disabled_r =
Regexp.new(DISABLED_R)
- @@pattern_readonly_m =
Regexp.new(READONLY_M)
- @@pattern_readonly_r =
Regexp.new(READONLY_R)
- @@pattern_multiple_m =
Regexp.new(MULTIPLE_M)
- @@pattern_multiple_r =
Regexp.new(MULTIPLE_R)
- @@pattern_escape =
Regexp.new(PATTERN_ESCAPE)
- @@pattern_escape_content =
Regexp.new(PATTERN_ESCAPE_CONTENT)
- @@pattern_br_2 =
Regexp.new(BR_2)
- @@pattern_unescape =
Regexp.new(PATTERN_UNESCAPE)
- @@pattern_set_mono1 =
Regexp.new(SET_MONO_1)
- @@pattern_get_attrs_map2 =
Regexp.new(GET_ATTRS_MAP2)
- @@pattern_and_1 =
Regexp.new(AND_1)
- @@pattern_lt_1 =
Regexp.new(LT_1)
- @@pattern_gt_1 =
Regexp.new(GT_1)
- @@pattern_dq_1 =
Regexp.new(DOUBLE_QUATATION)
- @@pattern_space_1 =
Regexp.new(SPACE)
- @@pattern_br_1 =
Regexp.new(BR_1)
- @@pattern_lt_2 =
Regexp.new(LT_2)
- @@pattern_gt_2 =
Regexp.new(GT_2)
- @@pattern_dq_2 =
Regexp.new(QO_2)
- @@pattern_space_2 =
Regexp.new(NBSP_2)
- @@pattern_and_2 =
Regexp.new(AND_2)
Constants inherited from Kernel
Kernel::AND_1, Kernel::AND_2, Kernel::AND_3, Kernel::AP_1, Kernel::AP_2, Kernel::AP_3, Kernel::ATTR_EQ, Kernel::CLEAN_1, Kernel::CLEAN_2, Kernel::DOUBLE_QUATATION, Kernel::EMPTY, Kernel::EN_1, Kernel::ERASE_ATTR_1, Kernel::ESCAPE_ENTITY_REF, Kernel::GET_ATTRS_MAP, Kernel::GET_ATTR_1, Kernel::GT_1, Kernel::GT_2, Kernel::GT_3, Kernel::LT_1, Kernel::LT_2, Kernel::LT_3, Kernel::MODE, Kernel::MODE_AF, Kernel::MODE_BF, Kernel::MODE_UTF8, Kernel::QO_2, Kernel::QO_3, Kernel::SEARCH_CX_1, Kernel::SEARCH_CX_2, Kernel::SEARCH_CX_3, Kernel::SEARCH_CX_4, Kernel::SEARCH_CX_5, Kernel::SEARCH_CX_6, Kernel::SET_ATTR_1, Kernel::SET_CX_1, Kernel::SET_CX_2, Kernel::SET_CX_3, Kernel::SET_CX_4, Kernel::SET_MONO_1, Kernel::SPACE, Kernel::TAG_CLOSE, Kernel::TAG_CLOSE3, Kernel::TAG_OPEN, Kernel::TAG_OPEN3, Kernel::TAG_SEARCH_1_1, Kernel::TAG_SEARCH_1_2, Kernel::TAG_SEARCH_1_3, Kernel::TAG_SEARCH_1_4, Kernel::TAG_SEARCH_1_4_2, Kernel::TAG_SEARCH_2_1, Kernel::TAG_SEARCH_2_1_2, Kernel::TAG_SEARCH_2_2, Kernel::TAG_SEARCH_2_2_2, Kernel::TAG_SEARCH_2_3, Kernel::TAG_SEARCH_2_3_2, Kernel::TAG_SEARCH_2_3_2_2, Kernel::TAG_SEARCH_2_4, Kernel::TAG_SEARCH_2_4_2, Kernel::TAG_SEARCH_2_4_2_2, Kernel::TAG_SEARCH_2_4_2_3, Kernel::TAG_SEARCH_2_4_3, Kernel::TAG_SEARCH_2_4_3_2, Kernel::TAG_SEARCH_2_4_4, Kernel::TAG_SEARCH_2_6, Kernel::TAG_SEARCH_2_7, Kernel::TAG_SEARCH_3_1, Kernel::TAG_SEARCH_3_1_2, Kernel::TAG_SEARCH_3_1_2_2, Kernel::TAG_SEARCH_3_2, Kernel::TAG_SEARCH_3_2_2, Kernel::TAG_SEARCH_3_2_2_2, Kernel::TAG_SEARCH_4_1, Kernel::TAG_SEARCH_4_2, Kernel::TAG_SEARCH_4_3, Kernel::TAG_SEARCH_4_4, Kernel::TAG_SEARCH_4_5, Kernel::TAG_SEARCH_4_6, Kernel::TAG_SEARCH_4_7, Kernel::TAG_SEARCH_4_7_2, Kernel::TAG_SEARCH_NC_1_1, Kernel::TAG_SEARCH_NC_1_2, Kernel::TAG_SEARCH_NC_1_3, Kernel::TAG_SEARCH_NC_1_4, Kernel::TAG_SEARCH_NC_1_4_2, Kernel::TAG_SEARCH_NC_2_1, Kernel::TAG_SEARCH_NC_2_1_2, Kernel::TAG_SEARCH_NC_2_2, Kernel::TAG_SEARCH_NC_2_2_2, Kernel::TAG_SEARCH_NC_2_3, Kernel::TAG_SEARCH_NC_2_3_2, 
Kernel::TAG_SEARCH_NC_2_3_2_2, Kernel::TAG_SEARCH_NC_2_4, Kernel::TAG_SEARCH_NC_2_4_2, Kernel::TAG_SEARCH_NC_2_4_2_2, Kernel::TAG_SEARCH_NC_2_4_2_3, Kernel::TAG_SEARCH_NC_2_4_3, Kernel::TAG_SEARCH_NC_2_4_3_2, Kernel::TAG_SEARCH_NC_2_4_4, Kernel::TAG_SEARCH_NC_2_6, Kernel::TAG_SEARCH_NC_2_7, Kernel::TAG_SEARCH_NC_3_1, Kernel::TAG_SEARCH_NC_3_1_2, Kernel::TAG_SEARCH_NC_3_1_2_2, Kernel::TAG_SEARCH_NC_3_2, Kernel::TAG_SEARCH_NC_3_2_2, Kernel::TAG_SEARCH_NC_3_2_2_2
Constants inherited from Parser
Parser::HTML, Parser::XHTML, Parser::XML
Instance Attribute Summary
Attributes inherited from Kernel
Instance Method Summary collapse
-
#content_type ⇒ String
コンテントタイプを取得する.
-
#initialize(*args) ⇒ ParserImpl
constructor
イニシャライザ.
-
#parse(document) ⇒ Object
ドキュメントをパーサにセットする.
-
#read(file_path, encoding) ⇒ Object
ファイルを読み込み、パーサにセットする.
- #remove_attribute_(elm, attr_name) ⇒ Object
Methods inherited from Kernel
#attribute, #attribute_map, #character_encoding, #character_encoding=, #content, #cxtag, #document, #document=, #element, #execute, #flush, #remove_element, #root_element
Constructor Details
#initialize ⇒ ParserImpl #initialize(ps) ⇒ ParserImpl
イニシャライザ
3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 |
# File 'lib/meteor.rb', line 3244
#
# Initializer. Calls the parent initializer with no arguments, sets the
# document type to Parser::HTML, then dispatches on the number of
# arguments received.
#
# @overload initialize
#   Runs initialize_0.
# @overload initialize(ps)
#   Runs initialize_1 with the single argument.
#   @param ps the only positional argument, forwarded to initialize_1
# @raise [ArgumentError] when the argument count matches neither ZERO nor ONE
def initialize(*args)
  super()
  @doc_type = Parser::HTML
  case args.length
  when ZERO
    initialize_0
  when ONE
    initialize_1(args[0])
  else
    # Report the offending count so the caller can see what went wrong.
    raise ArgumentError, "wrong number of arguments (given #{args.length}, expected 0..1)"
  end
end
Instance Method Details
#content_type ⇒ String
コンテントタイプを取得する
3312 3313 3314 |
# File 'lib/meteor.rb', line 3312
#
# Gets the content type by delegating to @root.
#
# @return [String] whatever @root.content_type returns
def content_type
  root = @root
  root.content_type
end
#parse(document) ⇒ Object
ドキュメントをパーサにセットする
3280 3281 3282 3283 |
# File 'lib/meteor.rb', line 3280
#
# Sets the document on the parser and then analyzes it.
#
# @param document the document assigned to @root.document
# @return [Object] the result of analyze_ml
def parse(document)
  @root.document = document
  analyze_ml
end
#read(file_path, encoding) ⇒ Object
ファイルを読み込み、パーサにセットする
3290 3291 3292 3293 |
# File 'lib/meteor.rb', line 3290
#
# Reads a file via the parent implementation, then analyzes the result.
#
# @param file_path path handed to the parent #read
# @param encoding encoding handed to the parent #read
# @return [Object] the result of analyze_ml
def read(file_path, encoding)
  # Bare `super` forwards file_path and encoding unchanged.
  super
  analyze_ml
end
#remove_attribute_(elm, attr_name) ⇒ Object
3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 |
# File 'lib/meteor.rb', line 3728
#
# Removes an attribute from an element by substituting EMPTY for the first
# match of the attribute pattern in elm.attributes (in place, via sub!).
#
# @param elm element whose #attributes is modified
#   (presumably a String, since sub! is called on it; confirm against caller)
# @param attr_name name of the attribute to remove
# @return [Object] the result of sub! on elm.attributes
def remove_attribute_(elm, attr_name)
  # Decide whether the target attribute is one of the logical (boolean) ones.
  pattern_source =
    if is_match(ATTR_LOGIC, attr_name)
      # Logical attributes are located by their bare name.
      attr_name
    else
      # Other attributes are located by name followed by ERASE_ATTR_1.
      '' << attr_name << ERASE_ATTR_1
    end

  # Pattern used to locate the attribute.
  @pattern = Meteor::Core::Util::PatternCache.get(pattern_source)
  elm.attributes.sub!(@pattern, EMPTY)
end