Class: Meteor::Ml::Xhtml::ParserImpl

Inherits:
Core::Kernel show all
Defined in:
lib/meteor.rb

Overview

XHTML parser (XHTMLパーサ)

Direct Known Subclasses

Meteor::Ml::Xhtml5::ParserImpl

Constant Summary collapse

KAIGYO_CODE =

KAIGYO_CODE = "\r?\n|\r".freeze

["\r\n".freeze, "\n".freeze, "\r".freeze]
NBSP_2 =
' '.freeze
NBSP_3 =
'nbsp'.freeze
BR_1 =
"\r?\n|\r".freeze
BR_2 =
'<br/>'.freeze
BR_3 =
'<br\\/>'.freeze
META =
'META'.freeze
META_S =
'meta'.freeze
OPTION =
'option'.freeze
SELECTED =
'selected'.freeze
INPUT =
'input'.freeze
CHECKED =
'checked'.freeze
RADIO =
'radio'.freeze
DISABLE_ELEMENT =

DISABLE_ELEMENT = "input|textarea|select|optgroup"

['input'.freeze, 'textarea'.freeze, 'select'.freeze, 'optgroup'.freeze]
DISABLED =
'disabled'.freeze
READONLY_TYPE =

READONLY_TYPE = "text|password"

['text'.freeze, 'password'.freeze]
TEXTAREA =
'textarea'.freeze
READONLY =
'readonly'.freeze
SELECT =
'select'.freeze
MULTIPLE =
'multiple'.freeze
SELECTED_M =

@@pattern_option = Regexp.new(OPTION) @@pattern_selected = Regexp.new(SELECTED) @@pattern_input = Regexp.new(INPUT) @@pattern_checked = Regexp.new(CHECKED) @@pattern_radio = Regexp.new(RADIO) @@pattern_disable_element = Regexp.new(DISABLE_ELEMENT) @@pattern_disabled = Regexp.new(DISABLED) @@pattern_readonly_type = Regexp.new(READONLY_TYPE) @@pattern_textarea = Regexp.new(TEXTAREA) @@pattern_readonly = Regexp.new(READONLY) @@pattern_select = Regexp.new(SELECT) @@pattern_multiple = Regexp.new(MULTIPLE)

'\\sselected="[^"]*"\\s|\\sselected="[^"]*"$'.freeze
SELECTED_M1 =
'\\sselected="([^"]*)"\\s|\\sselected="([^"]*)"$'.freeze
SELECTED_R =
'selected="[^"]*"'.freeze
SELECTED_U =
'selected="selected"'.freeze
CHECKED_M =
'\\schecked="[^"]*"\\s|\\schecked="[^"]*"$'.freeze
CHECKED_M1 =
'\\schecked="([^"]*)"\\s|\\schecked="([^"]*)"$'.freeze
CHECKED_R =
'checked="[^"]*"'.freeze
CHECKED_U =
'checked="checked"'.freeze
DISABLED_M =
'\\sdisabled="[^"]*"\\s|\\sdisabled="[^"]*"$'.freeze
DISABLED_M1 =
'\\sdisabled="([^"]*)"\\s|\\sdisabled="([^"]*)"$'.freeze
DISABLED_R =
'disabled="[^"]*"'.freeze
DISABLED_U =
'disabled="disabled"'.freeze
READONLY_M =
'\\sreadonly="[^"]*"\\s|\\sreadonly="[^"]*"$'.freeze
READONLY_M1 =
'\\sreadonly="([^"]*)"\\s|\\sreadonly="([^"]*)"$'.freeze
READONLY_R =
'readonly="[^"]*"'.freeze
READONLY_U =
'readonly="readonly"'.freeze
MULTIPLE_M =
'\\smultiple="[^"]*"\\s|\\smultiple="[^"]*"$'.freeze
MULTIPLE_M1 =
'\\smultiple="([^"]*)"\\s|\\smultiple="([^"]*)"$'.freeze
MULTIPLE_R =
'multiple="[^"]*"'.freeze
MULTIPLE_U =
'multiple="multiple"'.freeze
HTTP_EQUIV =
'http-equiv'.freeze
CONTENT_TYPE =
'Content-Type'.freeze
CONTENT =
'content'.freeze
TRUE =
'true'.freeze
FALSE =
'false'.freeze
TYPE_L =
'type'.freeze
TYPE_U =
'TYPE'.freeze
PATTERN_UNESCAPE =
'&(amp|quot|apos|gt|lt|nbsp);'.freeze
TABLE_FOR_ESCAPE_ =

@@pattern_@@match_tag = Regexp.new(@@match_tag) @@pattern_@@match_tag2 = Regexp.new(@@match_tag_2)

{
    '&' => '&amp;'.freeze,
    '"' => '&quot;'.freeze,
    '\'' => '&apos;'.freeze,
    '<' => '&lt;'.freeze,
    '>' => '&gt;'.freeze,
    ' ' => '&nbsp;'.freeze,
}
TABLE_FOR_ESCAPE_CONTENT_ =
{
    '&' => '&amp;'.freeze,
    '"' => '&quot;'.freeze,
    '\'' => '&apos;'.freeze,
    '<' => '&lt;'.freeze,
    '>' => '&gt;'.freeze,
    ' ' => '&nbsp;'.freeze,
    "\r\n" => '<br/>'.freeze,
    "\r" => '<br/>'.freeze,
    "\n" => '<br/>'.freeze,
}
PATTERN_ESCAPE =
'[&"\'<> ]'.freeze
PATTERN_ESCAPE_CONTENT =
'[&"\'<> \\n]'.freeze
@@match_tag_2 =

@@match_tag_2 = "textarea|option|pre"

['textarea'.freeze, 'option'.freeze, 'pre'.freeze]
@@attr_logic =
Array

Attributes specified as boolean values (論理値で指定する属性)

['disabled'.freeze, 'readonly'.freeze, 'checked'.freeze, 'selected'.freeze, 'multiple'.freeze]
@@pattern_selected_m =
Regexp.new(SELECTED_M)
@@pattern_selected_m1 =
Regexp.new(SELECTED_M1)
@@pattern_selected_r =
Regexp.new(SELECTED_R)
@@pattern_checked_m =
Regexp.new(CHECKED_M)
@@pattern_checked_m1 =
Regexp.new(CHECKED_M1)
@@pattern_checked_r =
Regexp.new(CHECKED_R)
@@pattern_disabled_m =
Regexp.new(DISABLED_M)
@@pattern_disabled_m1 =
Regexp.new(DISABLED_M1)
@@pattern_disabled_r =
Regexp.new(DISABLED_R)
@@pattern_readonly_m =
Regexp.new(READONLY_M)
@@pattern_readonly_m1 =
Regexp.new(READONLY_M1)
@@pattern_readonly_r =
Regexp.new(READONLY_R)
@@pattern_multiple_m =
Regexp.new(MULTIPLE_M)
@@pattern_multiple_m1 =
Regexp.new(MULTIPLE_M1)
@@pattern_multiple_r =
Regexp.new(MULTIPLE_R)
@@pattern_unescape =
Regexp.new(PATTERN_UNESCAPE)
@@pattern_br_2 =
Regexp.new(BR_3)
@@pattern_escape =
Regexp.new(PATTERN_ESCAPE)
@@pattern_escape_content =
Regexp.new(PATTERN_ESCAPE_CONTENT)

Constants inherited from Core::Kernel

Core::Kernel::AND_1, Core::Kernel::AND_2, Core::Kernel::AND_3, Core::Kernel::AP_1, Core::Kernel::AP_2, Core::Kernel::AP_3, Core::Kernel::ATTR_EQ, Core::Kernel::CLEAN_1, Core::Kernel::CLEAN_2, Core::Kernel::CSS_CLASS, Core::Kernel::CSS_ID, Core::Kernel::DOUBLE_QUATATION, Core::Kernel::EMPTY, Core::Kernel::EN_1, Core::Kernel::ERASE_ATTR_1, Core::Kernel::ESCAPE_ENTITY_REF, Core::Kernel::GET_ATTRS_MAP, Core::Kernel::GET_ATTR_1, Core::Kernel::GT_1, Core::Kernel::GT_2, Core::Kernel::GT_3, Core::Kernel::LT_1, Core::Kernel::LT_2, Core::Kernel::LT_3, Core::Kernel::MODE_AF, Core::Kernel::MODE_BF, Core::Kernel::MODE_UTF8, Core::Kernel::PATTERN_FIND_1, Core::Kernel::PATTERN_FIND_2_1, Core::Kernel::PATTERN_FIND_2_2, Core::Kernel::PATTERN_FIND_2_3, Core::Kernel::PATTERN_FIND_3_1, Core::Kernel::PATTERN_FIND_3_2, Core::Kernel::PATTERN_FIND_3_3, Core::Kernel::PATTERN_FIND_4, Core::Kernel::PATTERN_FIND_5, Core::Kernel::QO_2, Core::Kernel::QO_3, Core::Kernel::SEARCH_CX_1, Core::Kernel::SEARCH_CX_2, Core::Kernel::SEARCH_CX_3, Core::Kernel::SEARCH_CX_4, Core::Kernel::SEARCH_CX_5, Core::Kernel::SEARCH_CX_6, Core::Kernel::SET_ATTR_1, Core::Kernel::SET_CX_1, Core::Kernel::SET_CX_2, Core::Kernel::SET_CX_3, Core::Kernel::SET_CX_4, Core::Kernel::SET_MONO_1, Core::Kernel::SPACE, Core::Kernel::TAG_CLOSE, Core::Kernel::TAG_CLOSE3, Core::Kernel::TAG_OPEN, Core::Kernel::TAG_OPEN3, Core::Kernel::TAG_SEARCH_1_1, Core::Kernel::TAG_SEARCH_1_2, Core::Kernel::TAG_SEARCH_1_3, Core::Kernel::TAG_SEARCH_1_4, Core::Kernel::TAG_SEARCH_1_4_2, Core::Kernel::TAG_SEARCH_2_1, Core::Kernel::TAG_SEARCH_2_1_2, Core::Kernel::TAG_SEARCH_2_2, Core::Kernel::TAG_SEARCH_2_2_2, Core::Kernel::TAG_SEARCH_2_3, Core::Kernel::TAG_SEARCH_2_3_2, Core::Kernel::TAG_SEARCH_2_3_2_2, Core::Kernel::TAG_SEARCH_2_4, Core::Kernel::TAG_SEARCH_2_4_2, Core::Kernel::TAG_SEARCH_2_4_2_2, Core::Kernel::TAG_SEARCH_2_4_2_3, Core::Kernel::TAG_SEARCH_2_4_3, Core::Kernel::TAG_SEARCH_2_4_3_2, Core::Kernel::TAG_SEARCH_2_4_4, 
Core::Kernel::TAG_SEARCH_2_6, Core::Kernel::TAG_SEARCH_2_7, Core::Kernel::TAG_SEARCH_3_1, Core::Kernel::TAG_SEARCH_3_1_2, Core::Kernel::TAG_SEARCH_3_1_2_2, Core::Kernel::TAG_SEARCH_3_2, Core::Kernel::TAG_SEARCH_3_2_2, Core::Kernel::TAG_SEARCH_3_2_2_2, Core::Kernel::TAG_SEARCH_4_1, Core::Kernel::TAG_SEARCH_4_2, Core::Kernel::TAG_SEARCH_4_3, Core::Kernel::TAG_SEARCH_4_4, Core::Kernel::TAG_SEARCH_4_5, Core::Kernel::TAG_SEARCH_4_6, Core::Kernel::TAG_SEARCH_4_7, Core::Kernel::TAG_SEARCH_4_7_2, Core::Kernel::TAG_SEARCH_NC_1_1, Core::Kernel::TAG_SEARCH_NC_1_2, Core::Kernel::TAG_SEARCH_NC_1_3, Core::Kernel::TAG_SEARCH_NC_1_4, Core::Kernel::TAG_SEARCH_NC_1_4_2, Core::Kernel::TAG_SEARCH_NC_2_1, Core::Kernel::TAG_SEARCH_NC_2_1_2, Core::Kernel::TAG_SEARCH_NC_2_2, Core::Kernel::TAG_SEARCH_NC_2_2_2, Core::Kernel::TAG_SEARCH_NC_2_3, Core::Kernel::TAG_SEARCH_NC_2_3_2, Core::Kernel::TAG_SEARCH_NC_2_3_2_2, Core::Kernel::TAG_SEARCH_NC_2_4, Core::Kernel::TAG_SEARCH_NC_2_4_2, Core::Kernel::TAG_SEARCH_NC_2_4_2_2, Core::Kernel::TAG_SEARCH_NC_2_4_2_3, Core::Kernel::TAG_SEARCH_NC_2_4_3, Core::Kernel::TAG_SEARCH_NC_2_4_3_2, Core::Kernel::TAG_SEARCH_NC_2_4_4, Core::Kernel::TAG_SEARCH_NC_2_6, Core::Kernel::TAG_SEARCH_NC_2_7, Core::Kernel::TAG_SEARCH_NC_3_1, Core::Kernel::TAG_SEARCH_NC_3_1_2, Core::Kernel::TAG_SEARCH_NC_3_1_2_2, Core::Kernel::TAG_SEARCH_NC_3_2, Core::Kernel::TAG_SEARCH_NC_3_2_2, Core::Kernel::TAG_SEARCH_NC_3_2_2_2

Constants inherited from Parser

Parser::HTML, Parser::HTML5, Parser::XHTML, Parser::XHTML5, Parser::XML

Instance Attribute Summary

Attributes inherited from Core::Kernel

#doc_type, #document_hook, #element_cache, #element_hook

Instance Method Summary collapse

Methods inherited from Core::Kernel

#attr, #attr_map, #attrs, #character_encoding, #character_encoding=, #content, #cxtag, #document, #document=, #element, #elements, #elements_, #find, #flush, #read, #remove_element, #root_element, #shadow

Constructor Details

#initialize ⇒ ParserImpl
#initialize(ps) ⇒ ParserImpl

initializer (イニシャライザ)

Overloads:



4795
4796
4797
4798
4799
4800
4801
4802
4803
4804
4805
4806
# File 'lib/meteor.rb', line 4795

# Initializer.
#
# Calls the superclass initializer with no arguments, sets the document type
# to Parser::XHTML, then dispatches on the number of arguments received.
#
# @overload initialize
#   No further setup is performed.
# @overload initialize(ps)
#   @param ps [Object] passed unchanged to initialize_1 (defined outside this listing)
# @raise [ArgumentError] if more than one argument is given
def initialize(*args)
  super()
  @doc_type = Parser::XHTML

  case args.length
  when ZERO
    # Nothing further to set up for the zero-argument form.
  when ONE
    initialize_1(args.first)
  else
    # Report the offending count so the caller can see what went wrong.
    raise ArgumentError, "wrong number of arguments (given #{args.length}, expected 0..1)"
  end
end

Instance Method Details

#content_type ⇒ String

get content type (コンテントタイプを取得する)

Returns:

  • (String)

    content type (コンテントタイプ)



4853
4854
4855
# File 'lib/meteor.rb', line 4853

# Get the content type.
#
# Delegates to the object held in @root and returns its content_type.
#
# @return [String] content type
def content_type
  @root.content_type
end

#parse ⇒ Object

parse document (ドキュメントを解析する)



4832
4833
4834
# File 'lib/meteor.rb', line 4832

# Parse the document.
#
# Runs analyze_ml (defined outside this listing) and returns its result.
#
# @return [Object] whatever analyze_ml returns
def parse
  analyze_ml
end