Class: Meteor::Core::Xhtml::ParserImpl

Inherits:
Kernel show all
Defined in:
lib/meteor.rb

Overview

XHTMLパーサ

Constant Summary collapse

KAIGYO_CODE =

KAIGYO_CODE = "\r?\n|\r"

["\r\n","\n","\r"]
NBSP_2 =
'&nbsp;'
NBSP_3 =
'nbsp'
BR_1 =
"\r?\n|\r"
BR_2 =
'<br/>'
BR_3 =
'<br\\/>'
META =
'META'
META_S =
'meta'
MATCH_TAG_2 =

MATCH_TAG_2 = “textarea|option|pre”

['textarea','option','pre']
ATTR_LOGIC =
['disabled','readonly','checked','selected','multiple']
OPTION =
'option'
SELECTED =
'selected'
INPUT =
'input'
CHECKED =
'checked'
RADIO =
'radio'
DISABLE_ELEMENT =

DISABLE_ELEMENT = “input|textarea|select|optgroup”

['input','textarea','select','optgroup']
DISABLED =
'disabled'
READONLY_TYPE =

READONLY_TYPE = “text|password”

['text','password']
TEXTAREA =
'textarea'
READONLY =
'readonly'
SELECT =
'select'
MULTIPLE =
'multiple'
SELECTED_M =

@@pattern_option = Regexp.new(OPTION)
@@pattern_selected = Regexp.new(SELECTED)
@@pattern_input = Regexp.new(INPUT)
@@pattern_checked = Regexp.new(CHECKED)
@@pattern_radio = Regexp.new(RADIO)
@@pattern_disable_element = Regexp.new(DISABLE_ELEMENT)
@@pattern_disabled = Regexp.new(DISABLED)
@@pattern_readonly_type = Regexp.new(READONLY_TYPE)
@@pattern_textarea = Regexp.new(TEXTAREA)
@@pattern_readonly = Regexp.new(READONLY)
@@pattern_select = Regexp.new(SELECT)
@@pattern_multiple = Regexp.new(MULTIPLE)

'\\sselected="[^"]*"\\s|\\sselected="[^"]*"$'
SELECTED_M1 =
'\\sselected="([^"]*)"\\s|\\sselected="([^"]*)"$'
SELECTED_R =
'selected="[^"]*"'
SELECTED_U =
'selected="selected"'
CHECKED_M =
'\\schecked="[^"]*"\\s|\\schecked="[^"]*"$'
CHECKED_M1 =
'\\schecked="([^"]*)"\\s|\\schecked="([^"]*)"$'
CHECKED_R =
'checked="[^"]*"'
CHECKED_U =
'checked="checked"'
DISABLED_M =
'\\sdisabled="[^"]*"\\s|\\sdisabled="[^"]*"$'
DISABLED_M1 =
'\\sdisabled="([^"]*)"\\s|\\sdisabled="([^"]*)"$'
DISABLED_R =
'disabled="[^"]*"'
DISABLED_U =
'disabled="disabled"'
READONLY_M =
'\\sreadonly="[^"]*"\\s|\\sreadonly="[^"]*"$'
READONLY_M1 =
'\\sreadonly="([^"]*)"\\s|\\sreadonly="([^"]*)"$'
READONLY_R =
'readonly="[^"]*"'
READONLY_U =
'readonly="readonly"'
MULTIPLE_M =
'\\smultiple="[^"]*"\\s|\\smultiple="[^"]*"$'
MULTIPLE_M1 =
'\\smultiple="([^"]*)"\\s|\\smultiple="([^"]*)"$'
MULTIPLE_R =
'multiple="[^"]*"'
MULTIPLE_U =
'multiple="multiple"'
HTTP_EQUIV =
'http-equiv'
CONTENT_TYPE =
'Content-Type'
CONTENT =
'content'
TRUE =
'true'
FALSE =
'false'
TYPE_L =
'type'
TYPE_U =
'TYPE'
PATTERN_UNESCAPE =
'&(amp|quot|apos|gt|lt|nbsp);'
TABLE_FOR_ESCAPE_ =
{
'&' => '&amp;',
'"' => '&quot;',
'\'' => '&apos;',
'<' => '&lt;',
'>' => '&gt;',
' ' => '&nbsp;',
}
TABLE_FOR_ESCAPE_CONTENT_ =
{
'&' => '&amp;',
'"' => '&quot;',
'\'' => '&apos;',
'<' => '&lt;',
'>' => '&gt;',
' ' => '&nbsp;',
"\r\n" => '<br/>',
"\r" => '<br/>',
"\n" => '<br/>',
}
PATTERN_ESCAPE =
'[&"\'<> ]'
PATTERN_ESCAPE_CONTENT =
'[&"\'<> \\n]'
@@pattern_selected_m =

@@pattern_match_tag = Regexp.new(MATCH_TAG)
@@pattern_match_tag2 = Regexp.new(MATCH_TAG_2)

Regexp.new(SELECTED_M)
@@pattern_selected_m1 =
Regexp.new(SELECTED_M1)
@@pattern_selected_r =
Regexp.new(SELECTED_R)
@@pattern_checked_m =
Regexp.new(CHECKED_M)
@@pattern_checked_m1 =
Regexp.new(CHECKED_M1)
@@pattern_checked_r =
Regexp.new(CHECKED_R)
@@pattern_disabled_m =
Regexp.new(DISABLED_M)
@@pattern_disabled_m1 =
Regexp.new(DISABLED_M1)
@@pattern_disabled_r =
Regexp.new(DISABLED_R)
@@pattern_readonly_m =
Regexp.new(READONLY_M)
@@pattern_readonly_m1 =
Regexp.new(READONLY_M1)
@@pattern_readonly_r =
Regexp.new(READONLY_R)
@@pattern_multiple_m =
Regexp.new(MULTIPLE_M)
@@pattern_multiple_m1 =
Regexp.new(MULTIPLE_M1)
@@pattern_multiple_r =
Regexp.new(MULTIPLE_R)
@@pattern_escape =
Regexp.new(PATTERN_ESCAPE)
@@pattern_escape_content =
Regexp.new(PATTERN_ESCAPE_CONTENT)
@@pattern_br_2 =
Regexp.new(BR_3)
@@pattern_unescape =
Regexp.new(PATTERN_UNESCAPE)
@@pattern_set_mono1 =
Regexp.new(SET_MONO_1)
@@pattern_and_1 =
Regexp.new(AND_1)
@@pattern_lt_1 =
Regexp.new(LT_1)
@@pattern_gt_1 =
Regexp.new(GT_1)
@@pattern_dq_1 =
Regexp.new(DOUBLE_QUATATION)
@@pattern_ap_1 =
Regexp.new(AP_1)
@@pattern_space_1 =
Regexp.new(SPACE)
@@pattern_br_1 =
Regexp.new(BR_1)
@@pattern_lt_2 =
Regexp.new(LT_2)
@@pattern_gt_2 =
Regexp.new(GT_2)
@@pattern_dq_2 =
Regexp.new(QO_2)
@@pattern_ap_2 =
Regexp.new(AP_2)
@@pattern_space_2 =
Regexp.new(NBSP_2)
@@pattern_and_2 =
Regexp.new(AND_2)

Constants inherited from Kernel

Kernel::AND_1, Kernel::AND_2, Kernel::AND_3, Kernel::AP_1, Kernel::AP_2, Kernel::AP_3, Kernel::ATTR_EQ, Kernel::CLEAN_1, Kernel::CLEAN_2, Kernel::DOUBLE_QUATATION, Kernel::EMPTY, Kernel::EN_1, Kernel::ERASE_ATTR_1, Kernel::ESCAPE_ENTITY_REF, Kernel::GET_ATTRS_MAP, Kernel::GET_ATTR_1, Kernel::GT_1, Kernel::GT_2, Kernel::GT_3, Kernel::LT_1, Kernel::LT_2, Kernel::LT_3, Kernel::MODE, Kernel::MODE_AF, Kernel::MODE_BF, Kernel::MODE_UTF8, Kernel::QO_2, Kernel::QO_3, Kernel::SEARCH_CX_1, Kernel::SEARCH_CX_2, Kernel::SEARCH_CX_3, Kernel::SEARCH_CX_4, Kernel::SEARCH_CX_5, Kernel::SEARCH_CX_6, Kernel::SET_ATTR_1, Kernel::SET_CX_1, Kernel::SET_CX_2, Kernel::SET_CX_3, Kernel::SET_CX_4, Kernel::SET_MONO_1, Kernel::SPACE, Kernel::TAG_CLOSE, Kernel::TAG_CLOSE3, Kernel::TAG_OPEN, Kernel::TAG_OPEN3, Kernel::TAG_SEARCH_1_1, Kernel::TAG_SEARCH_1_2, Kernel::TAG_SEARCH_1_3, Kernel::TAG_SEARCH_1_4, Kernel::TAG_SEARCH_1_4_2, Kernel::TAG_SEARCH_2_1, Kernel::TAG_SEARCH_2_1_2, Kernel::TAG_SEARCH_2_2, Kernel::TAG_SEARCH_2_2_2, Kernel::TAG_SEARCH_2_3, Kernel::TAG_SEARCH_2_3_2, Kernel::TAG_SEARCH_2_3_2_2, Kernel::TAG_SEARCH_2_4, Kernel::TAG_SEARCH_2_4_2, Kernel::TAG_SEARCH_2_4_2_2, Kernel::TAG_SEARCH_2_4_2_3, Kernel::TAG_SEARCH_2_4_3, Kernel::TAG_SEARCH_2_4_3_2, Kernel::TAG_SEARCH_2_4_4, Kernel::TAG_SEARCH_2_6, Kernel::TAG_SEARCH_2_7, Kernel::TAG_SEARCH_3_1, Kernel::TAG_SEARCH_3_1_2, Kernel::TAG_SEARCH_3_1_2_2, Kernel::TAG_SEARCH_3_2, Kernel::TAG_SEARCH_3_2_2, Kernel::TAG_SEARCH_3_2_2_2, Kernel::TAG_SEARCH_4_1, Kernel::TAG_SEARCH_4_2, Kernel::TAG_SEARCH_4_3, Kernel::TAG_SEARCH_4_4, Kernel::TAG_SEARCH_4_5, Kernel::TAG_SEARCH_4_6, Kernel::TAG_SEARCH_4_7, Kernel::TAG_SEARCH_4_7_2, Kernel::TAG_SEARCH_NC_1_1, Kernel::TAG_SEARCH_NC_1_2, Kernel::TAG_SEARCH_NC_1_3, Kernel::TAG_SEARCH_NC_1_4, Kernel::TAG_SEARCH_NC_1_4_2, Kernel::TAG_SEARCH_NC_2_1, Kernel::TAG_SEARCH_NC_2_1_2, Kernel::TAG_SEARCH_NC_2_2, Kernel::TAG_SEARCH_NC_2_2_2, Kernel::TAG_SEARCH_NC_2_3, Kernel::TAG_SEARCH_NC_2_3_2, 
Kernel::TAG_SEARCH_NC_2_3_2_2, Kernel::TAG_SEARCH_NC_2_4, Kernel::TAG_SEARCH_NC_2_4_2, Kernel::TAG_SEARCH_NC_2_4_2_2, Kernel::TAG_SEARCH_NC_2_4_2_3, Kernel::TAG_SEARCH_NC_2_4_3, Kernel::TAG_SEARCH_NC_2_4_3_2, Kernel::TAG_SEARCH_NC_2_4_4, Kernel::TAG_SEARCH_NC_2_6, Kernel::TAG_SEARCH_NC_2_7, Kernel::TAG_SEARCH_NC_3_1, Kernel::TAG_SEARCH_NC_3_1_2, Kernel::TAG_SEARCH_NC_3_1_2_2, Kernel::TAG_SEARCH_NC_3_2, Kernel::TAG_SEARCH_NC_3_2_2, Kernel::TAG_SEARCH_NC_3_2_2_2

Constants inherited from Parser

Parser::HTML, Parser::XHTML, Parser::XML

Instance Attribute Summary

Attributes inherited from Kernel

#doc_type, #element_cache

Instance Method Summary collapse

Methods inherited from Kernel

#attribute, #attribute_map, #character_encoding, #character_encoding=, #content, #cxtag, #document, #document=, #element, #execute, #flush, #remove_element, #root_element

Constructor Details

#initialize ⇒ ParserImpl
#initialize(ps) ⇒ ParserImpl

イニシャライザ

Overloads:



4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
# File 'lib/meteor.rb', line 4035

# Initializer. Sets the document type to Parser::XHTML, then dispatches on
# the number of arguments received.
#
# @overload initialize
#   Delegates to initialize_0.
# @overload initialize(ps)
#   Delegates to initialize_1 with the single argument.
#   @param ps the sole argument, forwarded unchanged to initialize_1
#     (NOTE(review): presumably an existing parser to copy from -- confirm)
# @raise [ArgumentError] if the argument count matches neither ZERO nor ONE
def initialize(*args)
  super()
  @doc_type = Parser::XHTML
  case args.length
  when ZERO
    initialize_0
  when ONE
    initialize_1(args[0])
  else
    # Mirror Ruby's own arity-error wording so the failure explains itself
    # instead of surfacing as a message-less ArgumentError.
    raise ArgumentError, "wrong number of arguments (given #{args.length}, expected 0..1)"
  end
end

Instance Method Details

#content_type ⇒ String

コンテントタイプを取得する

Returns:

  • (String)

    コンテントタイプ



4102
4103
4104
# File 'lib/meteor.rb', line 4102

# Returns the content type held by the root element.
#
# @return [String] the content type, as reported by @root
def content_type
  root_element = @root
  root_element.content_type
end

#parse(document) ⇒ Object

ドキュメントをパーサにセットする

Parameters:

  • document (String)

    ドキュメント



4071
4072
4073
4074
# File 'lib/meteor.rb', line 4071

# Hands a document to the parser: stores it on the root element and then
# runs analyze_ml.
#
# @param document [String] the document text
# @return whatever analyze_ml returns
def parse(document)
  root_element = @root
  root_element.document = document
  analyze_ml
end

#read(file_path, encoding) ⇒ Object

ファイルを読み込み、パーサにセットする

Parameters:

  • file_path

    ファイルパス

  • encoding

    エンコーディング



4081
4082
4083
4084
# File 'lib/meteor.rb', line 4081

# Reads a file through the superclass implementation, then runs analyze_ml.
#
# @param file_path the path of the file to read
# @param encoding the encoding to read it with
# @return whatever analyze_ml returns
def read(file_path, encoding)
  # Bare `super` forwards file_path and encoding exactly as received.
  super
  analyze_ml
end