Module: Metanorma::Standoc::Regex

Included in:
Cleanup, Refs, Section
Defined in:
lib/metanorma/standoc/regex.rb

Constant Summary collapse

# Matches a string that is, in its entirety, a decimal number: an optional
# sign, an optional integer part, an optional decimal point, at least one
# digit, and an optional exponent whose sign is mandatory (e.g. "1e+10").
#
# Anchored with \A and \z rather than ^ and $: in Ruby ^/$ match at every
# line boundary, so a multi-line string containing one numeric line
# (e.g. "abc\n1") would otherwise be accepted as numeric.
#
# Capture groups: 1 = mantissa including sign, 2 = mantissa sign,
# 3 = exponent including "e"/"E", 4 = exponent sign.
NUMERIC_REGEX =
%r{\A((\+|-)?\d*\.?\d+)([eE](\+|-)\d+)?\z}
CONN_REGEX_STR =

Connector keywords (and, or, from, to — each followed by "!") that extend localities to cover ISO referencing.

"(?<conn>and|or|from|to)!".freeze
# Regex alternation (as a frozen string) of the recognised locality keywords,
# followed by the open-ended "locality:<name>" form, whose name runs up to the
# first space, tab, newline, carriage return, or any of : , ; =
LOCALITIES =
[
  *%w[section clause part paragraph chapter page line],
  *%w[table annex figure example note formula list time anchor],
  "locality:[^ \\t\\n\\r:,;=]+",
].join("|").freeze
# Pattern source (frozen string) for one locality at the start of a line,
# followed by the remaining text. Two alternatives are accepted:
#   * an optional connector (CONN_REGEX_STR), a locality keyword from
#     LOCALITIES (group `locality`), a separator (whitespace or "="), a value
#     (group `ref`) and an optional "-" plus range end (group `to`);
#   * a bare `whole`, `title` or `locality:<name>` (group `locality2`).
# Either is followed by optional punctuation (group `punct`), whitespace, and
# the rest of the line (group `text`).
#
# `ref` and `to` are each either an unquoted token or a double-quoted string.
# The quoted form of `to` is "[^"]+" so that it accepts one or more
# characters, like `ref`; with "[^"]" a quoted range end longer than one
# character did not match and fell through into `text`.
#
# NOTE(review): the line breaks and indentation look like layout only;
# presumably the string is compiled through to_regex, which strips
# whitespace - confirm against callers.
LOCALITY_REGEX_STR =
<<~REGEXP.freeze
  ^((#{CONN_REGEX_STR})?
      (?<locality>#{LOCALITIES})(\\s+|=)
         (?<ref>[^"][^ \\t\\n,:;-]*|"[^"]+")
           (-(?<to>[^"][^ \\t\\n,:;-]*|"[^"]+"))?|
    (?<locality2>whole|title|locality:[^ \\t\\n\\r:,;=]+))(?<punct>[,:;]?)\\s*
   (?<text>.*)$
REGEXP
# Pattern source (frozen string) for a locality given as a bare value, with no
# locality keyword. Named groups:
#   conn0 - the connector matched by CONN_REGEX_STR (required here: the group
#           is not followed by "?")
#   value - one or more characters other than = , ; : tab, newline or
#           carriage return
#   punct - a single , or ; or tab/newline/carriage return, or end of line
# The negative lookahead rejects input that continues with "whole", "title"
# or "locality:" immediately after the connector.
#
# NOTE(review): the line breaks and indentation look like layout only;
# presumably the string is compiled through to_regex, which strips
# whitespace - confirm against callers.
LOCALITY_REGEX_VALUE_ONLY_STR =
<<~REGEXP.freeze
  ^(?<conn0>(#{CONN_REGEX_STR}))
    (?!whole|title|locality:)
    (?<value>[^=,;:\\t\\n\\r]+)
    (?<punct>[,;\\t\\n\\r]|$)
REGEXP
# Pattern source (frozen string) for a locality whose value contains at least
# two hyphens, i.e. three hyphen-separated parts. Named groups:
#   locality - optional connector (CONN_REGEX_STR), a keyword from LOCALITIES,
#              and the separator (whitespace or "=")
#   ref      - the whole hyphenated value: a first part not starting with a
#              double quote, then "-" and a second part, then "-" and a third
#              part (only the third part may itself contain further hyphens)
#   text     - optional , : or ; followed by whitespace and the rest of the line
#
# NOTE(review): the line breaks and indentation look like layout only;
# presumably the string is compiled through to_regex, which strips
# whitespace - confirm against callers.
LOCALITY_REGEX_STR_TRIPLEDASH =
<<~REGEXP.freeze
  ^(?<locality>(#{CONN_REGEX_STR})?
      (#{LOCALITIES})(\\s+|=))
         (?<ref>[^"][^ \\t\\n,:;-]*
           -[^ \\t\\n,:;"-]+
           -[^ \\t\\n,:;"]+)
    (?<text>[,:;]?\\s*
   .*)$
REGEXP
# Whitespace-formatted pattern source (frozen string) for a term reference:
# an <xref ...> or <concept ...> opening tag, optionally followed by content
# and the matching closing tag (group `xref`), then optionally a comma and
# free text (group `text`).
#
# The separator after the comma is written as "\\s*" so that the regex escape
# survives into the compiled pattern. A bare "\s" inside a heredoc is the
# string escape for a space character; the whitespace stripping applied when
# compiling deleted it, so `text` was captured with its leading space.
# "\\s*" rather than "\\s" keeps inputs with no space after the comma matching.
TERM_REFERENCE_RE_STR =
<<~REGEXP.freeze
  ^(?<xref><(xref|concept)[^>]+>(.*?</(xref|concept)>)?)
         (,\\s*(?<text>.*))?
  $
REGEXP
# Compiled form of TERM_REFERENCE_RE_STR: all literal whitespace is removed,
# any "_" is replaced by the regex escape \s, and the result is compiled
# case-insensitively with "." also matching newlines.
TERM_REFERENCE_RE =
Regexp.new(TERM_REFERENCE_RE_STR.gsub(/\s/, "").gsub(/_/, "\\s"),
Regexp::IGNORECASE | Regexp::MULTILINE)
# Matches a line starting with a <ref id="..."> element whose bracketed label
# is an ISO/IEC code, optionally with a year, followed by free text.
# Named groups:
#   anchor - value of the id attribute
#   usrlbl - optional parenthesised label at the start of the brackets
#   code   - "ISO" or "IEC", any non-digits, whitespace, then digits/hyphens;
#            or the literal "IEV"
#   year   - optional, after a colon: a digit followed by digits/hyphens
#   text   - everything after </ref>, an optional comma and whitespace
# Flags: x (whitespace in the pattern is ignored), m ("." matches newlines).
ISO_REF =
%r{^<ref\sid="(?<anchor>[^"]+)">
\[(?<usrlbl>\([^)]+\))?(?<code>(?:ISO|IEC)[^0-9]*\s[0-9-]+|IEV)
(?::(?<year>[0-9][0-9-]+))?\]</ref>,?\s*(?<text>.*)$}xm
# Matches a line starting with a <ref id="..."> element whose bracketed label
# is an ISO/IEC code followed by a colon and a dash placeholder instead of a
# year: "--", an en dash, an em dash, or the entity &#8211; / &#8212;.
# Named groups:
#   anchor - value of the id attribute
#   usrlbl - optional parenthesised label at the start of the brackets
#   code   - "ISO" or "IEC", any non-digits, whitespace, then digits/hyphens
#   fn     - optional: content of the <p> inside a <fn> element that follows
#            </ref> (any characters other than "]")
#   text   - the remainder after an optional comma and optional whitespace
# Flags: x (whitespace in the pattern is ignored), m ("." matches newlines).
ISO_REF_NO_YEAR =
%r{^<ref\sid="(?<anchor>[^"]+)">
      \[(?<usrlbl>\([^)]+\))?(?<code>(?:ISO|IEC)[^0-9]*\s[0-9-]+):
      (?:--|–|—|&\#821[12];)\]</ref>,?\s*
(?:<fn[^>]*>\s*<p>(?<fn>[^\]]+)</p>\s*</fn>)?,?\s?(?<text>.*)$}xm
# Matches a line starting with a <ref id="..."> element whose bracketed label
# is an ISO/IEC code followed by "(all parts)".
# Named groups:
#   anchor - value of the id attribute
#   usrlbl - optional parenthesised label at the start of the brackets
#   code   - "ISO" or "IEC", any non-digits, whitespace, then digits only
#            (no hyphens, unlike ISO_REF)
#   year   - optional, after a colon: a dash placeholder ("--", en dash,
#            em dash, &#8211; / &#8212;) or a digit followed by digits/hyphens
#   fn     - optional: content of the <p> inside a <fn> element that follows
#            </ref> (any characters other than "]")
#   text   - the remainder of the input
# Flags: x (whitespace in the pattern is ignored), m ("." matches newlines).
ISO_REF_ALL_PARTS =
%r{^<ref\sid="(?<anchor>[^"]+)">
      \[(?<usrlbl>\([^)]+\))?(?<code>(?:ISO|IEC)[^0-9]*\s[0-9]+)
      (?::(?<year>--|–|—|&\#821[12];|[0-9][0-9-]+))?\s
      \(all\sparts\)\]</ref>,?\s*
(?:<fn[^>]*>\s*<p>(?<fn>[^\]]+)</p>\s*</fn>,?\s?)?(?<text>.*)$}xm
# Matches a line starting with a <ref id="..."> element whose content is a
# square-bracketed label of any form.
# Named groups:
#   anchor - value of the id attribute
#   usrlbl - optional parenthesised label at the start of the brackets
#   code   - the shortest run of characters up to "]</ref>" (lazy match)
#   text   - everything after </ref>, an optional comma and whitespace
# Flags: x (whitespace in the pattern is ignored), m ("." matches newlines).
NON_ISO_REF =
%r{^<ref\sid="(?<anchor>[^"]+)">
\[(?<usrlbl>\([^)]+\))?(?<code>.+?)\]</ref>,?\s*(?<text>.*)$}xm
# Same shape as NON_ISO_REF but without the square brackets around the label:
# matches a line starting with <ref id="...">label</ref>.
# Named groups:
#   anchor - value of the id attribute
#   usrlbl - optional parenthesised label at the start of the element content
#   code   - the shortest run of characters up to "</ref>" (lazy match)
#   text   - everything after </ref>, an optional comma and whitespace
# Flags: x (whitespace in the pattern is ignored), m ("." matches newlines).
NON_ISO_REF1 =
%r{^<ref\sid="(?<anchor>[^"]+)">
(?<usrlbl>\([^)]+\))?(?<code>.+?)</ref>,?\s*(?<text>.*)$}xm

Instance Method Summary collapse

Instance Method Details

#to_regex(str) ⇒ Object



23
24
25
# File 'lib/metanorma/standoc/regex.rb', line 23

# Compiles a pattern written with layout whitespace into a Regexp.
#
# Every whitespace character in +str+ is removed before compilation, so the
# pattern may be spread over several indented lines. The result ignores case
# and lets "." match newlines.
#
# @param str [String] pattern source, possibly containing layout whitespace
# @return [Regexp] the compiled pattern
def to_regex(str)
  flags = Regexp::IGNORECASE | Regexp::MULTILINE
  compact_source = str.gsub(/\s/, "")
  Regexp.new(compact_source, flags)
end