Class: Metanorma::Standoc::Cleanup::SpansToBibitem

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/metanorma/standoc/spans_to_bibitem.rb,
lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb

Constant Summary

Constants included from Utils

Utils::SECTION_CONTAINERS, Utils::SUBCLAUSE_XPATH

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

#adoc2xml, #asciimath_key, #attr_code, #csv_split, #dl_to_attrs, #dl_to_elems, #document_ns_attributes, #grkletters, #insert_before, #isodoc, #kv_parse, #link_unwrap, #noko, #processor, #quoted_csv_split, #refid?, #section_containers, #term_expr, #to_xml, #uuid?, #wrap_in_para, #xml_encode

Constructor Details

#initialize(bib) ⇒ SpansToBibitem

Returns a new instance of SpansToBibitem.



11
12
13
14
15
16
17
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 11

# Prepares the span table for one bibliographic entry.
#
# Spans under the entry's formattedref are extracted and preprocessed first.
# The entry's docidentifier children are then preprocessed separately and
# combined with the span-derived identifiers through #override_docids, where
# a span-derived identifier wins over an element-derived one of the same type.
#
# @param bib [Object] bibitem node; it is queried with +xpath+/+at+ by the
#   extraction helpers (presumably a Nokogiri element -- confirm with callers)
def initialize(bib)
  @bib = bib
  @err = []
  @spans = spans_preprocess(extract_spans(bib))
  element_docids = spans_preprocess(extract_docid(bib))[:docid]
  @spans[:docid] = override_docids(element_docids, @spans[:docid])
end

Instance Attribute Details

#err ⇒ Object (readonly)

Returns the value of attribute err.



9
10
11
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 9

# @return [Array<Hash>] the messages collected in @err during processing;
#   each entry carries :msg and, for missing surnames, fatal: true
def err = @err

#out ⇒ Object (readonly)

Returns the value of attribute out.



9
10
11
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 9

# @return [Object, nil] the bibitem element stored in @out by #convert;
#   nil until @out has been assigned
def out = @out

Instance Method Details

#convert ⇒ Object



28
29
30
31
32
33
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 28

# Renders the preprocessed spans as a <bibitem> element and stores its root
# node in @out. When a :type span was collected, it becomes the element's
# +type+ attribute.
#
# @return [SpansToBibitem] self, so that +out+ / +err+ can be chained
def convert
  body = spans_to_bibitem(@spans)
  @out = Nokogiri::XML("<bibitem>#{body}</bibitem>").root
  bibtype = @spans[:type]
  @out["type"] = bibtype if bibtype
  self
end

#empty_span_hash ⇒ Object



32
33
34
35
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 32

# Builds the accumulator that span preprocessing fills in.
#
# @return [Hash] a new hash with a fresh empty Array under each repeatable
#   key (:contrib, :docid, :uri, :date, :classification, :image) and a fresh
#   empty Hash under :extent and :in
def empty_span_hash
  list_keys = %i[contrib docid uri date classification image]
  list_keys.to_h { |key| [key, []] }.merge(extent: {}, in: {})
end

#extract_docid(bib) ⇒ Object



25
26
27
28
29
30
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 25

# Collects the entry's direct docidentifier children as "docid" span records.
#
# Each docidentifier is removed from +bib+ after being read, unless +bib+ has
# a direct title child, in which case it is left in place.
#
# @param bib [Object] node queried with +xpath+ and +at+
# @return [Array<Hash>] one { key: "docid", type:, val: } hash per identifier
def extract_docid(bib)
  bib.xpath("./docidentifier").map do |docid|
    record = { key: "docid", type: docid["type"], val: docid.text }
    docid.remove unless bib.at("./title")
    record
  end
end

#extract_spans(bib) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 5

# Harvests span and image markup from the entry's formattedref.
#
# Every span that has no span ancestor is handed to #extract_spans1, which
# records it and unwraps or removes it. Every image then has its +id+
# attribute deleted, is detached from the tree, and is recorded as an
# "image" span whose value is the image's XML.
#
# @param bib [Object] node queried with +xpath+
# @return [Array<Hash>] span records of the form { key:, type:, val: }
def extract_spans(bib)
  found = []
  bib.xpath("./formattedref//span").each do |span|
    # spans nested inside another span are skipped
    next if span.at("./ancestor::span")

    extract_spans1(span, found)
  end
  bib.xpath("./formattedref//image").each do |image|
    image.delete("id")
    found << { key: "image", type: nil, val: image.remove.to_xml }
  end
  found
end

#extract_spans1(span, acc) ⇒ Object



17
18
19
20
21
22
23
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 17

# Records one span in +acc+ and takes it out of the tree.
#
# The span's class is split at the first "." into key and type; the value is
# the XML of the span's children. A span whose class is exactly "type" is
# removed together with its content; any other span is replaced by its
# children.
#
# @param span [Object] span node (must carry a class attribute)
# @param acc [Array<Hash>] accumulator the record is appended to
def extract_spans1(span, acc)
  key, type = span["class"].split(".", 2)
  acc << { key: key, type: type, val: span.children.to_xml }
  if span["class"] == "type"
    span.remove
  else
    span.replace(span.children)
  end
end

#host_rearrange(ret) ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 95

# Completes the host ("in") record once a host title is present.
#
# Without ret[:in][:title] the hash is returned untouched. Otherwise the
# host is given the empty defaults from #empty_span_hash plus type "misc"
# for any key it does not yet have, :series is moved from the entry to the
# host, and if the entry type starts with "in" the host type becomes the
# entry type with that prefix stripped.
#
# @param ret [Hash] preprocessed spans; modified in place
# @return [Hash] the same +ret+
def host_rearrange(ret)
  host = ret[:in]
  return ret unless host[:title]

  defaults = empty_span_hash.merge(type: "misc")
  # keep whatever the host already has; only fill the gaps
  host.merge!(defaults) { |_key, existing, _default| existing }
  host[:series] = ret.delete(:series)
  entry_type = ret[:type]
  host[:type] = entry_type.sub(/^in/, "") if /^in/.match?(entry_type)
  ret
end

#multiple_givennames?(span, contrib) ⇒ Boolean

Returns:

  • (Boolean)


127
128
129
130
131
132
133
134
135
136
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 127

# Tells whether +span+ is an additional given name / initials for the last
# contributor, i.e. that contributor already has :givenname or
# :"formatted-initials".
#
# Side effect when true: existing :"formatted-initials" of the last
# contributor are moved under :givenname, so the caller can append to it.
#
# @param span [Hash] span record; only span[:key] is read
# @param contrib [Array<Hash>] contributors collected so far
# @return [Boolean]
def multiple_givennames?(span, contrib)
  return false unless %w(formatted-initials givenname).include?(span[:key])

  person = contrib[-1]
  return false unless person[:"formatted-initials"] || person[:givenname]

  initials = person[:"formatted-initials"]
  if initials
    person[:givenname] = initials
    person.delete(:"formatted-initials")
  end
  true
end

#override_docids(old, new) ⇒ Object



19
20
21
22
23
24
25
26
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 19

# Merges two lists of document identifiers, letting +new+ take precedence.
#
# The result holds every entry of +new+, followed by those entries of +old+
# whose :type does not occur in +new+. Neither argument is modified; a fresh
# array is returned (the previous implementation appended to +new+ itself).
#
# @param old [Array<Hash>] identifiers to fall back on
# @param new [Array<Hash>] identifiers that override +old+ per :type
# @return [Array<Hash>] combined list
def override_docids(old, new)
  overridden = new.map { |docid| docid[:type] }
  new + old.reject { |docid| overridden.include?(docid[:type]) }
end

#span_preprocess1(span, ret) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 43

# Files one span record into the accumulator +ret+ according to its key.
#
# * uri / docid / classification: appended as { type:, val: } after the
#   value has been passed through link_unwrap
# * date: appended, with type defaulting to "published"
# * pages / volume / issue: appended under ret[:extent]
# * pubplace, title, type, series, edition, version, abstract: stored as a
#   single value (a later span replaces an earlier one)
# * image: appended; note: stored as a single { type:, val: }
# * publisher and the name keys: turned into contributors
# * in_title and the in_-prefixed name keys: the same, but on the host
#   record ret[:in]; the "in_" prefix is stripped from span[:key] in place
# * anything else: an error message is added to @err
#
# @param span [Hash] span record with :key, :type, :val
# @param ret [Hash] accumulator shaped like #empty_span_hash; modified
def span_preprocess1(span, ret)
  type = span[:type]
  text = span[:val]
  case span[:key]
  when "uri", "docid", "classification"
    unwrapped = link_unwrap(Nokogiri::XML.fragment(text)).to_xml
    ret[span[:key].to_sym] << { type: type, val: unwrapped }
  when "date"
    ret[:date] << { type: type || "published", val: text }
  when "pages", "volume", "issue"
    (ret[:extent][span[:key].to_sym] ||= []) << text
  when "pubplace", "title", "type", "series", "edition", "version",
       "abstract"
    ret[span[:key].to_sym] = text
  when "image"
    ret[:image] << { type: type, val: text }
  when "note"
    ret[:note] = { type: type, val: text }
  when "in_title"
    ret[:in][:title] = text
  when "publisher"
    ret[:contrib] << { role: "publisher", entity: "organization",
                       name: text }
  when "surname", "initials", "givenname", "formatted-initials"
    ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
  when "fullname"
    ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
  when "organization"
    ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
  when "in_surname", "in_initials", "in_givenname",
       "in_formatted-initials", "in_fullname", "in_organization"
    host_contrib = (ret[:in][:contrib] ||= [])
    # the contributor helpers expect the un-prefixed key
    span[:key].sub!(/^in_/, "")
    ret[:in][:contrib] =
      case span[:key]
      when "fullname" then spans_preprocess_fullname(span, host_contrib)
      when "organization" then spans_preprocess_org(span, host_contrib)
      else spans_preprocess_contrib(span, host_contrib)
      end
  else
    msg = "unrecognised key '#{span[:key]}' in " \
          "`span:#{span[:key]}[#{span[:val]}]`"
    @err << { msg: msg }
  end
end

#span_to_contrib(span, title) ⇒ Object



129
130
131
132
133
134
135
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 129

# Serialises one contributor record as a <contributor> element.
#
# Organizations are rendered inline from span[:name]; every other entity is
# rendered by #span_to_person.
#
# @param span [Hash] contributor record with :role and :entity
# @param title [String, nil] entry title, forwarded to #span_to_person
# @return [String] XML fragment
def span_to_contrib(span, title)
  organization = span[:entity] == "organization"
  entity = if organization
             "<organization><name>#{span[:name]}</name></organization>"
           else
             span_to_person(span, title)
           end
  "<contributor><role type='#{span[:role]}'/>#{entity}</contributor>"
end

#span_to_date(span) ⇒ Object



111
112
113
114
115
116
117
118
119
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 111

# Serialises a date record as a <date> element.
#
# A hyphen or en dash that is immediately followed by four digits is read as
# a range separator: the text before it becomes <from>, the rest <to>.
# Any other value is emitted as a single <on>.
#
# @param span [Hash] date record with :val and optional :type
# @return [String] XML fragment
def span_to_date(span)
  range_separator = /[-–](?=\d{4})/
  text = span[:val]
  content = if range_separator.match?(text)
              parts = text.split(range_separator, 2)
              "<from>#{parts[0]}</from><to>#{parts[1]}</to>"
            else
              "<on>#{text}</on>"
            end
  type_attr = span[:type] ? " type='#{span[:type]}'" : ""
  "<date#{type_attr}>#{content}</date>"
end

#span_to_docid(span, key) ⇒ Object



104
105
106
107
108
109
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 104

# Wraps a typed value in an element named +key+.
#
# @param span [Hash] record with :val and optional :type
# @param key [String] element name to emit (e.g. "uri", "docidentifier")
# @return [String] XML fragment; the +type+ attribute is only written when
#   span[:type] is set
def span_to_docid(span, key)
  type_attr = span[:type] ? " type='#{span[:type]}'" : ""
  "<#{key}#{type_attr}>#{span[:val]}</#{key}>"
end

#span_to_extent(span, key) ⇒ Object



95
96
97
98
99
100
101
102
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 95

# Serialises one extent value as a <locality> element.
#
# The value is split on hyphens / en dashes: the first piece becomes
# <referenceFrom>, the second (if any) <referenceTo>; further pieces are
# ignored.
#
# @param span [String] extent text such as "10-20"
# @param key [String] locality type, e.g. "page"
# @return [String] XML fragment
def span_to_extent(span, key)
  from, to = span.split(/[-–]/)
  upper = to ? "<referenceTo>#{to}</referenceTo>" : ""
  "<locality type='#{key}'>" \
    "<referenceFrom>#{from}</referenceFrom>#{upper}</locality>"
end

#span_to_person(span, title) ⇒ Object



144
145
146
147
148
149
150
151
152
153
154
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 144

# Serialises a person contributor as a <person> element.
#
# The record is first checked by #validate_span_to_person. Formatted
# initials, when present, are emitted on their own; otherwise each given
# name becomes a <forename>.
#
# @param span [Hash] contributor record (:surname, :givenname,
#   :"formatted-initials")
# @param title [String, nil] entry title, used only for the validation message
# @return [String] XML fragment
def span_to_person(span, title)
  validate_span_to_person(span, title)
  initials = span[:"formatted-initials"]
  given = if initials
            "<formatted-initials>#{initials}</formatted-initials>"
          else
            Array(span[:givenname]).map { |n| "<forename>#{n}</forename>" }.join
          end
  "<person><name>#{given}<surname>#{span[:surname]}</surname></name>" \
    "</person>"
end

#spans_preprocess(spans) ⇒ Object



37
38
39
40
41
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 37

# Folds a list of span records into one structured hash.
#
# Starts from #empty_span_hash, files each record with #span_preprocess1 in
# order, and finishes with #host_rearrange.
#
# @param spans [Array<Hash>] span records ({ key:, type:, val: })
# @return [Hash] the structured spans
def spans_preprocess(spans)
  collected = spans.each_with_object(empty_span_hash) do |span, acc|
    span_preprocess1(span, acc)
  end
  host_rearrange(collected)
end

#spans_preprocess_contrib(span, contrib) ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 109

# Adds one name-part span (surname, given name, initials) to the
# contributor list.
#
# An "initials" key is rewritten in place to "formatted-initials". A new
# person record is opened when #spans_preprocess_new_contrib? says so. If
# the last person already has given names / initials
# (#multiple_givennames?), the value is appended to :givenname; otherwise it
# is stored under the span's key.
#
# @param span [Hash] span record; span[:key] may be modified
# @param contrib [Array<Hash>] contributors so far; modified and returned
# @return [Array<Hash>] +contrib+
def spans_preprocess_contrib(span, contrib)
  span[:key] = "formatted-initials" if span[:key] == "initials"
  if spans_preprocess_new_contrib?(span, contrib)
    contrib << { role: span[:type] || "author", entity: "person" }
  end
  person = contrib[-1]
  if multiple_givennames?(span, contrib)
    person[:givenname] = [person[:givenname], span[:val]].flatten
  else
    person[span[:key].to_sym] = span[:val]
  end
  contrib
end

#spans_preprocess_fullname(span, contrib) ⇒ Object



138
139
140
141
142
143
144
145
146
147
148
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 138

# Splits a full name into a person record and appends it to +contrib+.
#
# A space is inserted after any "." that is directly followed by a letter,
# so "J.K. Rowling" is read as "J. K. Rowling". The last word is the
# surname. If there are preceding words and each ends in ".", they are
# stored joined as :"formatted-initials"; otherwise they are stored as the
# :givenname array (empty for a single-word name).
#
# The name is split on runs of whitespace, ignoring leading and trailing
# whitespace. Splitting on a single space, as before, produced empty name
# parts for input such as "John  Smith" or " J. Smith", which ended up as
# empty forenames and defeated the all-initials check.
#
# @param span [Hash] span record with :val (the full name) and optional :type
# @param contrib [Array<Hash>] contributors so far; modified and returned
# @return [Array<Hash>] +contrib+
def spans_preprocess_fullname(span, contrib)
  *given, surname =
    span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(" ")
  person = { role: span[:type] || "author", entity: "person",
             surname: surname }
  if !given.empty? && given.all? { |part| part.end_with?(".") }
    person[:"formatted-initials"] = given.join(" ")
  else
    person[:givenname] = given
  end
  contrib << person
  contrib
end

#spans_preprocess_new_contrib?(span, contrib) ⇒ Boolean

Returns:

  • (Boolean)


121
122
123
124
125
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 121

# Decides whether +span+ starts a new contributor.
#
# That is the case when there is no contributor yet, when a surname arrives
# and the last contributor already has one, or when the span's role
# (span[:type], default "author") differs from the last contributor's role.
#
# @param span [Hash] span record (:key, :type)
# @param contrib [Array<Hash>] contributors so far
# @return [Boolean] truthy when a new record must be opened (the existing
#   surname itself is returned in the second case)
def spans_preprocess_new_contrib?(span, contrib)
  return true if contrib.empty?

  last = contrib[-1]
  return last[:surname] if span[:key] == "surname" && last[:surname]

  last[:role] != (span[:type] || "author")
end

#spans_preprocess_org(span, contrib) ⇒ Object



150
151
152
153
154
# File 'lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb', line 150

# Appends an organization contributor built from +span+.
#
# @param span [Hash] span record; :val is the organization name, :type the
#   role (default "author")
# @param contrib [Array<Hash>] contributors so far; modified and returned
# @return [Array<Hash>] +contrib+
def spans_preprocess_org(span, contrib)
  organization = { role: span[:type] || "author",
                   entity: "organization", name: span[:val] }
  contrib.push(organization)
end

#spans_to_bibitem(spans) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 35

# Serialises a structured span hash as the content of a <bibitem>.
#
# Output order: title; uri / docidentifier / date; contributors; edition /
# version / note; abstract; series; place; host relation; extent;
# classifications; depictions. The method is also applied to the host
# record by #spans_to_bibitem_host.
#
# @param spans [Hash] structured spans as produced by #spans_preprocess
# @return [String] concatenated XML fragments
def spans_to_bibitem(spans)
  parts = []
  parts << "<title>#{spans[:title]}</title>" if spans[:title]
  parts << spans_to_bibitem_docid(spans)
  parts << spans_to_contribs(spans)
  parts << spans_to_bibitem_edn(spans)
  parts << "<abstract>#{spans[:abstract]}</abstract>" if spans[:abstract]
  parts << spans_to_series(spans)
  parts << "<place>#{spans[:pubplace]}</place>" if spans[:pubplace]
  parts << spans_to_bibitem_host(spans)
  parts << spans_to_bibitem_extent(spans[:extent])
  spans[:classification]&.each do |classification|
    parts << span_to_docid(classification, "classification")
  end
  spans[:image]&.each do |image|
    parts << "<depiction>#{image[:val]}</depiction>"
  end
  parts.join
end

#spans_to_bibitem_docid(spans) ⇒ Object



68
69
70
71
72
73
74
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 68

# Serialises the uri, docidentifier and date records, in that order.
#
# Missing lists are treated as empty. A host record only receives the empty
# defaults from #host_rearrange when it has a title, so a host made of
# contributor spans alone reaches this method without :uri, :docid or :date;
# previously that raised NoMethodError on nil.
#
# @param spans [Hash] structured spans (:uri, :docid, :date lists, each
#   optional)
# @return [String] concatenated XML fragments, "" when there is nothing
def spans_to_bibitem_docid(spans)
  uris = Array(spans[:uri]).map { |s| span_to_docid(s, "uri") }
  docids = Array(spans[:docid]).map do |s|
    span_to_docid(s, "docidentifier")
  end
  dates = Array(spans[:date]).map { |s| span_to_date(s) }
  (uris + docids + dates).join
end

#spans_to_bibitem_edn(spans) ⇒ Object



76
77
78
79
80
81
82
83
84
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 76

# Serialises edition, version and note, in that order; absent values are
# skipped.
#
# The note is written with a +type+ attribute, which is dropped again when
# it would be empty.
#
# @param spans [Hash] structured spans (:edition, :version, :note)
# @return [String] concatenated XML fragments
def spans_to_bibitem_edn(spans)
  parts = []
  parts << "<edition>#{spans[:edition]}</edition>" if spans[:edition]
  parts << "<version>#{spans[:version]}</version>" if spans[:version]
  note = spans[:note]
  if note
    typed = "<note type='#{note[:type]}'>#{note[:val]}</note>"
    parts << typed.sub(/<note type=''>/, "<note>")
  end
  parts.join
end

#spans_to_bibitem_extent(spans) ⇒ Object



86
87
88
89
90
91
92
93
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 86

# Serialises the extent record (volume, issue, pages -- in that order) as
# an <extent> element holding one locality per value.
#
# A nil extent is treated as empty. #spans_to_bibitem passes spans[:extent]
# straight through, and a host record without a title never gets the
# default :extent hash; previously that raised NoMethodError on nil.
#
# @param spans [Hash, nil] extent hash; :volume, :issue, :pages each hold a
#   list of strings
# @return [String] "<extent>…</extent>", or "" when there are no values
def spans_to_bibitem_extent(spans)
  return "" unless spans

  locality_types = { volume: "volume", issue: "issue", pages: "page" }
  body = locality_types.flat_map do |key, type|
    Array(spans[key]).map { |value| span_to_extent(value, type) }
  end.join
  body.empty? ? "" : "<extent>#{body}</extent>"
end

#spans_to_bibitem_host(spans) ⇒ Object



60
61
62
63
64
65
66
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 60

# Serialises the host record spans[:in] as an includedIn relation.
#
# The host's :type becomes the +type+ attribute of the nested bibitem and is
# left out of the hash handed to #spans_to_bibitem. This is done on a copy:
# the previous implementation deleted :type from spans[:in] itself, so
# rendering the same spans a second time produced type=''.
#
# A missing host (nil) is treated like an empty one. That occurs when this
# method is reached for a host record that itself has no :in key.
#
# @param spans [Hash] structured spans; spans[:in] is the host record
# @return [String] relation XML, or "" when there is no host
def spans_to_bibitem_host(spans)
  host = spans[:in]
  return "" if host.nil? || host.empty?

  "<relation type='includedIn'><bibitem type='#{host[:type]}'>" \
    "#{spans_to_bibitem(host.except(:type))}</bibitem></relation>"
end

#spans_to_contribs(spans) ⇒ Object



121
122
123
124
125
126
127
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 121

# Serialises every contributor record in order.
#
# @param spans [Hash] structured spans; :contrib is the contributor list and
#   :title is forwarded to #span_to_contrib
# @return [String] concatenated <contributor> fragments
def spans_to_contribs(spans)
  title = spans[:title]
  spans[:contrib].map { |person| span_to_contrib(person, title) }.join
end

#spans_to_series(spans) ⇒ Object



55
56
57
58
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 55

# Serialises the series title, if there is one.
#
# @param spans [Hash] structured spans; :series is the series title
# @return [String] "<series><title>…</title></series>", or "" without a
#   series
def spans_to_series(spans)
  series = spans[:series]
  return "" unless series

  "<series><title>#{series}</title></series>"
end

#validate_span_to_person(span, title) ⇒ Object



137
138
139
140
141
142
# File 'lib/metanorma/standoc/spans_to_bibitem.rb', line 137

# Records a fatal error in @err when a person record has no surname.
#
# @param span [Hash] contributor record; only :surname is checked, the whole
#   record is quoted in the message
# @param title [String, nil] title of the entry, quoted in the message
# @return [nil, Array<Hash>] nil when a surname is present, otherwise @err
def validate_span_to_person(span, title)
  return if span[:surname]

  @err << {
    msg: "Missing surname: issue with bibliographic markup " \
         "in \"#{title}\": #{span}",
    fatal: true,
  }
end