Class: Opener::Ners::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/ners/base.rb,
lib/opener/ners/base/version.rb

Overview

Base NER class that supports various languages such as Dutch and English.

Constant Summary collapse

VERSION =
'3.0.1'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Base

Returns a new instance of Base.

Parameters:

  • options (Hash) (defaults to: {})

Options Hash (options):

  • :features (String)

    The NERC feature to use, defaults to “baseline”.

  • :beamsize (Fixnum)

    The beam size for decoding, defaults to 3.

  • :dictionaries (String)

    The dictionary to use, if any.

  • :dictionaries_path (String)

    The path to the dictionaries.

  • :lexer (Fixnum)

    The lexer rules to use for NERC tagging.

  • :model (String)

    The model to use for NERC annotation.

  • :enable_time (TrueClass|FalseClass)

    Whether or not to enable dynamic timestamps (enabled by default).



64
65
66
67
68
69
70
71
72
# File 'lib/opener/ners/base.rb', line 64

# Copies the NERC settings out of the options Hash into instance variables.
#
# :dictionaries, :dictionaries_path and :lexer are read with a plain Hash
# lookup; :features, :beamsize, :model and :enable_time fall back to
# 'baseline', 3, 'default' and true when their key is absent.
def initialize(options = {})
  # Settings without an explicit fallback.
  @dictionaries, @dictionaries_path, @lexer =
    options.values_at(:dictionaries, :dictionaries_path, :lexer)

  # Settings with an explicit fallback.
  @features    = options.fetch(:features, 'baseline')
  @beamsize    = options.fetch(:beamsize, 3)
  @model       = options.fetch(:model, 'default')
  @enable_time = options.fetch(:enable_time, true)
end

Instance Attribute Details

#beamsize ⇒ Fixnum (readonly)

Returns:

  • (Fixnum)


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/opener/ners/base.rb', line 38

class Base
  attr_reader :features, :beamsize, :dictionaries, :dictionaries_path,
    :lexer, :model, :enable_time

  ##
  # Stores the annotator settings. Nothing is annotated until {#run} is
  # called.
  #
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document with the ixa-pipe NERC annotator and
  # returns the annotated document.
  #
  # The dictionary, dictionary path and lexer are only passed to the
  # annotator when {#use_dictionaries?} returns true.
  #
  # @param [String] input The input KAF document as a string.
  # @return [String] The annotated KAF document.
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    if use_dictionaries?
      args += [dictionaries, dictionaries_path, lexer]
    end

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)

    # The return value of annotate_kaf is not used; the result is read back
    # from the KAF document object itself.
    annotator.annotate_kaf(enable_time, kaf)

    return kaf.to_string
  end

  ##
  # Wraps the input String in a Java reader and builds a KAF document from
  # it.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    return Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Returns true when a dictionary or dictionary path was given, or when the
  # "dict" feature is used.
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a strict boolean; the bare `||` chain would otherwise hand
    # the dictionary String (or nil) back to the caller.
    return !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # "xml:lang" attribute of the KAF element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    return document.at('KAF').attr('xml:lang')
  end
end

#dictionaries ⇒ String (readonly)

Returns:

  • (String)


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/opener/ners/base.rb', line 38

class Base
  attr_reader :features, :beamsize, :dictionaries, :dictionaries_path,
    :lexer, :model, :enable_time

  ##
  # Stores the annotator settings. Nothing is annotated until {#run} is
  # called.
  #
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document with the ixa-pipe NERC annotator and
  # returns the annotated document.
  #
  # The dictionary, dictionary path and lexer are only passed to the
  # annotator when {#use_dictionaries?} returns true.
  #
  # @param [String] input The input KAF document as a string.
  # @return [String] The annotated KAF document.
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    if use_dictionaries?
      args += [dictionaries, dictionaries_path, lexer]
    end

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)

    # The return value of annotate_kaf is not used; the result is read back
    # from the KAF document object itself.
    annotator.annotate_kaf(enable_time, kaf)

    return kaf.to_string
  end

  ##
  # Wraps the input String in a Java reader and builds a KAF document from
  # it.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    return Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Returns true when a dictionary or dictionary path was given, or when the
  # "dict" feature is used.
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a strict boolean; the bare `||` chain would otherwise hand
    # the dictionary String (or nil) back to the caller.
    return !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # "xml:lang" attribute of the KAF element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    return document.at('KAF').attr('xml:lang')
  end
end

#dictionaries_path ⇒ String (readonly)

Returns:

  • (String)


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/opener/ners/base.rb', line 38

class Base
  attr_reader :features, :beamsize, :dictionaries, :dictionaries_path,
    :lexer, :model, :enable_time

  ##
  # Stores the annotator settings. Nothing is annotated until {#run} is
  # called.
  #
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document with the ixa-pipe NERC annotator and
  # returns the annotated document.
  #
  # The dictionary, dictionary path and lexer are only passed to the
  # annotator when {#use_dictionaries?} returns true.
  #
  # @param [String] input The input KAF document as a string.
  # @return [String] The annotated KAF document.
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    if use_dictionaries?
      args += [dictionaries, dictionaries_path, lexer]
    end

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)

    # The return value of annotate_kaf is not used; the result is read back
    # from the KAF document object itself.
    annotator.annotate_kaf(enable_time, kaf)

    return kaf.to_string
  end

  ##
  # Wraps the input String in a Java reader and builds a KAF document from
  # it.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    return Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Returns true when a dictionary or dictionary path was given, or when the
  # "dict" feature is used.
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a strict boolean; the bare `||` chain would otherwise hand
    # the dictionary String (or nil) back to the caller.
    return !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # "xml:lang" attribute of the KAF element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    return document.at('KAF').attr('xml:lang')
  end
end

#enable_time ⇒ TrueClass|FalseClass (readonly)

Returns:

  • (TrueClass|FalseClass)


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/opener/ners/base.rb', line 38

class Base
  attr_reader :features, :beamsize, :dictionaries, :dictionaries_path,
    :lexer, :model, :enable_time

  ##
  # Stores the annotator settings. Nothing is annotated until {#run} is
  # called.
  #
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document with the ixa-pipe NERC annotator and
  # returns the annotated document.
  #
  # The dictionary, dictionary path and lexer are only passed to the
  # annotator when {#use_dictionaries?} returns true.
  #
  # @param [String] input The input KAF document as a string.
  # @return [String] The annotated KAF document.
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    if use_dictionaries?
      args += [dictionaries, dictionaries_path, lexer]
    end

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)

    # The return value of annotate_kaf is not used; the result is read back
    # from the KAF document object itself.
    annotator.annotate_kaf(enable_time, kaf)

    return kaf.to_string
  end

  ##
  # Wraps the input String in a Java reader and builds a KAF document from
  # it.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    return Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Returns true when a dictionary or dictionary path was given, or when the
  # "dict" feature is used.
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a strict boolean; the bare `||` chain would otherwise hand
    # the dictionary String (or nil) back to the caller.
    return !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # "xml:lang" attribute of the KAF element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    return document.at('KAF').attr('xml:lang')
  end
end

#features ⇒ String (readonly)

Returns:

  • (String)


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/opener/ners/base.rb', line 38

class Base
  attr_reader :features, :beamsize, :dictionaries, :dictionaries_path,
    :lexer, :model, :enable_time

  ##
  # Stores the annotator settings. Nothing is annotated until {#run} is
  # called.
  #
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document with the ixa-pipe NERC annotator and
  # returns the annotated document.
  #
  # The dictionary, dictionary path and lexer are only passed to the
  # annotator when {#use_dictionaries?} returns true.
  #
  # @param [String] input The input KAF document as a string.
  # @return [String] The annotated KAF document.
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    if use_dictionaries?
      args += [dictionaries, dictionaries_path, lexer]
    end

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)

    # The return value of annotate_kaf is not used; the result is read back
    # from the KAF document object itself.
    annotator.annotate_kaf(enable_time, kaf)

    return kaf.to_string
  end

  ##
  # Wraps the input String in a Java reader and builds a KAF document from
  # it.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    return Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Returns true when a dictionary or dictionary path was given, or when the
  # "dict" feature is used.
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a strict boolean; the bare `||` chain would otherwise hand
    # the dictionary String (or nil) back to the caller.
    return !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # "xml:lang" attribute of the KAF element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    return document.at('KAF').attr('xml:lang')
  end
end

#lexer ⇒ Fixnum (readonly)

Returns:

  • (Fixnum)


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/opener/ners/base.rb', line 38

class Base
  attr_reader :features, :beamsize, :dictionaries, :dictionaries_path,
    :lexer, :model, :enable_time

  ##
  # Stores the annotator settings. Nothing is annotated until {#run} is
  # called.
  #
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document with the ixa-pipe NERC annotator and
  # returns the annotated document.
  #
  # The dictionary, dictionary path and lexer are only passed to the
  # annotator when {#use_dictionaries?} returns true.
  #
  # @param [String] input The input KAF document as a string.
  # @return [String] The annotated KAF document.
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    if use_dictionaries?
      args += [dictionaries, dictionaries_path, lexer]
    end

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)

    # The return value of annotate_kaf is not used; the result is read back
    # from the KAF document object itself.
    annotator.annotate_kaf(enable_time, kaf)

    return kaf.to_string
  end

  ##
  # Wraps the input String in a Java reader and builds a KAF document from
  # it.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    return Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Returns true when a dictionary or dictionary path was given, or when the
  # "dict" feature is used.
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a strict boolean; the bare `||` chain would otherwise hand
    # the dictionary String (or nil) back to the caller.
    return !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # "xml:lang" attribute of the KAF element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    return document.at('KAF').attr('xml:lang')
  end
end

#model ⇒ String (readonly)

Returns:

  • (String)


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/opener/ners/base.rb', line 38

class Base
  attr_reader :features, :beamsize, :dictionaries, :dictionaries_path,
    :lexer, :model, :enable_time

  ##
  # Stores the annotator settings. Nothing is annotated until {#run} is
  # called.
  #
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document with the ixa-pipe NERC annotator and
  # returns the annotated document.
  #
  # The dictionary, dictionary path and lexer are only passed to the
  # annotator when {#use_dictionaries?} returns true.
  #
  # @param [String] input The input KAF document as a string.
  # @return [String] The annotated KAF document.
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    if use_dictionaries?
      args += [dictionaries, dictionaries_path, lexer]
    end

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)

    # The return value of annotate_kaf is not used; the result is read back
    # from the KAF document object itself.
    annotator.annotate_kaf(enable_time, kaf)

    return kaf.to_string
  end

  ##
  # Wraps the input String in a Java reader and builds a KAF document from
  # it.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    return Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Returns true when a dictionary or dictionary path was given, or when the
  # "dict" feature is used.
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a strict boolean; the bare `||` chain would otherwise hand
    # the dictionary String (or nil) back to the caller.
    return !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # "xml:lang" attribute of the KAF element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    return document.at('KAF').attr('xml:lang')
  end
end

#options ⇒ Hash (readonly)

Returns:

  • (Hash)


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/opener/ners/base.rb', line 38

class Base
  attr_reader :features, :beamsize, :dictionaries, :dictionaries_path,
    :lexer, :model, :enable_time

  ##
  # Stores the annotator settings. Nothing is annotated until {#run} is
  # called.
  #
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document with the ixa-pipe NERC annotator and
  # returns the annotated document.
  #
  # The dictionary, dictionary path and lexer are only passed to the
  # annotator when {#use_dictionaries?} returns true.
  #
  # @param [String] input The input KAF document as a string.
  # @return [String] The annotated KAF document.
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    if use_dictionaries?
      args += [dictionaries, dictionaries_path, lexer]
    end

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)

    # The return value of annotate_kaf is not used; the result is read back
    # from the KAF document object itself.
    annotator.annotate_kaf(enable_time, kaf)

    return kaf.to_string
  end

  ##
  # Wraps the input String in a Java reader and builds a KAF document from
  # it.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    return Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Returns true when a dictionary or dictionary path was given, or when the
  # "dict" feature is used.
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a strict boolean; the bare `||` chain would otherwise hand
    # the dictionary String (or nil) back to the caller.
    return !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # "xml:lang" attribute of the KAF element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    return document.at('KAF').attr('xml:lang')
  end
end

Instance Method Details

#language_from_kaf(input) ⇒ String

Returns the language for the given KAF document.

Parameters:

  • input (String)

Returns:

  • (String)


121
122
123
124
125
# File 'lib/opener/ners/base.rb', line 121

# Parses the input as XML and returns the value of the "xml:lang" attribute
# of the element matched by 'KAF'.
def language_from_kaf(input)
  kaf_node = Nokogiri::XML(input).at('KAF')

  kaf_node.attr('xml:lang')
end

#new_kaf_document(input) ⇒ Java::ixa.kaflib.KAFDocument

Parameters:

  • input (String)

    The input KAF document as a string.

Returns:

  • (Java::ixa.kaflib.KAFDocument)


101
102
103
104
105
106
# File 'lib/opener/ners/base.rb', line 101

# Builds a KAF document object from a String by exposing the String as a
# Java input stream, wrapping that in a reader and handing the reader to
# KAFDocument.create_from_stream.
def new_kaf_document(input)
  stream = StringIO.new(input).to_inputstream
  reader = Java::java.io.InputStreamReader.new(stream)

  Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
end

#run(input) ⇒ String

Runs the NERC annotator on the input KAF document and returns the annotated document as a string.

Parameters:

  • input (String)

    The input to process.

Returns:

  • (String)


81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/opener/ners/base.rb', line 81

# Annotates the given KAF input and returns the annotated document as
# produced by the document's to_string.
#
# The annotator always receives the language, model, features and beam size;
# the dictionary, dictionary path and lexer are appended only when
# use_dictionaries? is truthy.
def run(input)
  language = language_from_kaf(input)
  document = new_kaf_document(input)

  arguments = [language, model, features, beamsize]
  arguments.concat([dictionaries, dictionaries_path, lexer]) if use_dictionaries?

  nerc = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*arguments)

  # The annotator's return value is ignored; the result is read back from
  # the document object.
  nerc.annotate_kaf(enable_time, document)

  document.to_string
end

#use_dictionaries? ⇒ TrueClass|FalseClass

Returns:

  • (TrueClass|FalseClass)


111
112
113
# File 'lib/opener/ners/base.rb', line 111

# Returns true when a dictionary or dictionary path is set, or when the
# "dict" feature is used; false otherwise.
def use_dictionaries?
  # Coerce to a strict boolean; the bare `||` chain would otherwise hand the
  # dictionary String (or nil) back to the caller instead of true/false.
  return !!(dictionaries || dictionaries_path || features == 'dict')
end