Class: Opener::Ners::Base
- Inherits:
-
Object
- Object
- Opener::Ners::Base
- Defined in:
- lib/opener/ners/base.rb,
lib/opener/ners/base/version.rb
Overview
Base NER class that supports various languages such as Dutch and English.
Constant Summary collapse
- VERSION =
'3.0.1'
Instance Attribute Summary collapse
- #beamsize ⇒ Fixnum readonly
- #dictionaries ⇒ String readonly
- #dictionaries_path ⇒ String readonly
- #enable_time ⇒ TrueClass|FalseClass readonly
- #features ⇒ String readonly
- #lexer ⇒ Fixnum readonly
- #model ⇒ String readonly
- #options ⇒ Hash readonly
Instance Method Summary collapse
-
#initialize(options = {}) ⇒ Base
constructor
A new instance of Base.
-
#language_from_kaf(input) ⇒ String
Returns the language for the given KAF document.
- #new_kaf_document(input) ⇒ Java::ixa.kaflib.KAFDocument
-
#run(input) ⇒ String
Runs the NER annotator on the given KAF input and returns the annotated KAF document as a String.
- #use_dictionaries? ⇒ TrueClass|FalseClass
Constructor Details
#initialize(options = {}) ⇒ Base
Returns a new instance of Base.
64 65 66 67 68 69 70 71 72 |
# File 'lib/opener/ners/base.rb', line 64
##
# Stores the annotator configuration taken from the given options Hash.
#
# @param [Hash] options
#
# @option options [String] :features The NERC feature to use, defaults to
#  "baseline".
# @option options [Fixnum] :beamsize The beam size for decoding, defaults
#  to 3.
# @option options [String] :dictionaries The dictionary to use, if any.
# @option options [String] :dictionaries_path The path to the dictionaries.
# @option options [Fixnum] :lexer The lexer rules to use for NERC tagging.
# @option options [String] :model The model to use for NERC annotation,
#  defaults to "default".
# @option options [TrueClass|FalseClass] :enable_time Whether or not to
#  enable dynamic timestamps (enabled by default).
#
def initialize(options = {})
  # Keep the raw Hash around; the class documents a readonly `options`
  # attribute.
  @options           = options
  @dictionaries      = options[:dictionaries]
  @dictionaries_path = options[:dictionaries_path]
  @features          = options.fetch(:features, 'baseline')
  @beamsize          = options.fetch(:beamsize, 3)
  @lexer             = options[:lexer]
  @model             = options.fetch(:model, 'default')
  @enable_time       = options.fetch(:enable_time, true)
end
Instance Attribute Details
#beamsize ⇒ Fixnum (readonly)
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/opener/ners/base.rb', line 38
##
# Base NER class. Holds the annotator configuration and runs the Java NERC
# annotator (`es.ehu.si.ixa.pipe.nerc.Annotate`) over a KAF document.
#
class Base
  attr_reader :options, :features, :beamsize, :dictionaries,
              :dictionaries_path, :lexer, :model, :enable_time

  ##
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @options           = options
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document and returns the annotated document
  # serialized with `KAFDocument#to_string`.
  #
  # @param [String] input The input KAF document to process.
  # @return [String]
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    # The dictionary arguments are only passed to the annotator when
    # dictionaries are in use.
    args += [dictionaries, dictionaries_path, lexer] if use_dictionaries?

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)
    annotator.annotate_kaf(enable_time, kaf)

    kaf.to_string
  end

  ##
  # Builds a Java KAFDocument from the given KAF string.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Whether the dictionary arguments should be passed to the annotator: true
  # when a dictionary or a dictionaries path is set, or when the feature set
  # is "dict".
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a real boolean; the bare expression would return the
    # dictionary String (or nil) instead of true/false.
    !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # `xml:lang` attribute of the `KAF` element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    document.at('KAF').attr('xml:lang')
  end
end
#dictionaries ⇒ String (readonly)
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/opener/ners/base.rb', line 38
##
# Base NER class. Holds the annotator configuration and runs the Java NERC
# annotator (`es.ehu.si.ixa.pipe.nerc.Annotate`) over a KAF document.
#
class Base
  attr_reader :options, :features, :beamsize, :dictionaries,
              :dictionaries_path, :lexer, :model, :enable_time

  ##
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @options           = options
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document and returns the annotated document
  # serialized with `KAFDocument#to_string`.
  #
  # @param [String] input The input KAF document to process.
  # @return [String]
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    # The dictionary arguments are only passed to the annotator when
    # dictionaries are in use.
    args += [dictionaries, dictionaries_path, lexer] if use_dictionaries?

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)
    annotator.annotate_kaf(enable_time, kaf)

    kaf.to_string
  end

  ##
  # Builds a Java KAFDocument from the given KAF string.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Whether the dictionary arguments should be passed to the annotator: true
  # when a dictionary or a dictionaries path is set, or when the feature set
  # is "dict".
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a real boolean; the bare expression would return the
    # dictionary String (or nil) instead of true/false.
    !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # `xml:lang` attribute of the `KAF` element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    document.at('KAF').attr('xml:lang')
  end
end
#dictionaries_path ⇒ String (readonly)
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/opener/ners/base.rb', line 38
##
# Base NER class. Holds the annotator configuration and runs the Java NERC
# annotator (`es.ehu.si.ixa.pipe.nerc.Annotate`) over a KAF document.
#
class Base
  attr_reader :options, :features, :beamsize, :dictionaries,
              :dictionaries_path, :lexer, :model, :enable_time

  ##
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @options           = options
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document and returns the annotated document
  # serialized with `KAFDocument#to_string`.
  #
  # @param [String] input The input KAF document to process.
  # @return [String]
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    # The dictionary arguments are only passed to the annotator when
    # dictionaries are in use.
    args += [dictionaries, dictionaries_path, lexer] if use_dictionaries?

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)
    annotator.annotate_kaf(enable_time, kaf)

    kaf.to_string
  end

  ##
  # Builds a Java KAFDocument from the given KAF string.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Whether the dictionary arguments should be passed to the annotator: true
  # when a dictionary or a dictionaries path is set, or when the feature set
  # is "dict".
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a real boolean; the bare expression would return the
    # dictionary String (or nil) instead of true/false.
    !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # `xml:lang` attribute of the `KAF` element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    document.at('KAF').attr('xml:lang')
  end
end
#enable_time ⇒ TrueClass|FalseClass (readonly)
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/opener/ners/base.rb', line 38
##
# Base NER class. Holds the annotator configuration and runs the Java NERC
# annotator (`es.ehu.si.ixa.pipe.nerc.Annotate`) over a KAF document.
#
class Base
  attr_reader :options, :features, :beamsize, :dictionaries,
              :dictionaries_path, :lexer, :model, :enable_time

  ##
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @options           = options
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document and returns the annotated document
  # serialized with `KAFDocument#to_string`.
  #
  # @param [String] input The input KAF document to process.
  # @return [String]
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    # The dictionary arguments are only passed to the annotator when
    # dictionaries are in use.
    args += [dictionaries, dictionaries_path, lexer] if use_dictionaries?

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)
    annotator.annotate_kaf(enable_time, kaf)

    kaf.to_string
  end

  ##
  # Builds a Java KAFDocument from the given KAF string.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Whether the dictionary arguments should be passed to the annotator: true
  # when a dictionary or a dictionaries path is set, or when the feature set
  # is "dict".
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a real boolean; the bare expression would return the
    # dictionary String (or nil) instead of true/false.
    !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # `xml:lang` attribute of the `KAF` element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    document.at('KAF').attr('xml:lang')
  end
end
#features ⇒ String (readonly)
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/opener/ners/base.rb', line 38
##
# Base NER class. Holds the annotator configuration and runs the Java NERC
# annotator (`es.ehu.si.ixa.pipe.nerc.Annotate`) over a KAF document.
#
class Base
  attr_reader :options, :features, :beamsize, :dictionaries,
              :dictionaries_path, :lexer, :model, :enable_time

  ##
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @options           = options
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document and returns the annotated document
  # serialized with `KAFDocument#to_string`.
  #
  # @param [String] input The input KAF document to process.
  # @return [String]
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    # The dictionary arguments are only passed to the annotator when
    # dictionaries are in use.
    args += [dictionaries, dictionaries_path, lexer] if use_dictionaries?

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)
    annotator.annotate_kaf(enable_time, kaf)

    kaf.to_string
  end

  ##
  # Builds a Java KAFDocument from the given KAF string.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Whether the dictionary arguments should be passed to the annotator: true
  # when a dictionary or a dictionaries path is set, or when the feature set
  # is "dict".
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a real boolean; the bare expression would return the
    # dictionary String (or nil) instead of true/false.
    !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # `xml:lang` attribute of the `KAF` element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    document.at('KAF').attr('xml:lang')
  end
end
#lexer ⇒ Fixnum (readonly)
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/opener/ners/base.rb', line 38
##
# Base NER class. Holds the annotator configuration and runs the Java NERC
# annotator (`es.ehu.si.ixa.pipe.nerc.Annotate`) over a KAF document.
#
class Base
  attr_reader :options, :features, :beamsize, :dictionaries,
              :dictionaries_path, :lexer, :model, :enable_time

  ##
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @options           = options
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document and returns the annotated document
  # serialized with `KAFDocument#to_string`.
  #
  # @param [String] input The input KAF document to process.
  # @return [String]
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    # The dictionary arguments are only passed to the annotator when
    # dictionaries are in use.
    args += [dictionaries, dictionaries_path, lexer] if use_dictionaries?

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)
    annotator.annotate_kaf(enable_time, kaf)

    kaf.to_string
  end

  ##
  # Builds a Java KAFDocument from the given KAF string.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Whether the dictionary arguments should be passed to the annotator: true
  # when a dictionary or a dictionaries path is set, or when the feature set
  # is "dict".
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a real boolean; the bare expression would return the
    # dictionary String (or nil) instead of true/false.
    !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # `xml:lang` attribute of the `KAF` element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    document.at('KAF').attr('xml:lang')
  end
end
#model ⇒ String (readonly)
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/opener/ners/base.rb', line 38
##
# Base NER class. Holds the annotator configuration and runs the Java NERC
# annotator (`es.ehu.si.ixa.pipe.nerc.Annotate`) over a KAF document.
#
class Base
  attr_reader :options, :features, :beamsize, :dictionaries,
              :dictionaries_path, :lexer, :model, :enable_time

  ##
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @options           = options
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document and returns the annotated document
  # serialized with `KAFDocument#to_string`.
  #
  # @param [String] input The input KAF document to process.
  # @return [String]
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    # The dictionary arguments are only passed to the annotator when
    # dictionaries are in use.
    args += [dictionaries, dictionaries_path, lexer] if use_dictionaries?

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)
    annotator.annotate_kaf(enable_time, kaf)

    kaf.to_string
  end

  ##
  # Builds a Java KAFDocument from the given KAF string.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Whether the dictionary arguments should be passed to the annotator: true
  # when a dictionary or a dictionaries path is set, or when the feature set
  # is "dict".
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a real boolean; the bare expression would return the
    # dictionary String (or nil) instead of true/false.
    !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # `xml:lang` attribute of the `KAF` element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    document.at('KAF').attr('xml:lang')
  end
end
#options ⇒ Hash (readonly)
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/opener/ners/base.rb', line 38
##
# Base NER class. Holds the annotator configuration and runs the Java NERC
# annotator (`es.ehu.si.ixa.pipe.nerc.Annotate`) over a KAF document.
#
class Base
  attr_reader :options, :features, :beamsize, :dictionaries,
              :dictionaries_path, :lexer, :model, :enable_time

  ##
  # @param [Hash] options
  #
  # @option options [String] :features The NERC feature to use, defaults to
  #  "baseline".
  #
  # @option options [Fixnum] :beamsize The beam size for decoding, defaults
  #  to 3.
  #
  # @option options [String] :dictionaries The dictionary to use, if any.
  #
  # @option options [String] :dictionaries_path The path to the
  #  dictionaries.
  #
  # @option options [Fixnum] :lexer The lexer rules to use for NERC
  #  tagging.
  #
  # @option options [String] :model The model to use for NERC annotation,
  #  defaults to "default".
  #
  # @option options [TrueClass|FalseClass] :enable_time Whether or not to
  #  enable dynamic timestamps (enabled by default).
  #
  def initialize(options = {})
    @options           = options
    @dictionaries      = options[:dictionaries]
    @dictionaries_path = options[:dictionaries_path]
    @features          = options.fetch(:features, 'baseline')
    @beamsize          = options.fetch(:beamsize, 3)
    @lexer             = options[:lexer]
    @model             = options.fetch(:model, 'default')
    @enable_time       = options.fetch(:enable_time, true)
  end

  ##
  # Annotates the given KAF document and returns the annotated document
  # serialized with `KAFDocument#to_string`.
  #
  # @param [String] input The input KAF document to process.
  # @return [String]
  #
  def run(input)
    lang = language_from_kaf(input)
    kaf  = new_kaf_document(input)
    args = [lang, model, features, beamsize]

    # The dictionary arguments are only passed to the annotator when
    # dictionaries are in use.
    args += [dictionaries, dictionaries_path, lexer] if use_dictionaries?

    annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)
    annotator.annotate_kaf(enable_time, kaf)

    kaf.to_string
  end

  ##
  # Builds a Java KAFDocument from the given KAF string.
  #
  # @param [String] input The input KAF document as a string.
  # @return [Java::ixa.kaflib.KAFDocument]
  #
  def new_kaf_document(input)
    input_io = StringIO.new(input)
    reader   = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

    Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
  end

  ##
  # Whether the dictionary arguments should be passed to the annotator: true
  # when a dictionary or a dictionaries path is set, or when the feature set
  # is "dict".
  #
  # @return [TrueClass|FalseClass]
  #
  def use_dictionaries?
    # Coerce to a real boolean; the bare expression would return the
    # dictionary String (or nil) instead of true/false.
    !!(dictionaries || dictionaries_path || features == 'dict')
  end

  ##
  # Returns the language for the given KAF document, read from the
  # `xml:lang` attribute of the `KAF` element.
  #
  # @param [String] input
  # @return [String]
  #
  def language_from_kaf(input)
    document = Nokogiri::XML(input)

    document.at('KAF').attr('xml:lang')
  end
end
Instance Method Details
#language_from_kaf(input) ⇒ String
Returns the language for the given KAF document.
121 122 123 124 125 |
# File 'lib/opener/ners/base.rb', line 121
##
# Reads the `xml:lang` attribute of the `KAF` element in the given document.
#
# @param [String] input The KAF document as a string.
# @return [String]
#
def language_from_kaf(input)
  kaf_node = Nokogiri::XML(input).at('KAF')

  kaf_node.attr('xml:lang')
end
#new_kaf_document(input) ⇒ Java::ixa.kaflib.KAFDocument
101 102 103 104 105 106 |
# File 'lib/opener/ners/base.rb', line 101
##
# Wraps the given KAF string in a Java reader and builds a KAFDocument from
# it.
#
# @param [String] input The input KAF document as a string.
# @return [Java::ixa.kaflib.KAFDocument]
#
def new_kaf_document(input)
  stream = StringIO.new(input).to_inputstream
  reader = Java::java.io.InputStreamReader.new(stream)

  Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
end
#run(input) ⇒ String
Runs the NER annotator on the given KAF input and returns the annotated KAF document as a String.
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/opener/ners/base.rb', line 81
##
# Annotates the given KAF document and returns the annotated document
# serialized with `KAFDocument#to_string`.
#
# @param [String] input The input KAF document to process.
#
def run(input)
  language = language_from_kaf(input)
  document = new_kaf_document(input)

  base_args = [language, model, features, beamsize]

  # The dictionary arguments are only appended when dictionaries are in use.
  dict_args = use_dictionaries? ? [dictionaries, dictionaries_path, lexer] : []

  annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*(base_args + dict_args))
  annotator.annotate_kaf(enable_time, document)

  document.to_string
end
#use_dictionaries? ⇒ TrueClass|FalseClass
111 112 113 |
# File 'lib/opener/ners/base.rb', line 111
##
# Whether the dictionary arguments should be used: true when a dictionary or
# a dictionaries path is set, or when the feature set is "dict".
#
# @return [TrueClass|FalseClass]
#
def use_dictionaries?
  # Coerce to a real boolean; the bare expression would return the
  # dictionary String (or nil) instead of true/false.
  !!(dictionaries || dictionaries_path || features == 'dict')
end