Class: Opener::POSTaggers::EN

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/pos_taggers/en.rb,
lib/opener/pos_taggers/en/version.rb

Overview

The POS tagger that supports English and Spanish.

Direct Known Subclasses

ES, FR, IT, NL

Constant Summary collapse

DEFAULT_LANGUAGE =

The default language to use.

Returns:

  • (String)
'en'.freeze
VERSION =
"2.0.2"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ EN

Returns a new instance of EN.

Parameters:

  • options (Hash) (defaults to: {})

Options Hash (options):

  • :args (Array)

    The command-line arguments to pass to the underlying Python script. (Note: the code listed on this page only stores them and exposes them through #args.)



44
45
46
47
# File 'lib/opener/pos_taggers/en.rb', line 44

##
# Stores the tagger configuration.
#
# The Hash is copied before `:args` is extracted, so the caller's Hash is left
# untouched and a frozen Hash can be passed in.
#
# @param [Hash] options
#
# @option options [Array] :args Extra arguments, stored in `@args`. Defaults
#  to an empty Array when missing or nil.
#
def initialize(options = {})
  options  = options.dup
  @args    = options.delete(:args) || []
  @options = options
end

Instance Attribute Details

#args ⇒ Array (readonly)

Returns:

  • (Array)


28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/opener/pos_taggers/en.rb', line 28

class EN
  attr_reader :args, :options

  ##
  # The default language to use.
  #
  # @return [String]
  #
  DEFAULT_LANGUAGE = 'en'.freeze

  ##
  # Stores the tagger configuration.
  #
  # The Hash is copied before `:args` is extracted, so the caller's Hash is
  # left untouched and a frozen Hash can be passed in.
  #
  # @param [Hash] options
  #
  # @option options [Array] :args Extra arguments, exposed through {#args}.
  #  Defaults to an empty Array. Nothing in this class reads them.
  # @option options [String] :language The language to tag with. Defaults to
  #  {DEFAULT_LANGUAGE}.
  #
  def initialize(options = {})
    options  = options.dup
    @args    = options.delete(:args) || []
    @options = options
  end

  ##
  # Reads a KAF document from the input, lets the Java annotator add
  # part-of-speech information to it and returns the serialized result.
  #
  # @param [String|IO] input The input to tag. Anything that is not an IO is
  #  wrapped in a StringIO before it is handed to Java.
  # @return [String] The value of `kaf.to_string` (presumably the annotated
  #  KAF document; confirm against KAFDocument).
  #
  def run(input)
    input     = StringIO.new(input) unless input.is_a?(IO)
    reader    = InputStreamReader.new(input.to_inputstream)
    kaf       = KAFDocument.create_from_stream(reader)
    annotator = Java::ehu.pos.Annotate.new(language)

    # Register an "ehu-pos-<language>" processor entry under "terms" before
    # annotating.
    kaf.addLinguisticProcessor('terms', "ehu-pos-#{language}", 'now', '1.0')
    annotator.annotatePOSToKAF(kaf, lemmatizer, language)

    kaf.to_string
  end

  protected

  ##
  # Returns the lemmatizer to use for the current language.
  #
  def lemmatizer
    LemmatizerDispatcher.obtainMorfologikLemmatizer(language)
  end

  ##
  # The language given in the options, falling back to {DEFAULT_LANGUAGE}.
  #
  # @return [String]
  #
  def language
    options[:language] || DEFAULT_LANGUAGE
  end
end

#options ⇒ Hash (readonly)

Returns:

  • (Hash)


28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/opener/pos_taggers/en.rb', line 28

class EN
  attr_reader :args, :options

  ##
  # The default language to use.
  #
  # @return [String]
  #
  DEFAULT_LANGUAGE = 'en'.freeze

  ##
  # Stores the tagger configuration.
  #
  # The Hash is copied before `:args` is extracted, so the caller's Hash is
  # left untouched and a frozen Hash can be passed in.
  #
  # @param [Hash] options
  #
  # @option options [Array] :args Extra arguments, exposed through {#args}.
  #  Defaults to an empty Array. Nothing in this class reads them.
  # @option options [String] :language The language to tag with. Defaults to
  #  {DEFAULT_LANGUAGE}.
  #
  def initialize(options = {})
    options  = options.dup
    @args    = options.delete(:args) || []
    @options = options
  end

  ##
  # Reads a KAF document from the input, lets the Java annotator add
  # part-of-speech information to it and returns the serialized result.
  #
  # @param [String|IO] input The input to tag. Anything that is not an IO is
  #  wrapped in a StringIO before it is handed to Java.
  # @return [String] The value of `kaf.to_string` (presumably the annotated
  #  KAF document; confirm against KAFDocument).
  #
  def run(input)
    input     = StringIO.new(input) unless input.is_a?(IO)
    reader    = InputStreamReader.new(input.to_inputstream)
    kaf       = KAFDocument.create_from_stream(reader)
    annotator = Java::ehu.pos.Annotate.new(language)

    # Register an "ehu-pos-<language>" processor entry under "terms" before
    # annotating.
    kaf.addLinguisticProcessor('terms', "ehu-pos-#{language}", 'now', '1.0')
    annotator.annotatePOSToKAF(kaf, lemmatizer, language)

    kaf.to_string
  end

  protected

  ##
  # Returns the lemmatizer to use for the current language.
  #
  def lemmatizer
    LemmatizerDispatcher.obtainMorfologikLemmatizer(language)
  end

  ##
  # The language given in the options, falling back to {DEFAULT_LANGUAGE}.
  #
  # @return [String]
  #
  def language
    options[:language] || DEFAULT_LANGUAGE
  end
end

Instance Method Details

#run(input) ⇒ String

Parses the input as a KAF document, adds part-of-speech annotations to it, and returns the annotated document serialized as a string.

Parameters:

  • input (String, IO)

    The input to tag.

Returns:

  • (String)


56
57
58
59
60
61
62
63
64
65
66
# File 'lib/opener/pos_taggers/en.rb', line 56

##
# Reads a KAF document from the input, lets the Java annotator add
# part-of-speech information to it and returns the result of `to_string` on
# the document.
#
# @param [String|IO] input The input to tag. Anything that is not an IO is
#  wrapped in a StringIO before it is handed to Java.
# @return [String] presumably the serialized KAF document; confirm against
#  KAFDocument.
#
def run(input)
  source   = input.is_a?(IO) ? input : StringIO.new(input)
  document = KAFDocument.create_from_stream(
    InputStreamReader.new(source.to_inputstream)
  )

  tagger    = Java::ehu.pos.Annotate.new(language)
  processor = 'ehu-pos-' + language

  # Register the processor entry under "terms" before annotating.
  document.addLinguisticProcessor('terms', processor, 'now', '1.0')
  tagger.annotatePOSToKAF(document, lemmatizer, language)

  document.to_string
end