Class: Opener::POSTaggers::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/pos_taggers/base.rb,
lib/opener/pos_taggers/base/version.rb

Overview

The base POS tagger that supports Dutch and German.

Direct Known Subclasses

DE

Constant Summary collapse

VERSION =
'2.1.0'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Base

Returns a new instance of Base.

Parameters:

  • options (Hash) (defaults to: {})

Options Hash (options):

  • :args (Array)

    The commandline arguments to pass to the underlying Python script.



23
24
25
26
# File 'lib/opener/pos_taggers/base.rb', line 23

##
# Stores the commandline arguments and the remaining options.
#
# The Hash passed in by the caller is left untouched: a shallow copy is taken
# before the `:args` entry is removed from it, so the same Hash can be reused
# (or be frozen) without surprises.
#
# @param [Hash] options
#
# @option options [Array] :args The commandline arguments to pass to the
#  underlying Python script.
#
def initialize(options = {})
  options  = options.dup
  @args    = options.delete(:args) || []
  @options = options
end

Instance Attribute Details

#args ⇒ Array (readonly)

Returns:

  • (Array)


14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/opener/pos_taggers/base.rb', line 14

class Base
  attr_reader :args, :options

  ##
  # The Hash passed in by the caller is left untouched: a shallow copy is
  # taken before the `:args` entry is removed from it.
  #
  # @param [Hash] options
  #
  # @option options [Array] :args The commandline arguments to pass to the
  #  underlying Python script.
  #
  def initialize(options = {})
    options  = options.dup
    @args    = options.delete(:args) || []
    @options = options
  end

  ##
  # Builds the command used to execute the kernel.
  #
  # @return [String]
  #
  def command
    "#{adjust_python_path} python -E -OO #{kernel} #{args.join(' ')}"
  end

  ##
  # Runs the command, writing the given input to its STDIN.
  #
  # @param [String] input The input to tag.
  # @return [String] The STDOUT output of the command.
  # @raise [RuntimeError] With the STDERR output as its message when the
  #  process did not exit successfully.
  #
  def run(input)
    stdout, stderr, process = capture(input)

    raise stderr unless process.success?

    stdout
  end

  protected

  ##
  # Builds the `env` prefix that puts the bundled `site-packages` directory
  # in front of PYTHONPATH.
  #
  # @return [String]
  #
  def adjust_python_path
    site_packages = File.join(core_dir, 'site-packages')

    "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
  end

  ##
  # capture3 method doesn't work properly with Jruby, so
  # this is a workaround.
  #
  # STDOUT and STDERR are drained in separate threads while the input is
  # written to STDIN.
  #
  # NOTE(review): the command String is split on single spaces and handed to
  # popen3 as an argument list, so presumably no shell is involved: the
  # literal text `$PYTHONPATH` would not be expanded and any path or argument
  # containing a space would be broken up -- confirm before relying on it.
  #
  # @param [String] input The data written to the STDIN of the process.
  # @return [Array] The STDOUT output, the STDERR output and the value of
  #  the wait thread (the process status).
  #
  def capture(input)
    Open3.popen3(*command.split(" ")) do |stdin, stdout, stderr, waiter|
      out_reader = Thread.new { stdout.read }
      err_reader = Thread.new { stderr.read }

      stdin.write(input)
      stdin.close

      [out_reader.value, err_reader.value, waiter.value]
    end
  end

  ##
  # @return [String] The absolute path of the `core` directory, resolved
  #  relative to this file.
  #
  def core_dir
    File.expand_path('../../../../core', __FILE__)
  end

  ##
  # @return [String] The path of the Python kernel script inside the core
  #  directory.
  #
  def kernel
    File.join(core_dir, 'pos-tagger_open-nlp.py')
  end
end

#options ⇒ Hash (readonly)

Returns:

  • (Hash)


14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/opener/pos_taggers/base.rb', line 14

class Base
  attr_reader :args, :options

  ##
  # The Hash passed in by the caller is left untouched: a shallow copy is
  # taken before the `:args` entry is removed from it.
  #
  # @param [Hash] options
  #
  # @option options [Array] :args The commandline arguments to pass to the
  #  underlying Python script.
  #
  def initialize(options = {})
    options  = options.dup
    @args    = options.delete(:args) || []
    @options = options
  end

  ##
  # Builds the command used to execute the kernel.
  #
  # @return [String]
  #
  def command
    "#{adjust_python_path} python -E -OO #{kernel} #{args.join(' ')}"
  end

  ##
  # Runs the command, writing the given input to its STDIN.
  #
  # @param [String] input The input to tag.
  # @return [String] The STDOUT output of the command.
  # @raise [RuntimeError] With the STDERR output as its message when the
  #  process did not exit successfully.
  #
  def run(input)
    stdout, stderr, process = capture(input)

    raise stderr unless process.success?

    stdout
  end

  protected

  ##
  # Builds the `env` prefix that puts the bundled `site-packages` directory
  # in front of PYTHONPATH.
  #
  # @return [String]
  #
  def adjust_python_path
    site_packages = File.join(core_dir, 'site-packages')

    "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
  end

  ##
  # capture3 method doesn't work properly with Jruby, so
  # this is a workaround.
  #
  # STDOUT and STDERR are drained in separate threads while the input is
  # written to STDIN.
  #
  # NOTE(review): the command String is split on single spaces and handed to
  # popen3 as an argument list, so presumably no shell is involved: the
  # literal text `$PYTHONPATH` would not be expanded and any path or argument
  # containing a space would be broken up -- confirm before relying on it.
  #
  # @param [String] input The data written to the STDIN of the process.
  # @return [Array] The STDOUT output, the STDERR output and the value of
  #  the wait thread (the process status).
  #
  def capture(input)
    Open3.popen3(*command.split(" ")) do |stdin, stdout, stderr, waiter|
      out_reader = Thread.new { stdout.read }
      err_reader = Thread.new { stderr.read }

      stdin.write(input)
      stdin.close

      [out_reader.value, err_reader.value, waiter.value]
    end
  end

  ##
  # @return [String] The absolute path of the `core` directory, resolved
  #  relative to this file.
  #
  def core_dir
    File.expand_path('../../../../core', __FILE__)
  end

  ##
  # @return [String] The path of the Python kernel script inside the core
  #  directory.
  #
  def kernel
    File.join(core_dir, 'pos-tagger_open-nlp.py')
  end
end

Instance Method Details

#command ⇒ String

Builds the command used to execute the kernel.

Returns:

  • (String)


33
34
35
# File 'lib/opener/pos_taggers/base.rb', line 33

##
# Assembles the full command line: the PYTHONPATH prefix, the Python
# interpreter with its flags, the kernel script and the user supplied
# arguments joined by single spaces.
#
# @return [String]
#
def command
  arguments = args.join(' ')

  "#{adjust_python_path} python -E -OO #{kernel} #{arguments}"
end

#run(input) ⇒ String

Runs the command and returns its STDOUT output. When the process does not exit successfully, an error carrying the STDERR output is raised instead.

Parameters:

  • input (String)

    The input to tag.

Returns:

  • (String)


44
45
46
47
48
49
50
# File 'lib/opener/pos_taggers/base.rb', line 44

##
# Executes the command with the given input and hands back what it wrote to
# STDOUT. A failed process results in an error whose message is the STDERR
# output.
#
# @param [String] input The input to tag.
# @return [String]
#
def run(input)
  output, errors, status = capture(input)

  return output if status.success?

  raise errors
end