Class: Opener::Tokenizer::CLI

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/tokenizer/cli.rb

Overview

CLI wrapper around Opener::Tokenizer using Slop.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ CLI

Returns a new instance of CLI.



12
13
14
# File 'lib/opener/tokenizer/cli.rb', line 12

##
# Builds the Slop parser once, at construction time, and keeps it in
# `@parser`.
#
def initialize
  slop = configure_slop
  @parser = slop
end

Instance Attribute Details

#parser ⇒ Slop (readonly)

Returns:

  • (Slop)


9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/opener/tokenizer/cli.rb', line 9

##
# Command-line front end for the tokenizer. A Slop parser is built when the
# object is created and is handed the argument list by {#run}.
#
class CLI
  # @return [Slop] the parser produced by {#configure_slop}
  attr_reader :parser

  ##
  # Creates the CLI and builds its Slop parser.
  #
  def initialize
    @parser = configure_slop
  end

  ##
  # Hands the given arguments to the Slop parser.
  #
  # @param [Array] argv
  #
  def run(argv = ARGV)
    parser.parse(argv)
  end

  ##
  # Builds the Slop parser: banner, help text, option definitions and the
  # block that runs the tokenizer.
  #
  # @return [Slop]
  #
  def configure_slop
    # Assigned before the Slop block so the option definitions stay together.
    about = <<-EOF.chomp

About:

Tokenizer for KAF/plain text documents with support for various languages
such as Dutch and English. This command reads input from STDIN.

Examples:

cat example.txt | tokenizer -l en # Manually specify the language
cat example.kaf | tokenizer       # Uses the xml:lang attribute

Languages:

* Dutch (nl)
* English (en)
* French (fr)
* German (de)
* Italian (it)
* Spanish (es)

KAF Input:

If you give a KAF file as an input (-k or --kaf) the language is taken from
the xml:lang attribute inside the file. Else it expects that you give the
language as an argument (-l or --language)

Example KAF:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<KAF version="v1.opener" xml:lang="en">
  <raw>This is some text.</raw>
</KAF>
    EOF

    Slop.new(:strict => false, :indent => 2, :help => true) do
      banner 'Usage: tokenizer [OPTIONS]'

      separator about
      separator "\nOptions:\n"

      # NOTE(review): Kernel#abort writes the message to STDERR and exits
      # with a failure status; confirm that is intended for a version flag.
      on :v, :version, 'Shows the current version' do
        abort "tokenizer v#{VERSION} on #{RUBY_DESCRIPTION}"
      end

      on :l=, :language=, 'A specific language to use',
        :as      => String,
        :default => DEFAULT_LANGUAGE

      on :k, :kaf, 'Treats the input as a KAF document'
      on :p, :plain, 'Treats the input as plain text'

      run do |parsed, remaining|
        # Input is handled as KAF unless --plain was given; the --kaf flag
        # itself is not consulted here.
        worker = Tokenizer.new(
          :args     => remaining,
          :kaf      => !parsed[:plain],
          :language => parsed[:language]
        )

        # STDIN is only read when it is not a terminal; otherwise the
        # tokenizer receives nil.
        piped = STDIN.read unless STDIN.tty?

        puts worker.run(piped)
      end
    end
  end
end

Instance Method Details

#configure_slop ⇒ Slop

Returns:

  • (Slop)


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/opener/tokenizer/cli.rb', line 26

##
# Builds the Slop parser: banner, help text, option definitions and the
# block that runs the tokenizer.
#
# @return [Slop]
#
def configure_slop
  # Assigned before the Slop block so the option definitions stay together.
  about = <<-EOF.chomp

About:

    Tokenizer for KAF/plain text documents with support for various languages
    such as Dutch and English. This command reads input from STDIN.

Examples:

    cat example.txt | tokenizer -l en # Manually specify the language
    cat example.kaf | tokenizer       # Uses the xml:lang attribute

Languages:

    * Dutch (nl)
    * English (en)
    * French (fr)
    * German (de)
    * Italian (it)
    * Spanish (es)

KAF Input:

    If you give a KAF file as an input (-k or --kaf) the language is taken from
    the xml:lang attribute inside the file. Else it expects that you give the
    language as an argument (-l or --language)

Example KAF:

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <KAF version="v1.opener" xml:lang="en">
<raw>This is some text.</raw>
    </KAF>
  EOF

  Slop.new(:strict => false, :indent => 2, :help => true) do
    banner 'Usage: tokenizer [OPTIONS]'

    separator about
    separator "\nOptions:\n"

    # NOTE(review): Kernel#abort writes the message to STDERR and exits
    # with a failure status; confirm that is intended for a version flag.
    on :v, :version, 'Shows the current version' do
      abort "tokenizer v#{VERSION} on #{RUBY_DESCRIPTION}"
    end

    on :l=, :language=, 'A specific language to use',
      :as      => String,
      :default => DEFAULT_LANGUAGE

    on :k, :kaf, 'Treats the input as a KAF document'
    on :p, :plain, 'Treats the input as plain text'

    run do |parsed, remaining|
      # Input is handled as KAF unless --plain was given; the --kaf flag
      # itself is not consulted here.
      worker = Tokenizer.new(
        :args     => remaining,
        :kaf      => !parsed[:plain],
        :language => parsed[:language]
      )

      # STDIN is only read when it is not a terminal; otherwise the
      # tokenizer receives nil.
      piped = STDIN.read unless STDIN.tty?

      puts worker.run(piped)
    end
  end
end

#run(argv = ARGV) ⇒ Object

Parameters:

  • argv (Array) (defaults to: ARGV)


19
20
21
# File 'lib/opener/tokenizer/cli.rb', line 19

##
# Hands the given arguments to the Slop parser and returns whatever its
# `parse` call returns.
#
# @param [Array] argv
#
def run(argv = ARGV)
  slop = parser
  slop.parse(argv)
end