Class: Genebrand::PosParser
- Inherits:
-
Object
- Object
- Genebrand::PosParser
- Defined in:
- lib/genebrand/posparser.rb
Instance Method Summary collapse
- #getparts(data) ⇒ Object
-
#initialize ⇒ PosParser
constructor
Fills parts of speech table.
- #is_numeric?(obj) ⇒ Boolean
-
#parse(filename) ⇒ Hash
Parses a file of tab-separated `word<TAB>partofspeech` lines.
- #parse_top(filename, top) ⇒ Object
- #parseandsave(filename, to) ⇒ Object
- #parseandsave_preseed(filename, to) ⇒ Object
- #parseandsave_top(filename, top, to) ⇒ Object
- #preseed(filename) ⇒ Object
Constructor Details
#initialize ⇒ PosParser
Fills parts of speech table
7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/genebrand/posparser.rb', line 7
# Builds the part-of-speech lookup tables.
#
# @parsed maps a category name to the list of words collected for it.
# @table maps a one-letter part-of-speech tag to the very same list object,
# so pushing a word through @table also fills the matching @parsed entry.
def initialize
  @parsed = {}
  @table = {}

  {
    'N' => 'noun',         # noun
    'P' => 'plur',         # plural
    'V' => 'verb_part',    # verb: participle
    't' => 'verb_trans',   # verb: transitive
    'i' => 'verb_intrans', # verb: intransitive
    'A' => 'adj'           # adjective
  }.each do |tag, category|
    @table[tag] = @parsed[category] = []
  end
end
Instance Method Details
#getparts(data) ⇒ Object
44 45 46 47 48 |
# File 'lib/genebrand/posparser.rb', line 44
# Files one word under every part-of-speech tag it carries.
#
# @param data [Array<String>] a split input line: data[0] is the word,
#   data[1] is a string of one-letter part-of-speech tags
# @return [Array<String>] the individual tag characters that were examined
#   (empty when the line has no word or no tag field)
def getparts(data)
  word, tags = data
  # Lines without a tab (e.g. blank lines) split into a single field, leaving
  # the tag field nil; skip them instead of crashing on nil.
  return [] if word.nil? || tags.nil?

  entry = word.downcase
  # Characters with no entry in @table (unknown tags, the trailing newline)
  # are ignored.
  tags.chars.each do |tag|
    @table[tag].push(entry) if @table.key?(tag)
  end
end
#is_numeric?(obj) ⇒ Boolean
120 121 122 |
# File 'lib/genebrand/posparser.rb', line 120
# Tells whether the string form of +obj+ is a plain decimal number: an
# optional sign, one or more digits and an optional fractional part.
#
# The pattern is anchored with \Z, so one trailing newline is tolerated.
#
# @param obj [Object] value to test; converted with #to_s first
# @return [Boolean] true when the text is numeric, false otherwise
def is_numeric?(obj)
  !(obj.to_s =~ /\A[+-]?\d+(\.\d+)?\Z/).nil?
end
#parse(filename) ⇒ Hash
Parses a file of tab-separated `word<TAB>partofspeech` lines
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/genebrand/posparser.rb', line 26
# Parses a file of tab-separated "word<TAB>tags" lines and files every word
# under its parts of speech.
#
# NOTE(review): `init` is defined outside this excerpt; presumably it resets
# the lookup tables before a new run - confirm.
#
# @param filename [String] path of the word list to read
# @return [Hash] the @parsed table (category name => list of words)
# @raise [RuntimeError] when +filename+ does not exist
def parse(filename)
  init
  fail "File not found: #{filename}" unless File.exist?(filename)

  puts 'Seeding'
  # File.foreach closes the file once iteration finishes, so no handle leaks.
  File.foreach(filename) do |line|
    getparts(line.split("\t"))
  end

  @parsed
end
#parse_top(filename, top) ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/genebrand/posparser.rb', line 50
# Like #parse, but only keeps words that also appear in the +top+ word list.
#
# NOTE(review): `init` is defined outside this excerpt; presumably it resets
# the lookup tables - confirm.
# NOTE(review): the top list is lower-cased but the dictionary word is
# compared as-is, so capitalised dictionary entries never match - confirm
# this is intended.
#
# @param filename [String] path of the tab-separated word/tags file
# @param top [String] path of a file with one wanted word per line
# @return [Hash] the @parsed table (category name => list of words)
# @raise [RuntimeError] when +filename+ does not exist
def parse_top(filename, top)
  init
  fail "File not found: #{filename}" unless File.exist?(filename)

  puts 'Load top'
  # File.readlines opens and closes the file itself.
  toparr = File.readlines(top).map { |line| line.strip.downcase }
  puts toparr.count

  # Hash membership is O(1); scanning the array for every dictionary line
  # made the whole run quadratic.
  wanted = {}
  toparr.each { |word| wanted[word] = true }

  puts 'Seeding'
  processed = 0
  File.foreach(filename) do |line|
    data = line.split("\t")
    getparts(data) if wanted.key?(data[0])
    processed += 1
    # Progress report every 10,000 lines.
    puts processed if processed % 10_000 == 0
  end

  @parsed
end
#parseandsave(filename, to) ⇒ Object
100 101 102 103 |
# File 'lib/genebrand/posparser.rb', line 100
# Parses +filename+ and stores the resulting table as JSON in +to+.
#
# @param filename [String] path of the tab-separated word/tags file
# @param to [String] path of the JSON file to write
# @return [Integer] number of bytes written
def parseandsave(filename, to)
  # Parse before touching the destination so a failed parse cannot leave a
  # truncated output file behind.
  payload = parse(filename).to_json

  # Create the directory the destination actually lives in rather than a
  # fixed 'lib/data'.
  FileUtils.mkdir_p File.dirname(to)
  File.open(to, 'w+') { |f| f.write(payload) }
end
#parseandsave_preseed(filename, to) ⇒ Object
105 106 107 108 109 110 111 112 113 |
# File 'lib/genebrand/posparser.rb', line 105
# Filters +filename+ through #preseed and writes the surviving lines,
# unchanged, to +to+.
#
# @param filename [String] path of the tab-separated word/tags file
# @param to [String] path of the file to write
# @return [Array<String>] the lines that were written
def parseandsave_preseed(filename, to)
  # Filter before touching the destination so a failure cannot leave a
  # truncated output file behind.
  lines = preseed(filename)

  # Create the directory the destination actually lives in rather than a
  # fixed 'lib/data'.
  FileUtils.mkdir_p File.dirname(to)
  File.open(to, 'w+') do |f|
    lines.each { |line| f.write(line) }
  end
end
#parseandsave_top(filename, top, to) ⇒ Object
115 116 117 118 |
# File 'lib/genebrand/posparser.rb', line 115
# Runs #parse_top on +filename+ / +top+ and stores the resulting table as
# JSON in +to+.
#
# @param filename [String] path of the tab-separated word/tags file
# @param top [String] path of the list of wanted words
# @param to [String] path of the JSON file to write
# @return [Integer] number of bytes written
def parseandsave_top(filename, top, to)
  # Parse before touching the destination so a failed parse cannot leave a
  # truncated output file behind.
  payload = parse_top(filename, top).to_json

  # Create the directory the destination actually lives in rather than a
  # fixed 'lib/data'.
  FileUtils.mkdir_p File.dirname(to)
  File.open(to, 'w+') { |f| f.write(payload) }
end
#preseed(filename) ⇒ Object
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/genebrand/posparser.rb', line 78
# Pre-filters a word list: keeps only the lines whose first tab-separated
# field is 2-10 ASCII letters/digits and is not a plain number.
#
# NOTE(review): `init` is defined outside this excerpt; presumably it resets
# the lookup tables - confirm.
#
# @param filename [String] path of the tab-separated word/tags file
# @return [Array<String>] the accepted lines, unmodified (newline included)
# @raise [RuntimeError] when +filename+ does not exist
def preseed(filename)
  init
  fail "File not found: #{filename}" unless File.exist?(filename)

  puts 'Preseed'
  # The enumerator returned by File.foreach closes the file once it has been
  # fully consumed, so no handle leaks.
  File.foreach(filename).select do |line|
    word = line.split("\t")[0]
    # The character class also admits digits, so pure numbers are rejected
    # separately.
    !is_numeric?(word) && !/\A[a-zA-Z0-9]{2,10}\z/.match(word).nil?
  end
end