Class: Tesseract::API
- Inherits:
- Object
- Inheritance hierarchy:
- Object
- Tesseract::API
- Defined in:
- lib/tesseract/api.rb,
lib/tesseract/api/image.rb,
lib/tesseract/api/iterator.rb
Defined Under Namespace
Constant Summary collapse
- Types =
{ int: [:integer], bool: [:boolean], double: [:float], string: [:str] }
Class Method Summary collapse
-
.finalize(pointer) ⇒ Object
:nodoc:.
-
.image_for(image) ⇒ Object
Get a pointer to a tesseract-ocr usable image from a path, a string with the data or an IO stream.
-
.to_language_code(code) ⇒ Object
Transform a language code to tesseract-ocr usable codes.
Instance Method Summary collapse
- #all_word_confidences ⇒ Object
- #clear ⇒ Object
- #end ⇒ Object
- #get_box(page = 0) ⇒ Object
- #get_hocr(page = 0) ⇒ Object
- #get_iterator ⇒ Object
- #get_page_seg_mode ⇒ Object
- #get_text ⇒ Object
- #get_unlv ⇒ Object
- #get_variable(name, type = nil) ⇒ Object
- #init(datapath = nil, language = 'eng', mode = :DEFAULT) ⇒ Object
-
#initialize ⇒ API
constructor
A new instance of API.
- #mean_text_confidence ⇒ Object
- #process_page(pix, page = 0, name = "") ⇒ Object
- #process_pages(name) ⇒ Object
- #read_config_file(path) ⇒ Object
- #set_image(pix) ⇒ Object
- #set_input_name(name) ⇒ Object
- #set_output_name(name) ⇒ Object
- #set_page_seg_mode(value) ⇒ Object
- #set_rectangle(left, top, width, height) ⇒ Object
- #set_variable(name, value) ⇒ Object
- #to_ffi ⇒ Object
- #version ⇒ Object
Constructor Details
Class Method Details
.finalize(pointer) ⇒ Object
:nodoc:
60 61 62 |
# File 'lib/tesseract/api.rb', line 60
#
# Hands +pointer+ to C::BaseAPI.destroy.
# NOTE(review): presumably registered as the object finalizer for the native
# handle -- confirm against the constructor, which is not shown here.
#
# @param pointer the native handle to destroy
def self.finalize(pointer) # :nodoc:
  native_handle = pointer
  C::BaseAPI.destroy(native_handle)
end
.image_for(image) ⇒ Object
Get a pointer to a tesseract-ocr usable image from a path, a string with the data or an IO stream.
37 38 39 |
# File 'lib/tesseract/api.rb', line 37
#
# Get a tesseract-ocr usable image from a path, a string with the data or an
# IO stream. The argument is passed unchanged to Image.new.
#
# @param image the image source handed to Image.new
# @return [Image] the wrapped image
def self.image_for(image)
  source = image
  Image.new(source)
end
.to_language_code(code) ⇒ Object
Transform a language code to tesseract-ocr usable codes
43 44 45 46 47 |
# File 'lib/tesseract/api.rb', line 43
#
# Transform a language code to a tesseract-ocr usable code.
#
# The code is stringified, downcased and looked up through ISO_639; the
# +alpha3+ value of the match is returned. If anything in that lookup raises
# a StandardError (for example because nothing matched), the stringified
# input is returned unchanged instead.
#
# @param code [#to_s] language code or name
# @return the +alpha3+ value of the ISO_639 match, or +code.to_s+ as fallback
def self.to_language_code(code)
  entry = ISO_639.find(code.to_s.downcase)
  entry.alpha3
rescue
  code.to_s
end
Instance Method Details
#all_word_confidences ⇒ Object
206 207 208 |
# File 'lib/tesseract/api.rb', line 206
#
# Calls C::BaseAPI.all_word_confidences on this instance's native handle.
#
# @return whatever the native binding returns
def all_word_confidences
  handle = to_ffi
  C::BaseAPI.all_word_confidences(handle)
end
#clear ⇒ Object
210 211 212 |
# File 'lib/tesseract/api.rb', line 210
#
# Calls C::BaseAPI.clear on this instance's native handle.
#
# @return whatever the native binding returns
def clear
  handle = to_ffi
  C::BaseAPI.clear(handle)
end
#end ⇒ Object
214 215 216 |
# File 'lib/tesseract/api.rb', line 214
#
# Calls C::BaseAPI.end on this instance's native handle.
#
# @return whatever the native binding returns
def end
  handle = to_ffi
  C::BaseAPI.end(handle)
end
#get_box(page = 0) ⇒ Object
182 183 184 185 186 187 188 189 190 |
# File 'lib/tesseract/api.rb', line 182
#
# Fetches the box text for +page+ from the native API as a UTF-8 tagged String.
#
# Mirrors #get_text / #get_hocr: a NULL result yields +nil+ instead of
# attempting to read from (and then free) a NULL pointer.
#
# @param page [Integer] page number forwarded to the native call
# @return [String, nil] the box text, or +nil+ when the native call returned NULL
def get_box(page = 0)
  pointer = C::BaseAPI.get_box_text(to_ffi, page)
  return if pointer.null?

  pointer.read_string.force_encoding('UTF-8')
ensure
  # +pointer+ is still nil here if the native call itself raised; only a
  # real, non-NULL buffer is handed back to the C side for release.
  C.free_array_of_char(pointer) if pointer && !pointer.null?
end
#get_hocr(page = 0) ⇒ Object
169 170 171 172 173 174 175 176 177 178 179 180 |
# File 'lib/tesseract/api.rb', line 169
#
# Fetches the hOCR text for +page+ from the native API as a UTF-8 tagged String.
#
# @param page [Integer] page number forwarded to the native call
# @return [String, nil] the hOCR text, or +nil+ when the native call returned NULL
def get_hocr(page = 0)
  pointer = C::BaseAPI.get_hocr_text(to_ffi, page)
  return if pointer.null?

  pointer.read_string.force_encoding('UTF-8')
ensure
  # If the native call raised, +pointer+ was never assigned and is nil;
  # calling +null?+ on it would raise NoMethodError and hide the real error.
  C.free_array_of_char(pointer) if pointer && !pointer.null?
end
#get_iterator ⇒ Object
152 153 154 |
# File 'lib/tesseract/api.rb', line 152
#
# Wraps the value returned by C::BaseAPI.get_iterator for this handle in an
# Iterator.
#
# @return [Iterator]
def get_iterator
  native_iterator = C::BaseAPI.get_iterator(to_ffi)
  Iterator.new(native_iterator)
end
#get_page_seg_mode ⇒ Object
112 113 114 |
# File 'lib/tesseract/api.rb', line 112
#
# Calls C::BaseAPI.get_page_seg_mode on this instance's native handle.
#
# @return whatever the native binding returns
def get_page_seg_mode
  handle = to_ffi
  C::BaseAPI.get_page_seg_mode(handle)
end
#get_text ⇒ Object
156 157 158 159 160 161 162 163 164 165 166 167 |
# File 'lib/tesseract/api.rb', line 156
#
# Fetches the recognised text from the native API as a UTF-8 tagged String.
#
# @return [String, nil] the text, or +nil+ when the native call returned NULL
def get_text
  pointer = C::BaseAPI.get_utf8_text(to_ffi)
  return if pointer.null?

  pointer.read_string.force_encoding('UTF-8')
ensure
  # If the native call raised, +pointer+ was never assigned and is nil;
  # calling +null?+ on it would raise NoMethodError and hide the real error.
  C.free_array_of_char(pointer) if pointer && !pointer.null?
end
#get_unlv ⇒ Object
192 193 194 195 196 197 198 199 200 |
# File 'lib/tesseract/api.rb', line 192
#
# Fetches the UNLV text from the native API, tagged as ISO-8859-1.
#
# Mirrors #get_text / #get_hocr: a NULL result yields +nil+ instead of
# attempting to read from (and then free) a NULL pointer.
#
# @return [String, nil] the UNLV text, or +nil+ when the native call returned NULL
def get_unlv
  pointer = C::BaseAPI.get_unlv_text(to_ffi)
  return if pointer.null?

  pointer.read_string.force_encoding('ISO8859-1')
ensure
  # +pointer+ is still nil here if the native call itself raised; only a
  # real, non-NULL buffer is handed back to the C side for release.
  C.free_array_of_char(pointer) if pointer && !pointer.null?
end
#get_variable(name, type = nil) ⇒ Object
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
# File 'lib/tesseract/api.rb', line 80
#
# Reads a variable from the native API.
#
# Without +type+, each key of Types is probed in order through
# +has_<type>_variable+ and the first one that reports the variable is used.
# With +type+, it may be either a key of Types (+:int+, +:bool+, +:double+,
# +:string+) or one of the aliases listed there (+:integer+, +:boolean+,
# +:float+, +:str+).
#
# @param name the variable name forwarded to the native calls
# @param type [Symbol, nil] the expected type, or +nil+ to auto-detect
# @return the value from +get_<type>_variable+, or +nil+ when the native API
#   does not report a variable of that name and type
# @raise [ArgumentError] if +type+ is neither a Types key nor a known alias
def get_variable(name, type = nil)
  if type.nil?
    kind = Types.keys.find do |candidate|
      C::BaseAPI.__send__("has_#{candidate}_variable", to_ffi, name)
    end
    return unless kind
  else
    # Resolve aliases into a separate local: reusing +name+ for the resolved
    # key would replace the variable name that is looked up below.
    kind = Types.key?(type) ? type : Types.keys.find { |key| Types[key].member?(type) }
    raise ArgumentError, "unknown type #{type}" unless kind
    return unless C::BaseAPI.__send__("has_#{kind}_variable", to_ffi, name)
  end

  C::BaseAPI.__send__("get_#{kind}_variable", to_ffi, name)
end
#init(datapath = nil, language = 'eng', mode = :DEFAULT) ⇒ Object
102 103 104 105 106 |
# File 'lib/tesseract/api.rb', line 102
#
# Initialises the native API.
#
# The data path falls back to Tesseract.prefix and then to '/usr/share' when
# +datapath+ is nil/false. +language+ is stringified before being passed on.
#
# @param datapath [String, nil] data directory handed to the native init
# @param language [#to_s] language handed to the native init
# @param mode engine mode handed to the native init
# @return [nil]
# @raise [RuntimeError] if the native init returns a non-zero status
def init(datapath = nil, language = 'eng', mode = :DEFAULT)
  handle = to_ffi
  data_dir = datapath || Tesseract.prefix || '/usr/share'
  status = C::BaseAPI.init(handle, data_dir, language.to_s, mode)

  raise 'the API did not Init correctly' unless status.zero?
end
#mean_text_confidence ⇒ Object
202 203 204 |
# File 'lib/tesseract/api.rb', line 202
#
# Calls C::BaseAPI.mean_text_conf on this instance's native handle.
#
# @return whatever the native binding returns
def mean_text_confidence
  handle = to_ffi
  C::BaseAPI.mean_text_conf(handle)
end
#process_page(pix, page = 0, name = "") ⇒ Object
140 141 142 143 144 145 146 147 148 149 150 |
# File 'lib/tesseract/api.rb', line 140
#
# Runs the native +process_page+ call and returns the text it produced.
#
# A native string buffer is created up front, filled by the native call, read
# back using its reported length, and destroyed in all cases.
#
# @param pix [Image, Object] an Image (unwrapped via +to_ffi+) or a value
#   passed to the native call as-is
# @param page [Integer] page number forwarded to the native call
# @param name [String] name forwarded to the native call
# @return [String] contents of the native result buffer
# @raise [RuntimeError] if the native call returns a falsy value
def process_page(pix, page = 0, name = "")
  result = C.create_string
  handle = to_ffi
  native_image = pix.is_a?(Image) ? pix.to_ffi : pix

  succeeded = C::BaseAPI.process_page(handle, native_image, page, name, result)
  raise 'process_page failed' unless succeeded

  content = C.string_content(result)
  content.read_string(C.string_length(result))
ensure
  C.destroy_string(result)
end
#process_pages(name) ⇒ Object
128 129 130 131 132 133 134 135 136 137 138 |
# File 'lib/tesseract/api.rb', line 128
#
# Runs the native +process_pages+ call and returns the text it produced.
#
# A native string buffer is created up front, filled by the native call, read
# back using its reported length, and destroyed in all cases.
#
# @param name name forwarded to the native call
# @return [String] contents of the native result buffer
# @raise [RuntimeError] if the native call returns a falsy value
def process_pages(name)
  result = C.create_string

  succeeded = C::BaseAPI.process_pages(to_ffi, name, result)
  raise 'process_pages failed' unless succeeded

  content = C.string_content(result)
  content.read_string(C.string_length(result))
ensure
  C.destroy_string(result)
end
#read_config_file(path) ⇒ Object
108 109 110 |
# File 'lib/tesseract/api.rb', line 108
#
# Calls C::BaseAPI.read_config_file with this instance's native handle and
# +path+.
#
# @param path the config file path forwarded to the native call
# @return whatever the native binding returns
def read_config_file(path)
  handle = to_ffi
  C::BaseAPI.read_config_file(handle, path)
end
#set_image(pix) ⇒ Object
120 121 122 |
# File 'lib/tesseract/api.rb', line 120
#
# Passes an image to the native API.
#
# @param pix [Image, Object] an Image (unwrapped via +to_ffi+) or a value
#   passed to the native call as-is
# @return whatever the native binding returns
def set_image(pix)
  handle = to_ffi
  native_image = pix.is_a?(Image) ? pix.to_ffi : pix
  C::BaseAPI.set_image(handle, native_image)
end
#set_input_name(name) ⇒ Object
68 69 70 |
# File 'lib/tesseract/api.rb', line 68
#
# Calls C::BaseAPI.set_input_name with this instance's native handle and
# +name+.
#
# @param name the input name forwarded to the native call
# @return whatever the native binding returns
def set_input_name(name)
  handle = to_ffi
  C::BaseAPI.set_input_name(handle, name)
end
#set_output_name(name) ⇒ Object
72 73 74 |
# File 'lib/tesseract/api.rb', line 72
#
# Calls C::BaseAPI.set_output_name with this instance's native handle and
# +name+.
#
# @param name the output name forwarded to the native call
# @return whatever the native binding returns
def set_output_name(name)
  handle = to_ffi
  C::BaseAPI.set_output_name(handle, name)
end
#set_page_seg_mode(value) ⇒ Object
116 117 118 |
# File 'lib/tesseract/api.rb', line 116
#
# Calls C::BaseAPI.set_page_seg_mode with this instance's native handle and
# +value+.
#
# @param value the page segmentation mode forwarded to the native call
# @return whatever the native binding returns
def set_page_seg_mode(value)
  handle = to_ffi
  C::BaseAPI.set_page_seg_mode(handle, value)
end
#set_rectangle(left, top, width, height) ⇒ Object
124 125 126 |
# File 'lib/tesseract/api.rb', line 124
#
# Calls C::BaseAPI.set_rectangle with this instance's native handle and the
# four given values, in the order left, top, width, height.
#
# @param left forwarded to the native call
# @param top forwarded to the native call
# @param width forwarded to the native call
# @param height forwarded to the native call
# @return whatever the native binding returns
def set_rectangle(left, top, width, height)
  handle = to_ffi
  C::BaseAPI.set_rectangle(handle, left, top, width, height)
end
#set_variable(name, value) ⇒ Object
76 77 78 |
# File 'lib/tesseract/api.rb', line 76
#
# Calls C::BaseAPI.set_variable with this instance's native handle, +name+
# and +value+.
#
# @param name the variable name forwarded to the native call
# @param value the value forwarded to the native call
# @return whatever the native binding returns
def set_variable(name, value)
  handle = to_ffi
  C::BaseAPI.set_variable(handle, name, value)
end
#to_ffi ⇒ Object
218 219 220 |
# File 'lib/tesseract/api.rb', line 218
#
# Exposes the value stored in +@internal+, which the other methods of this
# class pass as the first argument to every C::BaseAPI call.
#
# @return the value of +@internal+
def to_ffi
  native_handle = @internal
  native_handle
end