Class: Datasets::AozoraBunko

Inherits:
Dataset
  • Object
show all
Defined in:
lib/datasets/aozora-bunko.rb

Overview

Dataset for AozoraBunko

Defined Under Namespace

Classes: Book

Instance Attribute Summary

Attributes inherited from Dataset

#metadata

Instance Method Summary collapse

Methods inherited from Dataset

#clear_cache!, #to_table

Constructor Details

#initialize ⇒ AozoraBunko

Returns a new instance of AozoraBunko.



149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/datasets/aozora-bunko.rb', line 149

# Creates the dataset and fills in its descriptive metadata: id, name, URL,
# license and a human-readable description.
#
# NOTE(review): the receiver of the assignments below was missing from this
# listing; `@metadata` is assumed from the `#metadata` attribute inherited
# from Dataset -- confirm against lib/datasets/aozora-bunko.rb.
#
# @return [AozoraBunko] a new instance of AozoraBunko
def initialize
  super()

  @metadata.id = 'aozora-bunko'
  @metadata.name = 'Aozora Bunko'
  @metadata.url = 'https://www.aozora.gr.jp/'
  @metadata.licenses = 'CC-BY-2.1-JP'
  # Squiggly heredoc: the common leading indentation is stripped from the text.
  @metadata.description = <<~DESCRIPTION
    Aozora Bunko is an activity to collect free electronic books that anyone can access
    on the Internet like a library. The copyrighted works and the works that are said to be
    "free to read" are available after being digitized in text and XHTML (some HTML) formats.
  DESCRIPTION
end

Instance Method Details

#each ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/datasets/aozora-bunko.rb', line 163

# Iterates over the rows of the catalogue CSV, yielding one Book per row.
#
# Without a block, an Enumerator for this method is returned instead.
# For every row, the two copyright-flag columns (作品著作権フラグ: work
# copyright flag, 人物著作権フラグ: person copyright flag) are passed through
# normalize_boolean before the row's fields are handed positionally to
# Book.new. Each book also receives this dataset's cache_path.
#
# @yieldparam book [Book] the book built from the current CSV row
def each
  unless block_given?
    return to_enum(__method__)
  end

  open_data do |stream|
    # The bytes read from the stream are tagged as UTF-8
    # (the file has a Byte Order Mark).
    csv_text = stream.read.force_encoding(Encoding::UTF_8)

    CSV.parse(csv_text, headers: true) do |record|
      ['作品著作権フラグ', '人物著作権フラグ'].each do |flag_column|
        record[flag_column] = normalize_boolean(record[flag_column])
      end

      yield Book.new(*record.fields).tap { |entry| entry.cache_path = cache_path }
    end
  end
end