Class: Datasets::LivedoorNews

Inherits:
Dataset
  • Object
show all
Includes:
TarGzReadable
Defined in:
lib/datasets/livedoor-news.rb

Defined Under Namespace

Classes: Record

Instance Attribute Summary

Attributes inherited from Dataset

#metadata

Instance Method Summary collapse

Methods included from TarGzReadable

#open_tar_gz

Methods inherited from Dataset

#clear_cache!, #to_table

Constructor Details

#initialize(type: :topic_news) ⇒ LivedoorNews

Returns a new instance of LivedoorNews.



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/datasets/livedoor-news.rb', line 11

# Creates a dataset bound to one category of the livedoor news corpus.
#
# @param type [Symbol] the news category to use. Must be one of the
#   symbols listed in +news_list+ below; defaults to +:topic_news+.
# @raise [ArgumentError] if +type+ is not one of the supported categories.
def initialize(type: :topic_news)
  news_list = [
    :topic_news,
    :sports_watch,
    :it_life_hack,
    :kaden_channel,
    :movie_enter,
    :dokujo_tsushin,
    :smax,
    :livedoor_homme,
    :peachy
  ]
  # Validate before calling the parent initializer so that an invalid
  # type fails fast with a message listing every accepted value.
  unless news_list.include?(type)
    valid_type_labels = news_list.collect(&:inspect).join(", ")
    message = ":type must be one of [#{valid_type_labels}]: #{type.inspect}"
    raise ArgumentError, message
  end

  super()
  @type = type
  # NOTE(review): assumes the parent initializer has populated @metadata
  # (Dataset exposes a #metadata attribute) -- confirm against Dataset.
  # Each assignment needs an explicit receiver: a line starting with a bare
  # "." is parsed as a continuation of the previous expression.
  @metadata.id = 'livedoor-news'
  @metadata.name = 'livedoor-news'
  @metadata.url = 'https://www.rondhuit.com/download.html#ldcc'
  @metadata.licenses = ['CC-BY-ND-2.1-JP']
  # Stored as a lambda so fetch_readme is not called during construction;
  # it only runs when the lambda itself is invoked.
  @metadata.description = lambda do
    fetch_readme
  end
end

Instance Method Details

#each(&block) ⇒ Object



40
41
42
43
44
45
# File 'lib/datasets/livedoor-news.rb', line 40

# Iterates over the dataset by handing the caller's block to parse_data
# together with the path returned by download_tar_gz.
#
# When called without a block, returns an enumerator for this method and
# does not call download_tar_gz or parse_data.
#
# @return [Enumerator] when no block is given; otherwise whatever
#   parse_data returns.
def each(&block)
  if block_given?
    parse_data(download_tar_gz, &block)
  else
    to_enum(__method__)
  end
end