Class: Wikian::Get

Inherits:
Subcommand show all
Defined in:
lib/wikian/get.rb

Instance Attribute Summary collapse

Attributes inherited from Subcommand

#api_url, #args, #config, #debug, #output_file, #query, #res, #res_body

Instance Method Summary collapse

Methods inherited from Subcommand

#doit, #make_template, #non_opt_args, #response_file, #write_response

Constructor Details

#initialize(args) ⇒ Get

Returns a new instance of Get.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/wikian/get.rb', line 8

# Builds a Get subcommand for the article whose URL appears in +args+.
#
# After running the Subcommand constructor, the first URL-looking argument
# is used to derive:
# * @title       - last segment of the URL path
# * @output_file - "<title>.<host>"
# * @query       - @params (plus 'titles' and 'format') rendered via #to_query
# * @api_url     - "https://<host>/w/api.php?<query>"
#
# Any StandardError raised while doing so is reported on STDERR and the
# process exits with a non-zero status.
#
# @param args [Array<String>] command-line arguments; must contain a URL
# @raise [SystemExit] when the arguments are missing or the URL is unusable
def initialize(args)
  raise ArgumentRequiredError if args.empty?

  super

  url_arg = args.find { |arg| arg =~ URI.regexp }
  raise BadUrlError unless url_arg

  @url = URI(url_arg)

  # URI#path is "" (not nil) for URLs such as "https://host", and "/" has
  # no usable basename either, so a plain truthiness check is not enough.
  raise BadUrlError if url.path.to_s.delete('/').empty?
  # The host is needed for both the output file name and the API URL.
  raise BadUrlError if url.host.to_s.empty?

  @title = File.basename(url.path)

  @output_file = "#{title}.#{url.host}"

  @params.merge!('titles' => title, 'format' => Wikian::RESPONSE_FORMAT)

  @query = @params.to_query

  @api_url = URI("https://#{url.host}/w/api.php?#{query}")
rescue StandardError => e
  STDERR.puts "#{e.class} in #{__FILE__}", e.message
  exit 1
end

Instance Attribute Details

#latest_revision ⇒ Object

Returns the value of attribute latest_revision.



6
7
8
# File 'lib/wikian/get.rb', line 6

# Reader for @latest_revision; nil until the instance variable is assigned.
def latest_revision
  instance_variable_get(:@latest_revision)
end

#title ⇒ Object

Returns the value of attribute title.



6
7
8
# File 'lib/wikian/get.rb', line 6

# Reader for @title; nil until the instance variable is assigned.
def title
  instance_variable_get(:@title)
end

#url ⇒ Object

Returns the value of attribute url.



6
7
8
# File 'lib/wikian/get.rb', line 6

# Reader for @url; nil until the instance variable is assigned.
def url
  instance_variable_get(:@url)
end

Instance Method Details

#extract_wikitext ⇒ Object

Extract the wikitext from the response file and save it into a `.wiki` file.

return: nil



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/wikian/get.rb', line 34

# Extracts the wikitext of the first revision listed in the API response
# (res_body) and writes it to "<response_file basename>.wiki", relative to
# the current working directory.
#
# Stores that revision in @latest_revision as a side effect. If anything
# goes wrong, the error is reported on STDERR and the process exits with a
# non-zero status.
#
# @return [nil]
# @raise [SystemExit] when the response cannot be parsed or written
def extract_wikitext
  pages = JSON.parse(res_body).dig('query', 'pages')

  # Wikipedia is inconsistent in their value for 'pages', it's sometimes a hash, sometimes an array
  page = pages.respond_to?(:keys) ? pages.values.first : pages.first
  @latest_revision = page['revisions'].first

  # Content lives under different keys depending on the response layout;
  # try the slot-based layouts first, then the flat legacy key.
  content = latest_revision.dig('slots', 'main', 'content') ||
            latest_revision.dig('slots', 'main', '*') ||
            latest_revision.dig('slots', '*') ||
            latest_revision.dig('*')

  wiki_file = "#{File.basename(response_file, File.extname(response_file))}.wiki"

  STDERR.puts "Warning: nil 'content' in #{Wikian::CONFIG_FILE}" unless content
  STDERR.puts "Writing to #{wiki_file}"
  File.open(wiki_file, 'w') { |f| f.puts content }
rescue StandardError => e
  STDERR.puts "An error occurred while extracting the wikitext"
  # Keep the underlying cause visible instead of discarding it.
  STDERR.puts "#{e.class}: #{e.message}"
  exit 1
end

#save_metadata ⇒ Object

Save article metadata.

Metadata such as the article timestamp is used to resolve edit conflicts.



60
61
62
63
64
65
66
67
# File 'lib/wikian/get.rb', line 60

# Saves article metadata: records the timestamp of latest_revision under
# the article title inside the 'meta' mapping of Wikian.meta_file (YAML).
# The timestamp is used to solve edit conflicts.
#
# Wikian.meta_dir is created if needed. Entries already stored for other
# titles are kept; the entry for this title is overwritten.
#
# @return [Integer] number of bytes written (the result of File.write)
def save_metadata
  FileUtils.mkdir_p(Wikian.meta_dir)

  # load_file closes the file itself, unlike YAML.load(File.open(...)).
  stored = File.exist?(Wikian.meta_file) ? YAML.load_file(Wikian.meta_file) : nil

  # An empty or malformed meta file does not yield a Hash; start afresh then.
  metadata = stored.is_a?(Hash) ? stored : {}
  metadata['meta'] = {} unless metadata['meta'].is_a?(Hash)
  metadata['meta'].merge!(title => { 'timestamp' => latest_revision['timestamp'] })

  File.write(Wikian.meta_file, YAML.dump(metadata))
end

#template ⇒ Object



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/wikian/get.rb', line 69

# Returns the YAML text of the default request description: a 'meta'
# section with the HTTP user-agent header and an 'api' section with the
# query parameters (revisions content and timestamp, main slot,
# formatversion 2).
#
# NOTE(review): the heredoc was flattened into a single string in this
# view; the squiggly opener (<<~) is reconstructed — confirm against
# lib/wikian/get.rb. Either opener yields YAML that parses identically.
#
# @return [String] YAML template text
def template
  <<~eos
    # for a list of parameters to use here see: https://www.mediawiki.org/wiki/API:Revisions
    meta:
      headers:
        user-agent: Wikian
    api:
      action:
        - query
      prop:
        - revisions
      rvprop:
        - content
        - timestamp
     #rvsection: # get specific sections
     #  - 0
     #  - 2
      rvslots:
        - main
      formatversion:
        - 2
  eos
end