Class: P3
- Inherits: Object
- Inheritance hierarchy: Object → P3
- Defined in:
- lib/arxiv/references/P3.rb
Constant Summary collapse
- BASE_URL =
"https://arxiv.org"
- REFERENCE_START_REGEXP =
Regexp.new('\n*[rR][eE][fF][eE][rR][eE][nN][cC][eE][sS]?( +|\n+)?$')
- REFERENCE_REGEXP =
Regexp.new('(\[[0-9]?[0-9]\]|\[.+?\])')
Class Method Summary collapse
- .convertSingleColPdf(job_id, work_dir, file_name, use_dir) ⇒ Object
- .fetchFromPdfUrl(pdfUrl, work_dir = true, use_dir = true) ⇒ Object
- .fetchPdfFile(pdfUrl, file_name) ⇒ Object
- .fetchReference(file_name) ⇒ Object
- .getK2Pdf(id, work_dir, use_dir) ⇒ Object
- .makeDir(id, work_dir) ⇒ Object
- .makeFile(id, work_dir, use_dir) ⇒ Object
- .makeId ⇒ Object
- .removeDir(id, work_dir) ⇒ Object
- .removeFile(id, work_dir) ⇒ Object
Class Method Details
.convertSingleColPdf(job_id, work_dir, file_name, use_dir) ⇒ Object
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/arxiv/references/P3.rb', line 52
#
# Runs the external `k2pdfopt` tool (device profile `kpw`) on +file_name+
# inside a pseudo-terminal, answers its interactive prompt with a newline,
# echoes the tool's output to stdout until a line containing "written"
# appears, and returns the path where the converted PDF is expected.
#
# @param job_id [String] job identifier, forwarded to getK2Pdf
# @param work_dir [String] working directory, forwarded to getK2Pdf
# @param file_name [String] path of the PDF handed to k2pdfopt
# @param use_dir [Boolean] layout flag, forwarded to getK2Pdf
# @return [String] the value of getK2Pdf(job_id, work_dir, use_dir)
def self.convertSingleColPdf(job_id, work_dir, file_name, use_dir)
  # Argument-vector form: the path is passed to k2pdfopt verbatim instead of
  # being interpolated into a shell command line, so spaces or shell
  # metacharacters in the path can neither break nor inject into the command.
  PTY.spawn('k2pdfopt', '-dev', 'kpw', file_name) do |reader, writer|
    writer.sync = true
    # Wait up to 10 seconds for the interactive menu, then accept the defaults.
    reader.expect(/\S.*Enter option above \(h=help, q=quit\):/, 10) do
      writer.puts "\n"
      writer.flush
    end

    begin
      # `gets` returns nil at end of stream, which ends the loop cleanly.
      while (line = reader.gets)
        print line
        break if line.include?('written')
      end
    rescue Errno::EIO
      # On Linux, reading the PTY after the child has exited raises EIO;
      # that simply means there is no more output to read.
    end
  end

  getK2Pdf(job_id, work_dir, use_dir)
end
.fetchFromPdfUrl(pdfUrl, work_dir = true, use_dir = true) ⇒ Object
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/arxiv/references/P3.rb', line 98
#
# Runs the whole pipeline for one PDF: allocate a job id, download the PDF,
# convert it with convertSingleColPdf, extract the references with
# fetchReference, and remove the intermediate files.
#
# Cleanup runs in an `ensure` clause, so the job directory / files are removed
# even when the download, the conversion or the parsing raises.
#
# @param pdfUrl [String] location of the PDF, passed to fetchPdfFile
# @param work_dir [String] directory under which job files are created.
#   NOTE(review): the default is `true`, which the path helpers interpolate
#   as the literal directory name "true" - confirm this default is intended.
# @param use_dir [Boolean] when truthy, a per-job sub-directory is created and
#   removed; otherwise job files are created directly under +work_dir+
# @return [Object] whatever fetchReference returns
def self.fetchFromPdfUrl(pdfUrl, work_dir = true, use_dir = true)
  job_id = makeId
  makeDir(job_id, work_dir) if use_dir

  begin
    file_name = makeFile(job_id, work_dir, use_dir)
    fetchPdfFile(pdfUrl, file_name)
    executed_pdf = convertSingleColPdf(job_id, work_dir, file_name, use_dir)
    fetchReference(executed_pdf)
  ensure
    begin
      if use_dir
        removeDir(job_id, work_dir)
      else
        removeFile(job_id, work_dir)
      end
    rescue Errno::ENOENT
      # A run that failed part-way may not have produced every file; a missing
      # file during cleanup must not mask the error that caused the failure.
    end
  end
end
.fetchPdfFile(pdfUrl, file_name) ⇒ Object
44 45 46 47 48 49 50 |
# File 'lib/arxiv/references/P3.rb', line 44
#
# Copies the resource at +pdfUrl+ into the local file +file_name+ (binary
# mode), streaming the data instead of loading it into memory first.
#
# The source is opened with URI.open from open-uri: calling Kernel#open with
# a URL is deprecated since Ruby 2.7 and no longer works on Ruby 3.0+.
#
# @param pdfUrl [String] URL (or local path) of the PDF to copy
# @param file_name [String] destination path; created or truncated
# @return [Integer] number of bytes written
def self.fetchPdfFile(pdfUrl, file_name)
  # Idempotent; guarantees URI.open is available even if the file-level
  # requires (not visible from here) do not load open-uri.
  require 'open-uri'

  File.open(file_name, 'wb') do |out|
    URI.open(pdfUrl) do |data|
      IO.copy_stream(data, out)
    end
  end
end
.fetchReference(file_name) ⇒ Object
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/arxiv/references/P3.rb', line 69
#
# Extracts reference entries from the PDF at +file_name+.
#
# Steps:
# 1. find the lowest-numbered page whose text matches REFERENCE_START_REGEXP;
# 2. take the text of that page and every later page, collapsing blank lines
#    and runs of spaces and re-joining words hyphenated across a line break;
# 3. drop the first of those pages (the one that matched the heading);
# 4. put every REFERENCE_REGEXP marker on its own line, split on blank lines,
#    strip the remaining newlines and keep entries longer than 15 characters.
#
# Fixes over the previous version: returns an empty array instead of raising
# when no page matches the heading, and returns the result of step 4 (it used
# to be computed and thrown away, so the raw page texts were returned).
#
# NOTE(review): step 3 discards the whole page on which the heading was
# found, including any entries printed on it. That behaviour is kept as it
# was - confirm it is intended.
#
# @param file_name [String] path of the PDF to read
# @return [Array<String>] extracted entries; empty when none are found
def self.fetchReference(file_name)
  pages = PDF::Reader.new(file_name).pages

  page_no = pages
    .select { |page| page.text.index(REFERENCE_START_REGEXP) }
    .map(&:number)
    .min
  return [] if page_no.nil?

  page_texts = pages
    .select { |page| page.number >= page_no }
    .map { |page| page.text.gsub(/\n\n+/, "\n").gsub(/ +/, ' ').gsub(/-\n +/, '') }
  page_texts.shift

  page_texts
    .join(' ')
    .gsub(REFERENCE_REGEXP, "\n\\1")
    .split(/\n *\n/)
    .map { |entry| entry.gsub("\n", '') }
    .select { |entry| entry.length > 15 }
end
.getK2Pdf(id, work_dir, use_dir) ⇒ Object
32 33 34 35 36 37 38 |
# File 'lib/arxiv/references/P3.rb', line 32
#
# Builds the path of the k2pdfopt output file for a job.
#
# @param id [String] job identifier
# @param work_dir [String] working directory
# @param use_dir [Boolean] truthy: the file lives in the per-job directory;
#   falsy: the file lives directly under +work_dir+ with the id as a prefix
# @return [String] the output path
def self.getK2Pdf(id, work_dir, use_dir)
  return "#{work_dir}/#{id}/output_k2opt.pdf" if use_dir

  "#{work_dir}/#{id}-output_k2opt.pdf"
end
.makeDir(id, work_dir) ⇒ Object
16 17 18 |
# File 'lib/arxiv/references/P3.rb', line 16
#
# Creates the per-job directory "<work_dir>/<id>".
#
# @param id [String] job identifier, used as the directory name
# @param work_dir [String] parent directory
# @return [Integer] the result of Dir.mkdir
def self.makeDir(id, work_dir)
  job_dir = "#{work_dir}/#{id}"
  Dir.mkdir(job_dir)
end
.makeFile(id, work_dir, use_dir) ⇒ Object
24 25 26 27 28 29 30 |
# File 'lib/arxiv/references/P3.rb', line 24
#
# Builds the path under which a job's downloaded PDF is stored. Only the path
# string is produced; nothing is created on disk.
#
# @param id [String] job identifier
# @param work_dir [String] working directory
# @param use_dir [Boolean] truthy: path inside the per-job directory;
#   falsy: path directly under +work_dir+ with the id as a prefix
# @return [String] the download path
def self.makeFile(id, work_dir, use_dir)
  use_dir ? "#{work_dir}/#{id}/output.pdf" : "#{work_dir}/#{id}-output.pdf"
end
.makeId ⇒ Object
12 13 14 |
# File 'lib/arxiv/references/P3.rb', line 12
#
# Generates a job identifier: the SHA-256 hex digest (64 lowercase hex
# characters) of a seed built from the current time, the process id and a
# random number.
#
# The seed used to be the current time at one-second resolution only, so two
# jobs started within the same second received the same id and therefore the
# same working directory / file names.
#
# @return [String] 64-character hexadecimal identifier
def self.makeId
  seed = "#{Time.now.strftime('%F %H:%M:%S.%N')}-#{Process.pid}-#{rand}"
  Digest::SHA256.hexdigest(seed)
end
.removeDir(id, work_dir) ⇒ Object
20 21 22 |
# File 'lib/arxiv/references/P3.rb', line 20
#
# Recursively and forcibly removes the per-job directory "<work_dir>/<id>"
# using FileUtils.rm_rf.
#
# @param id [String] job identifier, i.e. the directory name
# @param work_dir [String] parent directory
# @return [Object] the result of FileUtils.rm_rf
def self.removeDir(id, work_dir)
  job_dir = "#{work_dir}/#{id}"
  FileUtils.rm_rf(job_dir)
end
.removeFile(id, work_dir) ⇒ Object
40 41 42 43 |
# File 'lib/arxiv/references/P3.rb', line 40
#
# Deletes the two files a job creates when no per-job directory is used:
# "<work_dir>/<id>-output.pdf" and "<work_dir>/<id>-output_k2opt.pdf".
#
# The second delete sits in an `ensure` clause so it is attempted even when
# the first one raises; previously a failure on the first file left the
# second one behind.
#
# @param id [String] job identifier
# @param work_dir [String] working directory
# @return [Integer] the result of File.delete (1)
# @raise [SystemCallError] e.g. Errno::ENOENT when a file does not exist
def self.removeFile(id, work_dir)
  File.delete("#{work_dir}/#{id}-output.pdf")
ensure
  File.delete("#{work_dir}/#{id}-output_k2opt.pdf")
end