Module: Arx

Defined in:
lib/arx.rb,
lib/arx/error.rb,
lib/arx/cleaner.rb,
lib/arx/version.rb,
lib/arx/inspector.rb,
lib/arx/categories.rb,
lib/arx/query/query.rb,
lib/arx/entities/link.rb,
lib/arx/entities/paper.rb,
lib/arx/query/validate.rb,
lib/arx/entities/author.rb,
lib/arx/entities/category.rb

Overview

A Ruby interface for querying academic papers on the arXiv search API.

Defined Under Namespace

Modules: Error, Inspector, Validate

Classes: Author, Category, Cleaner, Link, Paper, Query

Constant Summary collapse

ENDPOINT =

The arXiv search API endpoint.

'http://export.arxiv.org/api/query?'
NEW_IDENTIFIER_FORMAT =

The current arXiv paper identifier scheme (1 April 2007 and onwards).

The last block of digits is either five digits long (for papers published from 1501, i.e. January 2015, onwards),
or four digits long (for papers published before 1501).

Examples:

1501.00001
1705.01662v1
1412.0135
0706.0001v2

See Also:

/^\d{4}\.\d{4,5}(v\d+)?$/
OLD_IDENTIFIER_FORMAT =

The legacy arXiv paper identifier scheme (before 1 April 2007).

Examples:

math/0309136v1
cond-mat/0211034

See Also:

/^[a-z]+(\-[a-z]+)?\/\d{7}(v\d+)?$/
VERSION =

The current version of Arx.

{
  major: 1,
  minor: 3,
  patch: 0,
  meta: nil,
}.compact.values.join('.').freeze
CATEGORIES =

The arXiv categories and their full names.

{
  'astro-ph' => 'Astrophysics',
  'astro-ph.CO' => 'Cosmology and Nongalactic Astrophysics',
  'astro-ph.EP' => 'Earth and Planetary Astrophysics',
  'astro-ph.GA' => 'Astrophysics of Galaxies',
  'astro-ph.HE' => 'High Energy Astrophysical Phenomena',
  'astro-ph.IM' => 'Instrumentation and Methods for Astrophysics',
  'astro-ph.SR' => 'Solar and Stellar Astrophysics',
  'cond-mat' => 'Condensed Matter',
  'cond-mat.dis-nn' => 'Disordered Systems and Neural Networks',
  'cond-mat.mes-hall' => 'Mesoscale and Nanoscale Physics',
  'cond-mat.mtrl-sci' => 'Materials Science',
  'cond-mat.other' => 'Other Condensed Matter',
  'cond-mat.quant-gas' => 'Quantum Gases',
  'cond-mat.soft' => 'Soft Condensed Matter',
  'cond-mat.stat-mech' => 'Statistical Mechanics',
  'cond-mat.str-el' => 'Strongly Correlated Electrons',
  'cond-mat.supr-con' => 'Superconductivity',
  'dis-nn' => 'Disordered Systems and Neural Networks', # cond-mat
  'mes-hall' => 'Mesoscale and Nanoscale Physics', # cond-mat
  'mtrl-sci' => 'Materials Science', # cond-mat
  'quant-gas' => 'Quantum Gases', # cond-mat
  'soft' => 'Soft Condensed Matter', # cond-mat
  'stat-mech' => 'Statistical Mechanics', # cond-mat
  'str-el' => 'Strongly Correlated Electrons', # cond-mat
  'supr-con' => 'Superconductivity', # cond-mat
  'cs' => 'Computer Science',
  'cs.AI' => 'Artificial Intelligence',
  'cs.AR' => 'Hardware Architecture',
  'cs.CC' => 'Computational Complexity',
  'cs.CE' => 'Computational Engineering, Finance, and Science',
  'cs.CG' => 'Computational Geometry',
  'cs.CL' => 'Computation and Language',
  'cs.CR' => 'Cryptography and Security',
  'cs.CV' => 'Computer Vision and Pattern Recognition',
  'cs.CY' => 'Computers and Society',
  'cs.DB' => 'Databases',
  'cs.DC' => 'Distributed, Parallel, and Cluster Computing',
  'cs.DL' => 'Digital Libraries',
  'cs.DM' => 'Discrete Mathematics',
  'cs.DS' => 'Data Structures and Algorithms',
  'cs.ET' => 'Emerging Technologies',
  'cs.FL' => 'Formal Languages and Automata Theory',
  'cs.GL' => 'General Literature',
  'cs.GR' => 'Graphics',
  'cs.GT' => 'Computer Science and Game Theory',
  'cs.HC' => 'Human-Computer Interaction',
  'cs.IR' => 'Information Retrieval',
  'cs.IT' => 'Information Theory',
  'cs.LG' => 'Learning',
  'cs.LO' => 'Logic in Computer Science',
  'cs.MA' => 'Multiagent Systems',
  'cs.MM' => 'Multimedia',
  'cs.MS' => 'Mathematical Software',
  'cs.NA' => 'Numerical Analysis',
  'cs.NE' => 'Neural and Evolutionary Computing',
  'cs.NI' => 'Networking and Internet Architecture',
  'cs.OH' => 'Other Computer Science',
  'cs.OS' => 'Operating Systems',
  'cs.PF' => 'Performance',
  'cs.PL' => 'Programming Languages',
  'cs.RO' => 'Robotics',
  'cs.SC' => 'Symbolic Computation',
  'cs.SD' => 'Sound',
  'cs.SE' => 'Software Engineering',
  'cs.SI' => 'Social and Information Networks',
  'cs.SY' => 'Systems and Control',
  'econ' => 'Economics',
  'econ.EM' => 'Econometrics',
  'eess' => 'Electrical Engineering and Systems Science',
  'eess.AS' => 'Audio and Speech Processing',
  'eess.IV' => 'Image and Video Processing',
  'eess.SP' => 'Signal Processing',
  'gr-qc' => 'General Relativity and Quantum Cosmology',
  'hep-ex' => 'High Energy Physics - Experiment',
  'hep-lat' => 'High Energy Physics - Lattice',
  'hep-ph' => 'High Energy Physics - Phenomenology',
  'hep-th' => 'High Energy Physics - Theory',
  'math' => 'Mathematics',
  'math.AC' => 'Commutative Algebra',
  'math.AG' => 'Algebraic Geometry',
  'math.AP' => 'Analysis of PDEs',
  'math.AT' => 'Algebraic Topology',
  'math.CA' => 'Classical Analysis and ODEs',
  'math.CO' => 'Combinatorics',
  'math.CT' => 'Category Theory',
  'math.CV' => 'Complex Variables',
  'math.DG' => 'Differential Geometry',
  'math.DS' => 'Dynamical Systems',
  'math.FA' => 'Functional Analysis',
  'math.GM' => 'General Mathematics',
  'math.GN' => 'General Topology',
  'math.GR' => 'Group Theory',
  'math.GT' => 'Geometric Topology',
  'math.HO' => 'History and Overview',
  'math.IT' => 'Information Theory',
  'math.KT' => 'K-Theory and Homology',
  'math.LO' => 'Logic',
  'math.MG' => 'Metric Geometry',
  'math.MP' => 'Mathematical Physics',
  'math.NA' => 'Numerical Analysis',
  'math.NT' => 'Number Theory',
  'math.OA' => 'Operator Algebras',
  'math.OC' => 'Optimization and Control',
  'math.PR' => 'Probability',
  'math.QA' => 'Quantum Algebra',
  'math.RA' => 'Rings and Algebras',
  'math.RT' => 'Representation Theory',
  'math.SG' => 'Symplectic Geometry',
  'math.SP' => 'Spectral Theory',
  'math.ST' => 'Statistics Theory',
  'math-ph' => 'Mathematical Physics',
  'nlin' => 'Nonlinear Sciences',
  'nlin.AO' => 'Adaptation and Self-Organizing Systems',
  'nlin.CD' => 'Chaotic Dynamics',
  'nlin.CG' => 'Cellular Automata and Lattice Gases',
  'nlin.PS' => 'Pattern Formation and Solitons',
  'nlin.SI' => 'Exactly Solvable and Integrable Systems',
  'nucl-ex' => 'Nuclear Experiment',
  'nucl-th' => 'Nuclear Theory',
  'physics' => 'Physics',
  'physics.acc-ph' => 'Accelerator Physics',
  'physics.ao-ph' => 'Atmospheric and Oceanic Physics',
  'physics.app-ph' => 'Applied Physics',
  'physics.atm-clus' => 'Atomic and Molecular Clusters',
  'physics.atom-ph' => 'Atomic Physics',
  'physics.bio-ph' => 'Biological Physics',
  'physics.chem-ph' => 'Chemical Physics',
  'physics.class-ph' => 'Classical Physics',
  'physics.comp-ph' => 'Computational Physics',
  'physics.data-an' => 'Data Analysis, Statistics and Probability',
  'physics.ed-ph' => 'Physics Education',
  'physics.flu-dyn' => 'Fluid Dynamics',
  'physics.gen-ph' => 'General Physics',
  'physics.geo-ph' => 'Geophysics',
  'physics.hist-ph' => 'History and Philosophy of Physics',
  'physics.ins-det' => 'Instrumentation and Detectors',
  'physics.med-ph' => 'Medical Physics',
  'physics.optics' => 'Optics',
  'physics.plasm-ph' => 'Plasma Physics',
  'physics.pop-ph' => 'Popular Physics',
  'physics.soc-ph' => 'Physics and Society',
  'physics.space-ph' => 'Space Physics',
  'acc-ph' => 'Accelerator Physics', # physics
  'ao-ph' => 'Atmospheric and Oceanic Physics', # physics
  'app-ph' => 'Applied Physics', # physics
  'atm-clus' => 'Atomic and Molecular Clusters', # physics
  'atom-ph' => 'Atomic Physics', # physics
  'bio-ph' => 'Biological Physics', # physics
  'chem-ph' => 'Chemical Physics', # physics
  'class-ph' => 'Classical Physics', # physics
  'comp-ph' => 'Computational Physics', # physics
  'data-an' => 'Data Analysis, Statistics and Probability', # physics
  'ed-ph' => 'Physics Education', # physics
  'flu-dyn' => 'Fluid Dynamics', # physics
  'gen-ph' => 'General Physics', # physics
  'geo-ph' => 'Geophysics', # physics
  'hist-ph' => 'History and Philosophy of Physics', # physics
  'ins-det' => 'Instrumentation and Detectors', # physics
  'med-ph' => 'Medical Physics', # physics
  'optics' => 'Optics', # physics
  'plasm-ph' => 'Plasma Physics', # physics
  'pop-ph' => 'Popular Physics', # physics
  'soc-ph' => 'Physics and Society', # physics
  'space-ph' => 'Space Physics', # physics
  'q-bio' => 'Quantitative Biology',
  'q-bio.BM' => 'Biomolecules',
  'q-bio.CB' => 'Cell Behavior',
  'q-bio.GN' => 'Genomics',
  'q-bio.MN' => 'Molecular Networks',
  'q-bio.NC' => 'Neurons and Cognition',
  'q-bio.OT' => 'Other Quantitative Biology',
  'q-bio.PE' => 'Populations and Evolution',
  'q-bio.QM' => 'Quantitative Methods',
  'q-bio.SC' => 'Subcellular Processes',
  'q-bio.TO' => 'Tissues and Organs',
  'q-fin' => 'Quantitative Finance',
  'q-fin.CP' => 'Computational Finance',
  'q-fin.EC' => 'Economics',
  'q-fin.GN' => 'General Finance',
  'q-fin.MF' => 'Mathematical Finance',
  'q-fin.PM' => 'Portfolio Management',
  'q-fin.PR' => 'Pricing of Securities',
  'q-fin.RM' => 'Risk Management',
  'q-fin.ST' => 'Statistical Finance',
  'q-fin.TR' => 'Trading and Market Microstructure',
  'quant-ph' => 'Quantum Physics',
  'stat' => 'Statistics',
  'stat.AP' => 'Applications',
  'stat.CO' => 'Computation',
  'stat.ME' => 'Methodology',
  'stat.ML' => 'Machine Learning',
  'stat.OT' => 'Other Statistics',
  'stat.TH' => 'Statistics Theory'
}.freeze

Class Method Summary collapse

Class Method Details

.search(*ids, query: nil, sort_by: :relevance, sort_order: :descending, start: 0, max_results: 10) {|query| ... } ⇒ Array<Paper>, Paper

Also known as: get

Note:

The sort_by, sort_order, start and max_results arguments are ignored if passing in your own query.

Performs a search query for papers on the arXiv search API.

Parameters:

  • ids (Array<String>)

    The IDs of the arXiv papers to restrict the query to.

  • query (Query, NilClass) (defaults to: nil)

    Predefined search query object.

  • sort_by (Symbol) (defaults to: :relevance)

    The sorting criteria for the returned results (see Arx::Query::SORT_BY).

  • sort_order (Symbol) (defaults to: :descending)

    The sorting order for the returned results (see Arx::Query::SORT_ORDER).

  • start (Integer) (defaults to: 0)

    The index of the first returned result.

  • max_results (Integer) (defaults to: 10)

    The maximum number of results returned by the query.

Yields:

  • (query)

Returns:

  • (Array<Paper>, Paper)

Raises:

  • (TypeError)


70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/arx.rb', line 70

# Performs a search query for papers on the arXiv search API.
#
# @note The +sort_by+, +sort_order+, +start+ and +max_results+ arguments are
#   only used to build a query when no +query+ object is passed in.
# @param ids [Array<String>] The IDs of the arXiv papers to restrict the query to.
# @param query [Query, NilClass] Predefined search query object.
# @param sort_by [Symbol] The sorting criteria for the returned results.
# @param sort_order [Symbol] The sorting order for the returned results.
# @param start [Integer] The index of the first returned result.
# @param max_results [Integer] The maximum number of results returned by the query.
# @yield [query] The query object, before the request is sent.
# @raise [TypeError] If +query+ is not an Arx::Query.
# @raise [Error::MissingPaper] If exactly one ID was given and no paper
#   (or a paper with an empty title) came back.
# @return [Array<Paper>, Paper] Whatever Paper.parse produced, with
#   empty-titled papers removed from arrays and +nil+ turned into +[]+.
def search(*ids, query: nil, sort_by: :relevance, sort_order: :descending, start: 0, max_results: 10)
  query ||= Query.new(*ids, sort_by: sort_by, sort_order: sort_order, start: start, max_results: max_results)
  raise TypeError, "Expected `query` to be an Arx::Query, got: #{query.class}" unless query.is_a? Query

  yield query if block_given?

  # Block form of URI.open closes the response stream as soon as the document
  # has been parsed, instead of leaving it open until garbage collection.
  document = URI.open(ENDPOINT + query.to_s) { |response| Nokogiri::XML(response) }.remove_namespaces!
  results = Paper.parse(document, single: ids.size == 1)

  case results
  when Paper
    # An empty title is treated as "no such paper".
    raise Error::MissingPaper.new(ids.first) if results.title.empty?
  when Array
    results.reject! { |paper| paper.title.empty? }
  when nil
    raise Error::MissingPaper.new(ids.first) if ids.size == 1

    results = []
  end

  results
end