Class: GeoCombine::Indexer

Inherits:
Object
  • Object
show all
Defined in:
lib/geo_combine/indexer.rb

Overview

Indexes GeoBlacklight documents into Solr.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(solr: nil, logger: GeoCombine::Logger.logger) ⇒ Indexer

Returns a new instance of Indexer.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/geo_combine/indexer.rb', line 13

# Sets up the logger, the batch size and the Solr client used for indexing.
#
# The batch size is read from the SOLR_BATCH_SIZE environment variable
# (default 100). A value that does not parse to a positive integer would make
# `each_slice` raise later on, so it is replaced by the default with a warning.
#
# When no client is injected, the Solr URL is resolved in this order:
# 1. the SOLR_URL environment variable,
# 2. the Blacklight default index connection, if Blacklight is defined,
# 3. 'http://localhost:8983/solr/blacklight-core' (with a warning).
#
# @param solr [Object, nil] pre-built Solr client; when given it is used as-is
#   and no URL resolution (or related warning) takes place
# @param logger [#warn] logger stored in @logger and used for warnings here
# @return [Indexer]
def initialize(solr: nil, logger: GeoCombine::Logger.logger)
  @logger = logger

  default_batch_size = 100
  @batch_size = ENV.fetch('SOLR_BATCH_SIZE', default_batch_size).to_i
  unless @batch_size.positive?
    @logger.warn "SOLR_BATCH_SIZE must be a positive integer; using default of #{default_batch_size}"
    @batch_size = default_batch_size
  end

  # An injected client makes URL resolution pointless: skip it so we neither
  # touch Blacklight nor log a misleading "SOLR_URL not set" warning.
  if solr
    @solr = solr
    return
  end

  # If SOLR_URL is set, use it; if in a Geoblacklight app, use its solr core
  solr_url = ENV.fetch('SOLR_URL', nil)
  solr_url ||= Blacklight.default_index.connection.base_uri.to_s if defined? Blacklight

  # If neither, warn and try to use local Blacklight default solr core
  if solr_url.nil?
    @logger.warn 'SOLR_URL not set; using Blacklight default'
    solr_url = 'http://localhost:8983/solr/blacklight-core'
  end

  # `client` is defined outside this excerpt; it supplies the connection
  # object handed to RSolr.
  @solr = RSolr.connect(client, url: solr_url)
end

Instance Attribute Details

#solr ⇒ Object (readonly)

Returns the value of attribute solr.



11
12
13
# File 'lib/geo_combine/indexer.rb', line 11

# Reader for the Solr client held in @solr.
#
# @return [Object, nil] the value of @solr (nil if it has not been assigned)
def solr = @solr

Instance Method Details

#index(docs) ⇒ Object

Index everything and return the number of docs successfully indexed



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/geo_combine/indexer.rb', line 31

# Indexes all given documents into Solr in batches, issues one commit, logs
# the elapsed time and returns how many documents were indexed.
#
# @param docs [Enumerable] items to index; each item is destructured into a
#   (doc, path) pair, so an item that is not a pair gets a nil path
# @return [Integer] sum of the counts returned by index_batch for each batch
def index(docs)
  @logger.info "indexing into #{solr_url}"
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  # Slice size is @batch_size, which the constructor reads from SOLR_BATCH_SIZE
  batches = docs.each_slice(@batch_size)
  total_indexed = batches.sum do |slice|
    index_batch(slice.map { |doc, path| [doc, path] })
  end

  # Commit once at the end so everything sent above is made visible
  @solr.commit
  sec = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  @logger.info format('indexed %<total_indexed>d documents in %<sec>.2f seconds', total_indexed:, sec:)
  total_indexed
end

#solr_url ⇒ Object

URL to the solr instance being used



52
53
54
# File 'lib/geo_combine/indexer.rb', line 52

# URL of the Solr instance in use, read from the :url entry of the client's
# options.
#
# @return [Object, nil] whatever @solr.options holds under :url
def solr_url
  connection_options = @solr.options
  connection_options[:url]
end