Class: SearchSolrTools::Harvesters::NsidcJson

Inherits:
Base
  • Object
show all
Defined in:
lib/search_solr_tools/harvesters/nsidc_json.rb

Overview

Harvests data from NSIDC OAI and inserts it into Solr after it has been translated

Constant Summary

Constants inherited from Base

Base::DELETE_DOCUMENTS_RATIO, Base::JSON_CONTENT_TYPE, Base::XML_CONTENT_TYPE

Constants included from SSTLogger

SSTLogger::LOG_LEVELS

Instance Attribute Summary

Attributes inherited from Base

#environment

Instance Method Summary collapse

Methods inherited from Base

#create_new_solr_add_doc, #create_new_solr_add_doc_with_child, #delete_old_documents, #doc_valid?, #encode_data_provider_url, #get_results, #get_serialized_doc, #insert_solr_doc, #insert_solr_docs, #ping_solr, #remove_documents, #sanitize_data_centers_constraints, #solr_url, #valid_solr_spatial_coverage?

Methods included from SSTLogger

#logger, logger

Constructor Details

#initialize(env = 'development', die_on_failure: false) ⇒ NsidcJson

Returns a new instance of NsidcJson.



12
13
14
15
16
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 12

# Sets up an NSIDC JSON harvester.
#
# Hands both arguments to the parent constructor, creates the
# NsidcJsonToSolr translator stored in @translator, and then imports the
# facet bin configuration for +env+.
#
# @param env [String] environment name; defaults to 'development'
# @param die_on_failure [Boolean] forwarded unchanged to the parent constructor
def initialize(env = 'development', die_on_failure: false)
  super(env, die_on_failure: die_on_failure)
  @translator = Translators::NsidcJsonToSolr.new
  Helpers::FacetConfiguration.import_bin_configuration(env)
end

Instance Method Details

#docs_with_translated_entries_from_nsidc ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 74

# Fetches and translates every dataset listed by #result_ids_from_nsidc.
#
# The dataset id is the last '/'-separated segment of each identifier's text
# (for example, "oai:nsidc.org/AE_L2A" yields "AE_L2A"). A StandardError raised
# while fetching or translating one dataset is logged and its id is recorded;
# the remaining datasets are still processed.
#
# @return [Hash] with keys
#   :num_docs    - number of identifiers returned by #result_ids_from_nsidc,
#   :add_docs    - one { 'add' => { 'doc' => ... } } hash per translated dataset,
#   :failure_ids - ids of the datasets that raised
def docs_with_translated_entries_from_nsidc
  identifiers = result_ids_from_nsidc
  failed = []

  translated = identifiers.filter_map do |identifier|
    dataset_id = identifier.text.split('/').last
    begin
      { 'add' => { 'doc' => @translator.translate(fetch_json_from_nsidc(dataset_id)) } }
    rescue StandardError => e
      logger.error "Failed to fetch #{dataset_id} with error #{e}: #{e.backtrace}"
      failed << dataset_id
      nil # dropped by filter_map, so failed datasets never reach :add_docs
    end
  end

  { num_docs: identifiers.size, add_docs: translated, failure_ids: failed }
end

#fetch_json_from_nsidc(id) ⇒ Hash

Fetch a JSON representation of a dataset’s metadata

Parameters:

  • id (String)

    NSIDC authoritative ID for the dataset

Returns:

  • (Hash)

    Parsed version of the JSON response



69
70
71
72
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 69

# Fetch a JSON representation of a dataset's metadata.
#
# The request URL is #nsidc_json_url followed by the id and a ".json" suffix.
#
# @param id [String] NSIDC authoritative ID for the dataset
# @return [Hash] parsed version of the JSON response
def fetch_json_from_nsidc(id)
  dataset_url = "#{nsidc_json_url}#{id}.json"
  JSON.parse(RestClient.get(dataset_url))
end

#harvest_and_delete ⇒ Object



29
30
31
32
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 29

# Logs the source URL, then delegates to the parent implementation.
#
# The parent receives this class's harvest method
# (#harvest_nsidc_json_into_solr) and a "data_centers" constraint built from
# the NSIDC long name in Helpers::SolrFormat::DATA_CENTER_NAMES.
#
# @return whatever the parent #harvest_and_delete returns
def harvest_and_delete
  logger.info "Running harvest of NSIDC catalog from #{nsidc_json_url}"

  harvest_method = method(:harvest_nsidc_json_into_solr)
  nsidc_long_name = Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]
  super(harvest_method, "data_centers:\"#{nsidc_long_name}\"")
end

#harvest_nsidc_json_into_solr ⇒ Object

Gets translated entries from NSIDC OAI and adds them to Solr. This is the main entry point for the class.



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 36

# Harvests the translated NSIDC entries and inserts them into Solr.
#
# The status returned by #insert_solr_docs is updated with a "no docs" entry
# when nothing was harvested and with the count of per-dataset failures.
#
# @return [nil] when the resulting status reports ok
# @raise [Errors::HarvestError] when the status is not ok, or (wrapping the
#   original error under OTHER_ERROR) when any other StandardError occurs
def harvest_nsidc_json_into_solr
  harvested = docs_with_translated_entries_from_nsidc
  status = insert_solr_docs(harvested[:add_docs], Base::JSON_CONTENT_TYPE)

  status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if harvested[:num_docs].zero?

  # Always record the failure count; a count of 0 is fine and leaves the status at 0.
  failure_count = harvested[:failure_ids].length
  status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, failure_count)

  return if status.ok?

  raise Errors::HarvestError, status
rescue Errors::HarvestError
  # Already the error type callers expect; pass it through untouched.
  raise
rescue StandardError => e
  logger.error "An unexpected exception occurred while trying to harvest or insert: #{e}"
  logger.error e.backtrace
  failure_status = Helpers::HarvestStatus.new(Helpers::HarvestStatus::OTHER_ERROR => e)
  raise Errors::HarvestError, failure_status
end

#nsidc_json_url ⇒ Object



56
57
58
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 56

# Looks up the :nsidc_dataset_metadata_url setting for the current environment.
#
# @return the value stored under :nsidc_dataset_metadata_url in
#   SolrEnvironments[@environment]
def nsidc_json_url
  environment_settings = SolrEnvironments[@environment]
  environment_settings[:nsidc_dataset_metadata_url]
end

#ping_source ⇒ Object



18
19
20
21
22
23
24
25
26
27
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 18

# Checks whether the NSIDC source answers an OPTIONS request.
#
# @return [Boolean] true only when the response code is 200; false for any
#   other code, when the block is never invoked, or when a StandardError is
#   raised (in which case an error is logged)
def ping_source
  RestClient.options(nsidc_json_url) do |response, _request, _result|
    # Returning from inside the block exits ping_source itself with the verdict.
    return response.code == 200
  end
  false
rescue StandardError
  logger.error "Error trying to get options for #{nsidc_json_url} (ping)"
  false
end

#result_ids_from_nsidc ⇒ Object



60
61
62
63
64
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 60

# Retrieves the identifier nodes for the current environment.
#
# The request URL is the environment's :nsidc_dataset_metadata_url with its
# :nsidc_oai_identifiers_url appended; results are selected with the XPath
# '//xmlns:identifier'.
#
# @return the value returned by #get_results, or an empty Array when that
#   value is nil or false
def result_ids_from_nsidc
  environment_settings = SolrEnvironments[@environment]
  identifiers_url = environment_settings[:nsidc_dataset_metadata_url] +
                    environment_settings[:nsidc_oai_identifiers_url]

  identifiers = get_results(identifiers_url, '//xmlns:identifier')
  identifiers || []
end