Class: SearchSolrTools::Harvesters::NsidcJson
- Defined in:
- lib/search_solr_tools/harvesters/nsidc_json.rb
Overview
Harvests data from NSIDC OAI and inserts it into Solr after it has been translated
Constant Summary
Constants inherited from Base
Base::DELETE_DOCUMENTS_RATIO, Base::JSON_CONTENT_TYPE, Base::XML_CONTENT_TYPE
Constants included from SSTLogger
Instance Attribute Summary
Attributes inherited from Base
Instance Method Summary collapse
- #docs_with_translated_entries_from_nsidc ⇒ Object
-
#fetch_json_from_nsidc(id) ⇒ Hash
Fetch a JSON representation of a dataset’s metadata.
- #harvest_and_delete ⇒ Object
-
#harvest_nsidc_json_into_solr ⇒ Object
Gets translated entries from NSIDC OAI and adds them to Solr. This is the main entry point for the class.
-
#initialize(env = 'development', die_on_failure: false) ⇒ NsidcJson
constructor
A new instance of NsidcJson.
- #nsidc_json_url ⇒ Object
- #ping_source ⇒ Object
- #result_ids_from_nsidc ⇒ Object
Methods inherited from Base
#create_new_solr_add_doc, #create_new_solr_add_doc_with_child, #delete_old_documents, #doc_valid?, #encode_data_provider_url, #get_results, #get_serialized_doc, #insert_solr_doc, #insert_solr_docs, #ping_solr, #remove_documents, #sanitize_data_centers_constraints, #solr_url, #valid_solr_spatial_coverage?
Methods included from SSTLogger
Constructor Details
#initialize(env = 'development', die_on_failure: false) ⇒ NsidcJson
Returns a new instance of NsidcJson.
12 13 14 15 16 |
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 12

# Builds a harvester for the given environment.
#
# @param env [String] environment name; forwarded to the Base constructor and
#   to the facet bin configuration import
# @param die_on_failure [Boolean] forwarded unchanged to the Base constructor
def initialize(env = 'development', die_on_failure: false)
  # Same arguments a bare `super` would forward, spelled out explicitly.
  super(env, die_on_failure:)

  @translator = Translators::NsidcJsonToSolr.new
  Helpers::FacetConfiguration.import_bin_configuration(env)
end
Instance Method Details
#docs_with_translated_entries_from_nsidc ⇒ Object
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 74

# Fetches and translates every dataset listed by #result_ids_from_nsidc.
#
# A dataset whose fetch or translation raises a StandardError is logged and
# its id recorded; the remaining datasets are still processed.
#
# @return [Hash] with keys :num_docs (number of identifiers found),
#   :add_docs (array of { 'add' => { 'doc' => ... } } hashes) and
#   :failure_ids (ids that could not be fetched or translated)
def docs_with_translated_entries_from_nsidc
  identifiers = result_ids_from_nsidc
  add_docs = []
  failure_ids = []

  identifiers.each do |identifier|
    # Each identifier looks like "oai:nsidc.org/AE_L2A"; the dataset id is
    # whatever follows the last slash.
    id = identifier.text.split('/').last

    begin
      translated = @translator.translate(fetch_json_from_nsidc(id))
      add_docs << { 'add' => { 'doc' => translated } }
    rescue StandardError => e
      logger.error "Failed to fetch #{id} with error #{e}: #{e.backtrace}"
      failure_ids << id
    end
  end

  { num_docs: identifiers.size, add_docs: add_docs, failure_ids: failure_ids }
end
#fetch_json_from_nsidc(id) ⇒ Hash
Fetch a JSON representation of a dataset’s metadata
69 70 71 72 |
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 69

# Fetch a JSON representation of a dataset's metadata.
#
# @param id [String] dataset id, appended to #nsidc_json_url with a ".json" suffix
# @return [Hash] the parsed JSON response (per the documented return type)
def fetch_json_from_nsidc(id)
  dataset_url = "#{nsidc_json_url}#{id}.json"
  JSON.parse(RestClient.get(dataset_url))
end
#harvest_and_delete ⇒ Object
29 30 31 32 |
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 29

# Logs the harvest source, then delegates to Base#harvest_and_delete with
# this class's harvest method and a Solr constraint on the NSIDC data center
# long name.
#
# NOTE(review): how Base uses the constraint is not visible here — presumably
# to select which old documents to delete; confirm against Base.
def harvest_and_delete
  logger.info "Running harvest of NSIDC catalog from #{nsidc_json_url}"

  harvester = method(:harvest_nsidc_json_into_solr)
  long_name = Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC][:long_name]
  super(harvester, "data_centers:\"#{long_name}\"")
end
#harvest_nsidc_json_into_solr ⇒ Object
Gets translated entries from NSIDC OAI and adds them to Solr. This is the main entry point for the class.
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 36

# Gets translated entries from NSIDC and adds them to Solr. This is the main
# entry point for the class.
#
# @raise [Errors::HarvestError] when the resulting status is not ok, or when
#   any other StandardError occurs (wrapped in an OTHER_ERROR status)
def harvest_nsidc_json_into_solr
  harvest = docs_with_translated_entries_from_nsidc
  status = insert_solr_docs(harvest[:add_docs], Base::JSON_CONTENT_TYPE)

  status.record_status(Helpers::HarvestStatus::HARVEST_NO_DOCS) if harvest[:num_docs].zero?

  # The failure count is always recorded; per the original note, a count of 0
  # is fine and leaves the status at 0.
  status.record_status(Helpers::HarvestStatus::HARVEST_FAILURE, harvest[:failure_ids].length)

  raise Errors::HarvestError, status unless status.ok?
rescue Errors::HarvestError
  # Already the error type callers expect; pass it through untouched.
  raise
rescue StandardError => e
  logger.error "An unexpected exception occurred while trying to harvest or insert: #{e}"
  logger.error e.backtrace
  raise Errors::HarvestError, Helpers::HarvestStatus.new(Helpers::HarvestStatus::OTHER_ERROR => e)
end
#nsidc_json_url ⇒ Object
56 57 58 |
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 56

# Looks up the :nsidc_dataset_metadata_url setting for the current
# environment in SolrEnvironments.
def nsidc_json_url
  environment_settings = SolrEnvironments[@environment]
  environment_settings[:nsidc_dataset_metadata_url]
end
#ping_source ⇒ Object
18 19 20 21 22 23 24 25 26 27 |
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 18

# Checks whether the NSIDC metadata endpoint is reachable by sending an HTTP
# OPTIONS request to #nsidc_json_url.
#
# @return [Boolean] true only when the response code is 200; false for any
#   other code, when no response is yielded, or when the request raises a
#   StandardError (which is logged, not propagated)
def ping_source
  begin
    # The method name was missing here (`RestClient.(url)` is `RestClient.call(url)`),
    # which always raised and made the ping report failure.
    RestClient.options(nsidc_json_url) do |response, _request, _result|
      return response.code == 200
    end
  rescue StandardError
    logger.error "Error trying to get options for #{nsidc_json_url} (ping)"
  end
  false
end
#result_ids_from_nsidc ⇒ Object
60 61 62 63 64 |
# File 'lib/search_solr_tools/harvesters/nsidc_json.rb', line 60

# Queries the NSIDC OAI identifiers endpoint and returns the nodes matching
# the '//xmlns:identifier' XPath.
#
# @return the value returned by #get_results, or an empty array when that
#   value is nil or false
def result_ids_from_nsidc
  environment_settings = SolrEnvironments[@environment]
  url = environment_settings[:nsidc_dataset_metadata_url] + environment_settings[:nsidc_oai_identifiers_url]

  get_results(url, '//xmlns:identifier') || []
end