Class: CDMBL::ETLWorker

Inherits:
Object
  • Object
show all
Includes:
Sidekiq::Worker
Defined in:
lib/cdmbl/etl_worker.rb

Overview

Extract records from OAI, delete records marked for deletion, sort the remaining records into “big” and “small” record piles based upon how many compounds a record has, chunk the small records into batches and the big records individually, and then send these records to a transformation worker.

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#batch_sizeObject (readonly)

Returns the value of attribute batch_size.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for @batch_size. #perform sets it from the 'batch_size' config key
# (falling back to 5) and coerces the value with #to_i.
def batch_size
  @batch_size
end

#cdm_endpointObject (readonly)

Returns the value of attribute cdm_endpoint.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for @cdm_endpoint. #perform sets it from the 'cdm_endpoint' config
# key, which is fetched without a fallback value.
def cdm_endpoint
  @cdm_endpoint
end

#completed_callback_klassObject

Because Sidekiq serializes params to JSON, we provide custom setters for dependencies (normally these would be default params in the constructor) so that they may be mocked and tested



50
51
52
# File 'lib/cdmbl/etl_worker.rb', line 50

# Callback class that #next_batch! invokes (via call!) once no further batch
# is enqueued. Sidekiq serializes job params to JSON, so this dependency is
# resolved here rather than passed in as an argument.
#
# @return [Object] the already-assigned value when it is truthy, otherwise
#   CDMBL::CompletedCallback (which is then remembered in the ivar)
def completed_callback_klass
  return @completed_callback_klass if @completed_callback_klass

  @completed_callback_klass = CDMBL::CompletedCallback
end

#compound_filter_klassObject



58
59
60
# File 'lib/cdmbl/etl_worker.rb', line 58

# Compound filter dependency, resolved lazily.
#
# @return [Object] the already-assigned value when it is truthy, otherwise
#   CompoundFilter (which is then remembered in the ivar)
def compound_filter_klass
  return @compound_filter_klass if @compound_filter_klass

  @compound_filter_klass = CompoundFilter
end

#configObject (readonly)

Returns the value of attribute config.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for @config, the argument #perform was most recently called with.
def config
  @config
end

#etl_worker_klassObject



54
55
56
# File 'lib/cdmbl/etl_worker.rb', line 54

# Worker class that #next_batch! enqueues (via perform_async) to process the
# following OAI batch.
#
# @return [Object] the already-assigned value when it is truthy, otherwise
#   ETLWorker (which is then remembered in the ivar)
def etl_worker_klass
  return @etl_worker_klass if @etl_worker_klass

  @etl_worker_klass = ETLWorker
end

#extract_compoundsObject (readonly)

Returns the value of attribute extract_compounds.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for @extract_compounds. #perform sets it from the
# 'extract_compounds' config key, falling back to false.
def extract_compounds
  @extract_compounds
end

#extractor_klassObject



62
63
64
# File 'lib/cdmbl/etl_worker.rb', line 62

# Extractor dependency, resolved lazily.
#
# @return [Object] the already-assigned value when it is truthy, otherwise
#   Extractor (which is then remembered in the ivar)
def extractor_klass
  return @extractor_klass if @extractor_klass

  @extractor_klass = Extractor
end

#field_mappingsObject (readonly)

Returns the value of attribute field_mappings.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for @field_mappings. #perform sets it from the 'field_mappings'
# config key, falling back to false.
def field_mappings
  @field_mappings
end

#is_recursiveObject (readonly)

Returns the value of attribute is_recursive.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for @is_recursive. #perform sets it from the 'is_recursive' config
# key, falling back to true. #next_batch! only enqueues another ETL job when
# this is truthy.
def is_recursive
  @is_recursive
end

#load_worker_klassObject



66
67
68
# File 'lib/cdmbl/etl_worker.rb', line 66

# Load worker dependency, resolved lazily.
#
# @return [Object] the already-assigned value when it is truthy, otherwise
#   LoadWorker (which is then remembered in the ivar)
def load_worker_klass
  return @load_worker_klass if @load_worker_klass

  @load_worker_klass = LoadWorker
end

#max_compoundsObject (readonly)

Returns the value of attribute max_compounds.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for @max_compounds. #perform sets it from the 'max_compounds'
# config key, falling back to 10; the value is stored as given (no coercion).
def max_compounds
  @max_compounds
end

#oai_endpointObject (readonly)

Returns the value of attribute oai_endpoint.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for @oai_endpoint. #perform sets it from the 'oai_endpoint' config
# key, which is fetched without a fallback value.
def oai_endpoint
  @oai_endpoint
end

#resumption_tokenObject (readonly)

Returns the value of attribute resumption_token.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for @resumption_token. #perform sets it from the 'resumption_token'
# config key, falling back to nil.
def resumption_token
  @resumption_token
end

#set_specObject (readonly)

Returns the value of attribute set_spec.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for @set_spec. #perform sets it from the 'set_spec' config key,
# falling back to nil.
def set_spec
  @set_spec
end

#solr_configObject (readonly)

Returns the value of attribute solr_config.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for @solr_config. #perform sets it to the 'solr_config' config
# entry after calling symbolize_keys on it; #next_batch! hands it to the
# completed callback.
def solr_config
  @solr_config
end

#transform_worker_klassObject



70
71
72
# File 'lib/cdmbl/etl_worker.rb', line 70

# Transform worker dependency, resolved lazily.
#
# @return [Object] the already-assigned value when it is truthy, otherwise
#   TransformWorker (which is then remembered in the ivar)
def transform_worker_klass
  return @transform_worker_klass if @transform_worker_klass

  @transform_worker_klass = TransformWorker
end

Instance Method Details

#next_batch!Object

Recurse through OAI batches one at a time



75
76
77
78
79
80
81
# File 'lib/cdmbl/etl_worker.rb', line 75

# Continue the OAI walk one batch at a time.
#
# When there is a next resumption token and recursion is enabled, another
# ETL job is enqueued with next_config. In every other case the walk is
# treated as finished and the completed callback is invoked with solr_config.
#
# @return [Object] whatever perform_async or call! returns
def next_batch!
  keep_going = next_resumption_token && is_recursive
  return completed_callback_klass.call!(solr_config) unless keep_going

  etl_worker_klass.perform_async(next_config)
end

#perform(config) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/cdmbl/etl_worker.rb', line 28

# Sidekiq entry point: unpack the job settings into instance state, process
# the current batch, then either schedule the next batch or finish.
#
# @param config [Hash] string-keyed job settings. 'solr_config',
#   'cdm_endpoint' and 'oai_endpoint' are fetched without a fallback; every
#   other key has a default.
def perform(config)
  # Sidekiq keeps job params as JSON, which rules out passing collaborator
  # objects (such as the OAI request or the extractor) directly. Everything
  # therefore arrives as plain values and is unpacked one key at a time.
  @config = config

  # Keys fetched without a fallback value.
  @solr_config = config.fetch('solr_config').symbolize_keys
  @cdm_endpoint = config.fetch('cdm_endpoint')
  @oai_endpoint = config.fetch('oai_endpoint')

  # Keys with a fallback value.
  @field_mappings = config.fetch('field_mappings', false)
  @extract_compounds = config.fetch('extract_compounds', false)
  @resumption_token = config.fetch('resumption_token', nil)
  @set_spec = config.fetch('set_spec', nil)
  @max_compounds = config.fetch('max_compounds', 10)
  @batch_size = config.fetch('batch_size', 5).to_i
  @is_recursive = config.fetch('is_recursive', true)

  # Handle this batch first, then decide whether another one follows.
  extract_batch!
  next_batch!
end