Class: CDMBL::ETLWorker

Inherits:
Object
  • Object
show all
Includes:
Sidekiq::Worker
Defined in:
lib/cdmbl/etl_worker.rb

Overview

Extract records from OAI, delete records marked for deletion, sort the remaining records into “big” and “small” record piles based upon how many compounds a record has, chunk the small records into batches and handle the big records individually, and then send these records to a transformation worker

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#batch_sizeObject (readonly)

Returns the value of attribute batch_size.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for the batch size assigned in #perform, where it is taken from
# config['batch_size'] (default 5) and coerced with #to_i.
# Returns nil if #perform has not assigned it yet.
#
# @return [Integer, nil]
def batch_size
  @batch_size
end

#cdm_endpointObject (readonly)

Returns the value of attribute cdm_endpoint.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for the value assigned in #perform from config.fetch('cdm_endpoint').
# No default is supplied there, so the key is required.
# Returns nil if #perform has not assigned it yet.
def cdm_endpoint
  @cdm_endpoint
end

#completed_callback_klassObject

Because Sidekiq serializes params to JSON, we provide custom setters for dependencies (normally these would be default params in the constructor) so that they may be mocked and tested



52
53
54
# File 'lib/cdmbl/etl_worker.rb', line 52

# Class whose .call! is invoked by #next_batch! when no further batch is
# enqueued. Falls back to CDMBL::CompletedCallback (and remembers it) when no
# truthy value has been stored in @completed_callback_klass.
def completed_callback_klass
  return @completed_callback_klass if @completed_callback_klass

  @completed_callback_klass = CDMBL::CompletedCallback
end

#compound_filter_klassObject



60
61
62
# File 'lib/cdmbl/etl_worker.rb', line 60

# Compound filter dependency. Falls back to CompoundFilter (and remembers it)
# when no truthy value has been stored in @compound_filter_klass.
def compound_filter_klass
  return @compound_filter_klass if @compound_filter_klass

  @compound_filter_klass = CompoundFilter
end

#configObject (readonly)

Returns the value of attribute config.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for the config object exactly as it was passed to #perform.
# Returns nil if #perform has not assigned it yet.
def config
  @config
end

#etl_worker_klassObject



56
57
58
# File 'lib/cdmbl/etl_worker.rb', line 56

# Worker class that #next_batch! calls .perform_async on to enqueue the next
# batch. Falls back to ETLWorker (and remembers it) when no truthy value has
# been stored in @etl_worker_klass.
def etl_worker_klass
  return @etl_worker_klass if @etl_worker_klass

  @etl_worker_klass = ETLWorker
end

#extract_compoundsObject (readonly)

Returns the value of attribute extract_compounds.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for the value assigned in #perform from
# config.fetch('extract_compounds', false).
# Returns nil if #perform has not assigned it yet.
def extract_compounds
  @extract_compounds
end

#extractor_klassObject



64
65
66
# File 'lib/cdmbl/etl_worker.rb', line 64

# Extractor dependency. Falls back to Extractor (and remembers it) when no
# truthy value has been stored in @extractor_klass.
def extractor_klass
  return @extractor_klass if @extractor_klass

  @extractor_klass = Extractor
end

#field_mappingsObject (readonly)

Returns the value of attribute field_mappings.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for the value assigned in #perform from
# config.fetch('field_mappings', false).
# Returns nil if #perform has not assigned it yet.
def field_mappings
  @field_mappings
end

#fromObject (readonly)

Returns the value of attribute from.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for the value assigned in #perform from config.fetch('from', nil).
# NOTE(review): presumably the OAI "from" date bound — confirm against the
# extractor, which is not visible here.
def from
  @from
end

#is_recursiveObject (readonly)

Returns the value of attribute is_recursive.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for the flag assigned in #perform from
# config.fetch('is_recursive', true). #next_batch! only enqueues another
# batch when this is truthy.
# Returns nil if #perform has not assigned it yet.
def is_recursive
  @is_recursive
end

#load_worker_klassObject



68
69
70
# File 'lib/cdmbl/etl_worker.rb', line 68

# Load worker dependency. Falls back to LoadWorker (and remembers it) when no
# truthy value has been stored in @load_worker_klass.
def load_worker_klass
  return @load_worker_klass if @load_worker_klass

  @load_worker_klass = LoadWorker
end

#max_compoundsObject (readonly)

Returns the value of attribute max_compounds.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for the value assigned in #perform from
# config.fetch('max_compounds', 10). Unlike batch_size, #perform does not
# coerce it with #to_i, so it is returned as supplied.
# Returns nil if #perform has not assigned it yet.
def max_compounds
  @max_compounds
end

#oai_endpointObject (readonly)

Returns the value of attribute oai_endpoint.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for the value assigned in #perform from config.fetch('oai_endpoint').
# No default is supplied there, so the key is required.
# Returns nil if #perform has not assigned it yet.
def oai_endpoint
  @oai_endpoint
end

#resumption_tokenObject (readonly)

Returns the value of attribute resumption_token.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for the value assigned in #perform from
# config.fetch('resumption_token', nil), i.e. the token this run was started
# with (nil when none was given).
def resumption_token
  @resumption_token
end

#set_specObject (readonly)

Returns the value of attribute set_spec.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for the value assigned in #perform from config.fetch('set_spec', nil).
# NOTE(review): presumably the OAI set to restrict extraction to — confirm
# against the extractor, which is not visible here.
def set_spec
  @set_spec
end

#solr_configObject (readonly)

Returns the value of attribute solr_config.



9
10
11
# File 'lib/cdmbl/etl_worker.rb', line 9

# Reader for the value assigned in #perform from
# config.fetch('solr_config').symbolize_keys (a required key). #next_batch!
# passes it to completed_callback_klass.call! when no further batch is enqueued.
# Returns nil if #perform has not assigned it yet.
def solr_config
  @solr_config
end

#transform_worker_klassObject



72
73
74
# File 'lib/cdmbl/etl_worker.rb', line 72

# Transform worker dependency. Falls back to TransformWorker (and remembers
# it) when no truthy value has been stored in @transform_worker_klass.
def transform_worker_klass
  return @transform_worker_klass if @transform_worker_klass

  @transform_worker_klass = TransformWorker
end

Instance Method Details

#next_batch!Object

Recurse through OAI batches one at a time



77
78
79
80
81
82
83
# File 'lib/cdmbl/etl_worker.rb', line 77

# Either enqueues the next ETL batch or signals completion.
#
# When there is a next resumption token and is_recursive is truthy, the next
# batch is enqueued via etl_worker_klass.perform_async(next_config).
# Otherwise completed_callback_klass.call!(solr_config) is invoked.
#
# @return [Object] whatever the invoked collaborator returns
def next_batch!
  keep_going = next_resumption_token && is_recursive
  return completed_callback_klass.call!(solr_config) unless keep_going

  etl_worker_klass.perform_async(next_config)
end

#perform(config) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/cdmbl/etl_worker.rb', line 29

# Sidekiq entry point: unpacks the job config into instance state, then
# extracts the current batch and moves on to the next one.
#
# Sidekiq stores job params as JSON, so collaborators cannot be injected as
# arguments; everything needed arrives as string-keyed entries in +config+.
#
# Required keys (fetched without a default): 'solr_config', 'cdm_endpoint',
# 'oai_endpoint'. All other keys are optional and fall back to the defaults
# listed below.
#
# @param config [Hash] string-keyed job configuration
# @return [Object] the result of #next_batch!
def perform(config)
  @config       = config
  @solr_config  = config.fetch('solr_config').symbolize_keys
  @cdm_endpoint = config.fetch('cdm_endpoint')
  @oai_endpoint = config.fetch('oai_endpoint')

  # Optional settings and their fallbacks, in assignment order.
  optional_settings = {
    'field_mappings'    => false,
    'extract_compounds' => false,
    'resumption_token'  => nil,
    'set_spec'          => nil,
    'max_compounds'     => 10,
    'batch_size'        => 5,
    'is_recursive'      => true,
    'from'              => nil
  }
  optional_settings.each do |key, fallback|
    setting = config.fetch(key, fallback)
    # batch_size is the only setting that is coerced to an integer.
    setting = setting.to_i if key == 'batch_size'
    instance_variable_set("@#{key}", setting)
  end

  extract_batch!
  next_batch!
end