Class: BulkOps::Operation

Inherits:
ActiveRecord::Base
Includes:
Verification
Defined in:
lib/bulk_ops/operation.rb

Instance Attribute Summary

Class Method Summary

Instance Method Summary

Methods included from Verification

#find_field_name, #get_file_paths, is_file_field?, #notify, #verify

Instance Attribute Details

#metadata ⇒ Object

Returns the value of attribute metadata.



# File 'lib/bulk_ops/operation.rb', line 10

def metadata
  @metadata
end

#reference_identifier ⇒ Object

Returns the value of attribute reference_identifier.



# File 'lib/bulk_ops/operation.rb', line 10

def reference_identifier
  @reference_identifier
end

#visibility ⇒ Object

Returns the value of attribute visibility.



# File 'lib/bulk_ops/operation.rb', line 10

def visibility
  @visibility
end

#work_type ⇒ Object

Returns the value of attribute work_type.



# File 'lib/bulk_ops/operation.rb', line 10

def work_type
  @work_type
end

Class Method Details

.default_metadata_fields(labels = true) ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 326

def self.default_metadata_fields(labels = true)
  # Returns the full set of metadata parameters from ScoobySnacks to include in the ingest template spreadsheet
  field_names = []
  schema.all_fields.each do |field|
    field_names << field.name
    field_names << "#{field.name} Label" if labels && field.controlled?
  end
  return field_names
end
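
A hedged usage sketch: building a template spreadsheet header row from this list (the CSV layout and filename below are illustrative, not part of the gem):

require 'csv'

# Field names from the schema, plus a "<field> Label" column for each controlled field
headers = BulkOps::Operation.default_metadata_fields(true)

# Write a header-only template spreadsheet
CSV.open("ingest_template.csv", "w") { |csv| csv << headers }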

.schema ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 41

def self.schema
  ScoobySnacks::METADATA_SCHEMA
end

.unique_name(name, user) ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 14

def self.unique_name name, user
  while BulkOps::Operation.find_by(name: name) || BulkOps::GithubAccess.list_branch_names(user).include?(name) do
    if ['-','_'].include?(name[-2]) && name[-1].to_i > 0
      name = name[0..-2] + (name[-1].to_i + 1).to_s
    else
      name = name + "_1"
    end
  end
  return name
end
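
Illustrative behavior, assuming "summer_ingest" is already taken for this user's branches and that current_user is supplied by your application:

BulkOps::Operation.unique_name("summer_ingest", current_user)   #=> "summer_ingest_1"
BulkOps::Operation.unique_name("summer_ingest_1", current_user) #=> "summer_ingest_2"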

Instance Method Details

#accumulated_errors ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 221

def accumulated_errors
  proxy_errors + (@operation_errors || [])
  # TODO - make sure this captures all operation errors
end

#apply! ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 92

def apply!
  update({stage: "running", 
          status: "OK",
          message: "Bulk operation initiated by #{user.name || user.email}"})
  # We should now be on the master branch. Make sure the correct spreadsheet version is loaded
  final_spreadsheet

  # In case this operation has run before, gather all work proxies that are completed and exclude them from the application
  complete_proxies = work_proxies.select{|proxy| proxy.status == "complete" && proxy.work_id.present?}
  incomplete_row_numbers = Array(0..@metadata.length-1) - complete_proxies.map(&:row_number)

  # Destroy all proxies corresponding to incomplete rows
  (work_proxies - complete_proxies).each{|proxy| proxy.destroy!}

  # Create a new work proxy for each incomplete row
  # All the proxies need to exist before parsing in order to correctly recognize relationships
  incomplete_row_numbers.each do |row_number|
    values = @metadata[row_number]
    next if values.to_s.gsub(',','').blank?
    next if BulkOps::Parser.is_file_set? @metadata, row_number
    work_proxies.create(status: "new",
                        last_event: DateTime.now,
                        work_type: work_type,
                        row_number: row_number,
                        visibility: options['visibility'],
                        message: "created during ingest initiated by #{user.name || user.email}")
 
  end
  # Reload the operation so that it can recognize its new proxies
  reload
  # Parse each spreadsheet row and create a background job for each proxy we just created
  incomplete_row_numbers.each do |row_number|
    values = @metadata[row_number]
    proxy = work_proxies.find_by(row_number: row_number)
    proxy.update(message: "interpreted at #{DateTime.now.strftime("%d/%m/%Y %H:%M")} " + proxy.message)
    data = BulkOps::Parser.new(proxy, @metadata,options).interpret_data(raw_row: values)
    next unless proxy.proxy_errors.blank?
    BulkOps::WorkJob.perform_later(proxy.work_type || "Work",
                                   user.email,
                                   data,
                                   proxy.id,
                                   proxy.visibility)
  end
  # If any errors have occurred, make sure they are logged in github and users are notified.
  report_errors!
end
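
A sketch of where apply! sits in the operation lifecycle, using only methods defined on this class (how the operation was created and drafted is up to your application):

op = BulkOps::Operation.find_by(name: "summer_ingest")
op.apply! if op.stage == "pending"  # parse the merged spreadsheet and enqueue a WorkJob per row

# Later, e.g. from a job callback or a scheduled task:
op.check_if_finished                # advances the stage to "complete" or "errors" once no proxies are busy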

#apply_update!(spreadsheet) ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 177

def apply_update! spreadsheet

  # this array will keep track of any current proxies not included in the final spreadsheet
  abandoned_proxies = work_proxies.dup
  # Loop through the final spreadsheet
  final_spreadsheet.each_with_index do |values,row_number|     
    # Grab the work id
    work_id = false
    values.each{|field,val| work_id = val if ["id","workid","recordid"].include?(field.downcase.gsub(/[-_\s]/,''))}
    @operation_errors << BulkOps::Error.new(:no_work_id_field) unless work_id

    #proxy = BulkOps::WorkProxy.find_by(operation_id: id, work_id: values["work_id"])
    if (proxy = work_proxies.find_by(work_id: work_id))
      abandoned_proxies.delete(proxy)
      proxy.update(status: "updating",
                   row_number: row_number,
                   message: "update initiated by #{user.name || user.email}")
    else
      # Create a proxy for a work that is in the spreadsheet, but wasn't in the initial draft
      work_proxies.create(status: "queued",
                          last_event: DateTime.now,
                          row_number: row_number,
                          message: "created during update application, which was initiated by #{user.name || user.email}")
    end
  end

  # Loop through any proxies in the draft that were dropped from the spreadsheet
  abandoned_proxies.each do |dead_proxy|
    dead_proxy.lift_hold
    dead_proxy.destroy!
  end
  
  #loop through the work proxies to create a job for each work
  work_proxies.each do |proxy|
    data = BulkOps::Parser.new(proxy,final_spreadsheet).interpret_data(raw_row: final_spreadsheet[proxy.row_number])
    BulkOps::UpdateWorkJob.perform_later(proxy.work_type || "",
                                         user.email,
                                         data,
                                         proxy.id,
                                         proxy.visibility)
  end
  report_errors! 
end
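
A hedged sketch of driving an update. Every row must carry a work id column (any header that normalizes to "id", "workid", or "recordid"); rows without one record a :no_work_id_field error:

op = BulkOps::Operation.find_by(name: "metadata_cleanup")
op.apply_update!(op.get_spreadsheet)  # re-parses each row and enqueues an UpdateWorkJob per work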

#busy? ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/bulk_ops/operation.rb', line 310

def busy?
  return true if work_proxies.any?{|prx| prx.status.downcase == "running"}
  return true if work_proxies.any?{|prx| prx.status.downcase == "queued"}
  return true if work_proxies.any?{|prx| prx.status.downcase == "starting"}
  return false
end

#check_if_finished ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 139

def check_if_finished
  return unless stage == "running" && !busy?

  update(stage: "finishing")

  # Attempt to resolve each dangling (objectless) relationship,
  # keeping track of the proxy that owns each one so errors can reference its row
  pending_relationships = work_proxies.flat_map{|proxy| proxy.relationships.select{|rel| rel.status == "pending"}.map{|rel| [proxy, rel]}}
  pending_relationships.each do |proxy, rel|
    begin
      rel.resolve!
    rescue StandardError => e
      @operation_errors << BulkOps::Error.new(:relationship_error, row_number: proxy.row_number, object_id: rel.id, message: "#{e.class} - #{e.message}")
    end
  end
  
  work_proxies.each do |proxy|
    begin
      # Re-save each ingested work; any failure is recorded as an ingest error
      Work.find(proxy.work_id).save
    rescue StandardError => e
      @operation_errors << BulkOps::Error.new(:ingest_failure, row_number: proxy.row_number, object_id: proxy.id, message: "#{e.class} - #{e.message}")
    end
  end

  new_stage = accumulated_errors.blank? ? "complete" : "errors"
  update(stage: new_stage)
  report_errors!
  lift_holds
end
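
A minimal polling sketch using the stage predicates defined below:

op.check_if_finished   # no-op unless stage == "running" and no proxies are queued, starting, or running
if op.complete?
  puts "Bulk operation finished cleanly"
elsif op.accumulated_errors.present?
  puts "Finished with errors; details at #{op.error_url}"
end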

#complete? ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/bulk_ops/operation.rb', line 306

def complete?
  return (stage == 'complete')
end

#create_branch(fields: nil, work_ids: nil, options: nil) ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 242

def create_branch(fields: nil, work_ids: nil, options: nil)
  git.create_branch!
  bulk_ops_dir = Gem::Specification.find_by_name("bulk_ops").gem_dir

  #copy template files
  Dir["#{bulk_ops_dir}/#{BulkOps::TEMPLATE_DIR}/*"].each do |file| 
    git.add_file file 
  end

  #update configuration options 
  unless options.blank?
    full_options = YAML.load_file(File.join(bulk_ops_dir,BulkOps::TEMPLATE_DIR, BulkOps::OPTIONS_FILENAME))

    options.each { |option, value| full_options[option] = value }

    full_options['name'] = name
    full_options['status'] = status

    git.update_options full_options
  end

  create_new_spreadsheet(fields: fields, work_ids: work_ids) 
end
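
A hedged example of drafting a new operation branch with option overrides (the option keys shown are assumptions about your options template, and the operation is assumed to already have its user set):

op = BulkOps::Operation.create(name: BulkOps::Operation.unique_name("summer_ingest", current_user),
                               stage: "draft")
# Copies the template files into a new branch, merges the given options into the
# template options YAML, and writes a starter spreadsheet for the listed fields.
op.create_branch(fields: ["title", "creator"], options: { "visibility" => "open" })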

#create_pull_request(message: false) ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 231

def create_pull_request message: false
  return false unless (pull_num = git.create_pull_request(message: message))
  update(pull_id: pull_num)
  return pull_num
end

#delete_branch ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 317

def delete_branch
  git.delete_branch!
end

#destroy ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 321

def destroy
  git.delete_branch!
  super
end

#destroy_all_proxies ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 84

def destroy_all_proxies
  work_proxies.each do |proxy| 
    proxy.destroy
  end
  update(stage: "waiting",
         status: "reverted changes")      
end

#destroy_all_works ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 73

def destroy_all_works
  work_proxies.each do |proxy| 
    if BulkOps::SolrService.record_exists?(proxy.work_id)
      ActiveFedora::Base.find(proxy.work_id).destroy
    end
    proxy.update(status: "destroyed", message: "The work created by this proxy was destroyed by the user")
  end
  update(stage: "waiting",
         status: "reverted changes")
end

#destroy_all_works_and_proxies ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 61

def destroy_all_works_and_proxies
  work_proxies.each do |proxy|
    if BulkOps::SolrService.record_exists?(proxy.work_id)
      ActiveFedora::Base.find(proxy.work_id).destroy
    end
    proxy.destroy
  end
  update(stage: "waiting",
         status: "reverted changes")
end
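
The three destroy_all_* helpers above differ only in what they remove; a summary in code form:

op.destroy_all_proxies            # delete the proxy records only; any ingested works are left in place
op.destroy_all_works              # delete the ingested works, keep the proxies and mark them "destroyed"
op.destroy_all_works_and_proxies  # delete both the works and their proxies
# All three reset the operation to stage "waiting" with status "reverted changes".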

#draft? ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/bulk_ops/operation.rb', line 298

def draft?
  return (stage == 'draft')
end

#error_url ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 341

def error_url
  "https://github.com/#{git.repo}/tree/#{git.name}/#{git.name}/errors"
end

#filename_prefix ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 345

def filename_prefix
  @filename_prefix ||= options['filename_prefix']
end

#final_spreadsheet ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 275

def final_spreadsheet
  @metadata ||= git.load_metadata branch: "master" # assumed method name on the git accessor; it is missing from this listing
end

#finalize_draft(fields: nil, work_ids: nil) ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 237

def finalize_draft(fields: nil, work_ids: nil)
  create_new_spreadsheet(fields: fields, work_ids: work_ids)
  update(stage: "pending")
end

#get_spreadsheet(return_headers: false) ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 266

def get_spreadsheet return_headers: false
  branch = ((running? || complete?) ? "master" : nil)
  git.load_metadata return_headers: return_headers, branch: branch # assumed method name on the git accessor; it is missing from this listing
end
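
Usage sketch: once the operation is running or complete the spreadsheet is read from the master branch, otherwise from the operation's own branch (the exact return shape depends on the git accessor):

rows = op.get_spreadsheet
rows_with_headers = op.get_spreadsheet(return_headers: true)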

#ignored_fields ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 336

def ignored_fields
  (options['ignored headers'] || []) + BulkOps::IGNORED_COLUMNS
end

#lift_holds ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 169

def lift_holds
  work_proxies.each { |proxy| proxy.lift_hold}
end

#options ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 291

def options
  return {} if name.nil?
  return @options if @options
  branch = (running? || complete?) ? "master" : nil
  @options ||= git.load_options(branch: branch)
end
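
The options hash is loaded from the YAML options file on the operation's branch (or from master once the operation is running or complete). Keys referenced elsewhere in this class include:

op.options['visibility']        # default visibility given to new work proxies in #apply!
op.options['filename_prefix']   # used by #filename_prefix
op.options['ignored headers']   # merged into #ignored_fields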

#place_holds ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 173

def place_holds
  work_proxies.each { |proxy| proxy.place_hold}
end

#proxy_errors ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 25

def proxy_errors
  work_proxies.reduce([]) do |errors, proxy|
    if proxy.proxy_errors
      errors += proxy.proxy_errors
    elsif proxy.status == "job_error"
      errors << BulkOps::Error.new(type: :job_failure, object_id: proxy.work_id, message: proxy.message)
    end
    errors
  end
end

#proxy_states ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 35

def proxy_states
  states = {}
  work_proxies.each{|proxy| (states[proxy.status] ||= []) << proxy }
  states
end
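
Example of summarizing proxies by status, e.g. for a progress display (the status strings come from the proxies themselves):

op.proxy_states.each do |status, proxies|
  puts "#{status}: #{proxies.count}"
end
# e.g. "complete: 120", "queued: 14", "job_error: 2"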

#report_errors! ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 226

def report_errors!
  error_file_name = BulkOps::Error.write_errors!(accumulated_errors, git)
  notify!(subject: "Errors initializing bulk operation in Hycruz", message: "Hycruz encountered some errors while it was setting up your operation and preparing to begin. For most types of errors, the individual spreadsheet rows with errors will be ignored and the rest will proceed. Please consult the operation summary for real-time information on the status of the operation. Details about these initialization errors can be seen on Github at the following url: https://github.com/#{git.repo}/blob/#{git.name}/#{git.name}/errors/#{error_file_name}") if error_file_name
end

#running? ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/bulk_ops/operation.rb', line 302

def running?
  return (['running','finishing'].include?(stage))
end

#schema ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 45

def schema
  self.class.schema
end

#set_stage(new_stage) ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 57

def set_stage new_stage
  update(stage: new_stage)
end

#spreadsheet_count ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 271

def spreadsheet_count
  git.spreadsheet_count
end

#update_options(options, message = nil) ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 283

def update_options options, message=nil
  git.update_options(options, message: message)
end

#update_spreadsheet(file, message: nil) ⇒ Object



# File 'lib/bulk_ops/operation.rb', line 279

def update_spreadsheet file, message: nil
  git.update_spreadsheet(file, message: message)
end