Module: CSVHarvester

Includes:
BaseHarvester, FileProvider
Defined in:
lib/datafy/CSVHarvester.rb

Defined Under Namespace

Modules: CSVHarvesterClassMethods

Constant Summary

Constants included from BaseHarvester

BaseHarvester::BASE_HARVEST_DATA_FIELDS, BaseHarvester::DETAILS_PROPERTY_FIELDS

Instance Attribute Summary

Attributes included from FileProvider

#file_name

Class Method Summary collapse

Instance Method Summary collapse

Methods included from FileProvider

backupFile, #backupFile, dataDir, dataDir=, #dataTextFile, getBackupFileName, getDataDir, getDataFileName, getDir, getLogDir, localDir, localDir=, logDir, logDir=, #openLogFile, #removeFile

Methods included from BaseHarvester

#flattenTextLines, #now, #numify

Class Method Details

.included(base) ⇒ Object



11
12
13
# File 'lib/datafy/CSVHarvester.rb', line 11

# Extends +base+ with the methods of CSVHarvesterClassMethods.
#
# @param base [Object] the object that receives the class-level methods
# @return [Object] the result of +base.extend+
def included(base)
  base.extend(CSVHarvesterClassMethods)
end

Instance Method Details

#csvRecord ⇒ Object



352
353
354
355
356
357
358
359
360
361
# File 'lib/datafy/CSVHarvester.rb', line 352

# Builds one CSV::Row whose headers are the keys of +data_fields+ and whose
# values are obtained by calling each field's +:property+ method on self.
#
# @return [CSV::Row] the populated row
def csvRecord
    logger.debug "#{self.class}::#{__method__}"
    fields = data_fields
    fields.each_with_object(CSV::Row.new(fields.keys, [])) do |(header, spec), row|
        current = send(spec[:property])
        logger.debug "fieldName: '#{header}' => :#{spec[:property]} ==> #{current} nil?#{current.nil?}"
        row[header] = current
    end
end

#data_fields ⇒ Object



259
260
261
# File 'lib/datafy/CSVHarvester.rb', line 259

# Instance-level shortcut: returns whatever the class-level +data_fields+
# returns for this object's class.
def data_fields
    self.class.data_fields
end

#details ⇒ Object



378
379
380
# File 'lib/datafy/CSVHarvester.rb', line 378

# Lazily created store of detail lines: a Hash whose missing keys are
# initialised to an empty Set on first access.
#
# @return [Hash] the memoized details hash
def details
    return @details if @details
    @details = Hash.new { |hash, key| hash[key] = Set.new }
end

#details_fields ⇒ Object



263
264
265
# File 'lib/datafy/CSVHarvester.rb', line 263

# Instance-level shortcut: returns whatever the class-level +details_fields+
# returns for this object's class.
def details_fields
    self.class.details_fields
end

#detailsCSVRecords ⇒ Object

def printDetails



413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
# File 'lib/datafy/CSVHarvester.rb', line 413

# Rebuilds @detailsCSVRecords: one CSV::Row per line stored in +details+.
#
# Each row starts with the +details_fields+ headers (filled from the matching
# property reader when this object responds to it) and then receives the
# 'Class', 'Detail', 'Index', 'Line', 'Harvest When' and 'Persist When' cells.
# Any StandardError raised while building is passed to +registerError+ and the
# rows collected so far are returned.
#
# @return [Set] the rows built by this call
def detailsCSVRecords
    logger.debug "#{self.class}::#{__method__}"
    @detailsCSVRecords = Set.new
    begin
        details.each do |detail, lines|
            # Set before the property readers run; presumably some of them
            # consult @detail -- confirm against the including class.
            @detail = detail
            label   = properties[detail]
            lines.each_with_index do |line, position|
                row = CSV::Row.new(details_fields.keys, [])
                details_fields.each do |header, spec|
                    reader = spec[:property]
                    next unless respond_to?(reader)
                    row[header] = send(reader)
                end
                row['Class']  = self.class
                row['Detail'] = label
                row['Index']  = position + 1
                row['Line']   = line
                # Use the current time when nothing was harvested yet or when
                # the detail being written is the harvest timestamp itself.
                use_now = @harvestWhen.nil? || detail.eql?(:harvestWhen)
                row['Harvest When'] = use_now ? now : harvestWhen
                row['Persist When'] = @persistWhen.nil? ? now : persistWhen
                @detailsCSVRecords << row
            end
        end
    rescue => e
        registerError e
    end
    @detailsCSVRecords
end

#harvest ⇒ Object



267
268
269
# File 'lib/datafy/CSVHarvester.rb', line 267

# Entry point for a harvest; it simply calls +harvestNow+ and returns its
# result.
def harvest
    harvestNow
end

#harvestNow ⇒ Object



271
272
273
274
# File 'lib/datafy/CSVHarvester.rb', line 271

# Records the current time as the +harvestWhen+ property (overwriting rather
# than appending) and returns that same timestamp.
#
# +now+ is read exactly once so the returned value is guaranteed to be the
# one that was recorded, even if +now+ yields a new value on every call.
#
# @return [Object] the timestamp that was passed to +record+
def harvestNow
    stamp = now
    record(property: :harvestWhen, value: stamp, append: false)
    stamp
end

#logFileName ⇒ Object



255
256
257
# File 'lib/datafy/CSVHarvester.rb', line 255

# Instance-level shortcut: returns whatever the class-level +logFileName+
# returns for this object's class.
def logFileName
    self.class.logFileName
end

#logger ⇒ Object

module CSVHarvesterClassMethods



250
251
252
253
# File 'lib/datafy/CSVHarvester.rb', line 250

# Instance-level shortcut: returns the logger exposed by this object's class.
def logger
    # puts "#{self.class}::#{__method__}"
    self.class.logger
end

#persist(*args, persist_when: nil, append: true, close_on_exit: true) ⇒ Object



504
505
506
507
508
509
510
# File 'lib/datafy/CSVHarvester.rb', line 504

# Stamps the +persistWhen+ property, writes the data row and the details
# rows, and optionally closes the class-level CSV files.
#
# @param args [Array] extra positional arguments; only written to the debug log
# @param persist_when [Object, nil] timestamp to record; +now+ is used when nil
# @param append [Boolean] forwarded to +record+
# @param close_on_exit [Boolean] call +self.class.closeCSVFiles+ afterwards
# @return [Object, nil] the result of +closeCSVFiles+, or nil when not closing
def persist(*args, persist_when: nil, append: true, close_on_exit: true)
    logger.debug "#{self.class}::#{__method__} args:#{args}"
    stamp = persist_when
    stamp = now if stamp.nil?
    record(property: :persistWhen, value: stamp, append: append)
    persist_data
    persist_details
    return unless close_on_exit
    self.class.closeCSVFiles
end

#persist_data ⇒ Object



475
476
477
478
479
480
481
482
483
484
485
486
487
488
# File 'lib/datafy/CSVHarvester.rb', line 475

# Appends this object's +csvRecord+ to the class-level :data CSV file.
#
# Calls +persistWhen+ first, publishes the target file in the global
# $fields_file, and skips the write when no :data file is registered.
# StandardErrors are logged and not re-raised.
def persist_data
  logger.debug "#{self.class}::#{__method__}"
  begin
      persistWhen
      target = self.class.csv_files[:data]
      $fields_file = target
      target << csvRecord unless target.nil?
  rescue => e
      logger.error "ERROR persisting CSV fields aka csvRecord:'#{e.message}'"
  end
end

#persist_details ⇒ Object



490
491
492
493
494
495
496
497
498
499
500
501
502
# File 'lib/datafy/CSVHarvester.rb', line 490

# Appends every row from +detailsCSVRecords+ to the class-level :details CSV
# file.
#
# The target file is also published in the global $details_file. When no
# :details file is registered the rows are still built but nothing is
# written, mirroring the nil guard in +persist_data+ instead of raising (and
# logging) a NoMethodError on nil. StandardErrors are logged and not
# re-raised.
#
# @return [nil] on success
def persist_details
  logger.debug "#{self.class}::#{__method__}"
  begin
      file = self.class.csv_files[:details]
      $details_file = file
      records = detailsCSVRecords
      return if file.nil?
      records.each do |rec|
          file << rec
      end
      return
  rescue => e
      logger.error "ERROR persisting CSV data in details form:'#{e.message}'"
  end
end

#persistWhen ⇒ Object



276
277
278
279
280
# File 'lib/datafy/CSVHarvester.rb', line 276

# Records the current time as the +persistWhen+ property (overwriting rather
# than appending) and returns that same timestamp.
#
# +now+ is read exactly once so the returned value is guaranteed to be the
# one that was recorded, even if +now+ yields a new value on every call.
#
# NOTE(review): +record+ reads the current value via send(property); unless
# the including class supplies its own +persistWhen+ reader that call would
# re-enter this method -- confirm against the including classes.
#
# @return [Object] the timestamp that was passed to +record+
def persistWhen
    stamp = now
    record(property: :persistWhen, value: stamp, append: false)
    stamp
end

#printCSVRecord ⇒ Object



363
364
365
366
367
368
369
370
371
372
373
374
375
376
# File 'lib/datafy/CSVHarvester.rb', line 363

# Prints the row returned by +csvRecord+ to stdout as aligned
# "header : value" lines under a "CSV Record" title.
#
# The nil/empty check is done before the column width is computed, so a
# record without headers prints just the title and an empty rule instead of
# raising NoMethodError on nil. Headers are measured via +to_s+ so non-String
# headers are tolerated.
#
# @return [nil]
def printCSVRecord
    logger.debug "#{self.class}::#{__method__}"
    rec    = csvRecord
    blank  = rec.nil? || rec.empty?
    maxLen = blank ? 0 : (rec.headers.map { |h| h.to_s.length }.max || 0)
    puts "CSV Record"
    puts '=' * maxLen
    return if blank
    rec.each do |f, v|
        puts "%-#{maxLen}s : %s " % [f, v]
    end
    puts '-' * maxLen
end

#printDataFields ⇒ Object



282
283
284
# File 'lib/datafy/CSVHarvester.rb', line 282

# Asks this object's class to print its :data field definitions via the
# class-level +printFields+.
def printDataFields
    self.class.printFields :data
end

#printDataFiles(show_fields = true) ⇒ Object



447
448
449
450
# File 'lib/datafy/CSVHarvester.rb', line 447

# Logs the call and forwards to the class-level +printDataFiles+.
#
# @param show_fields [Boolean] passed through unchanged to the class method
# @return [Object] whatever the class-level method returns
def printDataFiles(show_fields = true)
    logger.debug "#{self.class}::#{__method__} show_fields:#{show_fields}"
    self.class.printDataFiles(show_fields)
end

#printDetails ⇒ Object



394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
# File 'lib/datafy/CSVHarvester.rb', line 394

# Prints every recorded detail to stdout: one "<properties[key]> [<key>]"
# heading per detail key followed by its lines. When there are no details a
# notice is printed and also sent to the logger at info level.
def printDetails
  logger.debug "#{self.class}::#{__method__}"
  recorded = details
  if recorded.empty?
      notice = " - no details to print i.e. @details is nil or empty"
      puts notice
      return logger.info(notice)
  end
  recorded.each do |kind, entries|
      puts format("%s [%s]", properties[kind], kind)
      entries.each { |entry| puts "  - '#{entry}'" }
  end
end

#printDetailsCSVRecords ⇒ Object



452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
# File 'lib/datafy/CSVHarvester.rb', line 452

# Prints the rows returned by +detailsCSVRecords+ to stdout as aligned
# "header : value" lines, one dashed rule after each row. The column width is
# taken from the longest header of the first row. StandardErrors are handed
# to +registerError+.
def printDetailsCSVRecords
    logger.debug "#{self.class}::#{__method__}"
    begin
        puts "Details Records"
        rows = detailsCSVRecords
        if !rows.nil? && !rows.empty?
            width = rows.first.headers.max_by(&:length).length
            puts '=' * width
            rows.each do |row|
                row.entries.each { |header, cell| puts "%-#{width}s : %s" % [header, cell] }
                puts '-' * width
            end
        else
            puts " - no details records exist to print"
        end
    rescue => e
        registerError e
    end
end

#printDetailsFields ⇒ Object



301
302
303
# File 'lib/datafy/CSVHarvester.rb', line 301

# Asks this object's class to print its :details field definitions via the
# class-level +printFields+.
def printDetailsFields
    self.class.printFields :details
end

#printFields(*args) ⇒ Object



286
287
288
289
# File 'lib/datafy/CSVHarvester.rb', line 286

# Forwards all arguments unchanged to the class-level +printFields+.
#
# @param args [Array] arguments passed through to the class method
# @return [Object] whatever the class-level method returns
def printFields(*args)
    self.class.printFields(*args)
end

#printProperties ⇒ Object



296
297
298
299
# File 'lib/datafy/CSVHarvester.rb', line 296

# Logs the call (the "[i]" suffix marks the instance-level variant in the
# log line) and forwards to the class-level +printProperties+.
def printProperties
    logger.debug "#{self.class}::#{__method__} [i]"
    self.class.printProperties
end

#properties ⇒ Object



291
292
293
294
# File 'lib/datafy/CSVHarvester.rb', line 291

# Logs the call and returns whatever the class-level +properties+ returns
# for this object's class. Note that every call writes a debug log line.
def properties
    logger.debug "#{self.class}::#{__method__} [i]"
    self.class.properties
end

#record(property:, value:, append: true) ⇒ Object



305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# File 'lib/datafy/CSVHarvester.rb', line 305

# Sets a property on this object and registers the value as a detail line.
#
# Nil and empty-string values are ignored. When +append+ is true and the
# property already holds a different, non-nil value, the new value is joined
# to the old one as "old | new"; otherwise the value replaces it.
#
# The reader and writer names are derived from copies of +property+, so the
# caller's argument is never mutated; frozen Strings are therefore accepted
# and +register_detail+ receives +property+ exactly as it was passed in.
# StandardErrors are printed to stdout and not re-raised.
#
# @param property [Symbol, String] property name, with or without trailing '='
# @param value [Object] value to store
# @param append [Boolean] join with the existing value instead of replacing it
def record(property:, value:, append: true)
    logger.debug "#{self.class}::#{__method__}"
    begin
        if ''.eql?(value) || value.nil?
            logger.debug "  !!! NOT setting nil or empty property"
            return
        end
        # Non-destructive sub: works on a copy, leaving +property+ untouched.
        reader     = property.to_s.sub(/[=]+$/, '')
        writer     = "#{reader}="
        curr_val   = send(reader)
        assign_val = if append && !curr_val.nil? && !value.eql?(curr_val)
                         "#{curr_val} | #{value}"
                     else
                         value
                     end
        send(writer, assign_val)
        register_detail(property: property, value: value)
    rescue => e
        puts "#{self.class}::#{__method__} ERROR:#{e}"
    end
end

#register_detail(property:, value:) ⇒ Object

Use this method to only add a Details line, it does not set the property



383
384
385
386
387
388
389
390
391
392
# File 'lib/datafy/CSVHarvester.rb', line 383

# Adds a detail line for +property+ without setting the property itself.
#
# The value is stored as +value.to_s.strip+ in the Set kept under +property+
# in +details+; nil and empty-string values are ignored and the Set itself
# suppresses duplicates.
#
# The details key is always +property+ as given. The former
# +properties.has_key?([property])+ lookup wrapped the key in an Array and so
# never matched; it is removed rather than "corrected" because the readers of
# +details+ in this file (+detailsCSVRecords+, +printDetails+) look the key up
# in +properties+ themselves. The duplicate check in the debug line now tests
# the normalised value that is actually stored.
#
# @param property [Symbol, String] key under which the line is stored
# @param value [Object] the line; converted with +to_s+ and stripped
# @return [Set, nil] the Set for +property+, or nil when the value is ignored
def register_detail(property:, value:)
    logger.debug "#{self.class}::#{__method__} prop:'#{property.inspect}' value:'#{value}'"
    return if value.nil? || ''.eql?(value)
    line  = value.to_s.strip # .to_s handles cases of non-String value
    lines = details[property]
    logger.debug " - details has new? #{lines.include?(line)}"
    lines << line
end

#registerError(error) ⇒ Object



345
346
347
348
349
350
# File 'lib/datafy/CSVHarvester.rb', line 345

# Marks the harvest as failed: records +harvestSuccessful+ as false, records
# the first 151 characters of the error message (newlines shown as ' | ') as
# +harvestMessage+, and keeps the full message in @errorMessage.
#
# @param error [Exception] the error to register
# @return [String] the text stored in @errorMessage
def registerError(error)
    logger.debug "#{self.class}::#{__method__}"
    record(property: :harvestSuccessful, value: false)
    summary = error.message[0..150].gsub("\n", ' | ')
    record(property: :harvestMessage, value: summary)
    # @errorMessage declared in BaseHarvester, useful for debugging
    @errorMessage = error.full_message.to_s
end

#resetDataFields ⇒ Object



328
329
330
331
332
333
334
335
# File 'lib/datafy/CSVHarvester.rb', line 328

# Clears every data field by calling its property writer with nil, then
# clears @errorMessage. A property name that already ends in '=' is
# normalised so the writer name carries exactly one trailing '='.
#
# @return [nil]
def resetDataFields
    data_fields.each_value do |spec|
        setter = "#{spec[:property]}=".gsub(/[=]+$/, '=')
        send(setter, nil)
    end
    @errorMessage = nil
end

#resetDetails ⇒ Object



337
338
339
340
341
342
343
# File 'lib/datafy/CSVHarvester.rb', line 337

# Discards all recorded details: @details becomes a fresh Hash that creates
# an empty Set for each missing key, and the cached @detailsCSVRecords is
# dropped. Both new states are written to the debug log.
def resetDetails
    logger.debug "#{self.class}::#{__method__}"
    @detailsCSVRecords = nil
    @details = Hash.new { |hash, key| hash[key] = Set.new }
    logger.debug "Details : #{@details.inspect}"
    logger.debug "CSV recs: #{@detailsCSVRecords.inspect}"
end

#to_s ⇒ Object



512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
# File 'lib/datafy/CSVHarvester.rb', line 512

# Renders "<Class>:: " followed by one " -<name>-|<value>|" segment for every
# field listed in each class constant whose name contains "KEY"; the value is
# read by calling the field's +:property+ method on self.
#
# @return [String]
def to_s
    holder   = self.class
    segments = holder.constants.grep(/KEY/).flat_map do |const_name|
        holder.const_get(const_name).map do |name, spec|
            " -#{name}-|#{send(spec[:property])}|"
        end
    end
    "#{holder}:: " + segments.join
end