Class: GeoCombine::Migrators::V1AardvarkMigrator

Inherits:
Object
  • Object
show all
Defined in:
lib/geo_combine/migrators/v1_aardvark_migrator.rb

Overview

Migrates a record from the v1 schema to the Aardvark schema.

Constant Summary collapse

# Crosswalk from v1 field names (keys) to their Aardvark field names (values).
# convert_keys uses it to rename keys on the record; remove_deprecated_fields
# uses its keys as the list of v1 names to drop. The trailing comment on each
# entry says what kind of change the rename represents.
SCHEMA_FIELD_MAP =
{
  'dc_title_s' => 'dct_title_s', # new namespace
  'dc_description_s' => 'dct_description_sm', # new namespace; single to multi-valued
  'dc_language_s' => 'dct_language_sm', # new namespace; single to multi-valued
  'dc_language_sm' => 'dct_language_sm', # new namespace
  'dc_creator_sm' => 'dct_creator_sm', # new namespace
  'dc_publisher_s' => 'dct_publisher_sm', # new namespace; single to multi-valued
  'dct_provenance_s' => 'schema_provider_s', # new URI name
  'dc_subject_sm' => 'dct_subject_sm', # new namespace
  'solr_geom' => 'locn_geometry', # new URI name
  'solr_year_i' => 'gbl_indexYear_im', # new URI name; single to multi-valued
  'dc_source_sm' => 'dct_source_sm', # new namespace
  'dc_rights_s' => 'dct_accessRights_s', # new URI name
  'dc_format_s' => 'dct_format_s', # new namespace
  'layer_id_s' => 'gbl_wxsIdentifier_s', # new URI name
  'layer_slug_s' => 'id', # new URI name
  'dc_identifier_s' => 'dct_identifier_sm', # new namespace; single to multi-valued
  'layer_modified_dt' => 'gbl_mdModified_dt', # new URI name
  'geoblacklight_version' => 'gbl_mdVersion_s', # new URI name
  'suppressed_b' => 'gbl_suppressed_b' # new namespace
}.freeze
# Lookup from a whitespace-stripped v1 dc_type_s value to the value written to
# gbl_resourceClass_sm (see convert_non_crosswalked_fields, which falls back to
# ['Other'] when there is no entry).
# NOTE(review): .freeze is shallow, so the inner arrays remain mutable.
RESOURCE_CLASS_MAP =
{
  'Collection' => ['Collections'],
  'Dataset' => ['Datasets'],
  'Image' => ['Imagery'],
  'InteractiveResource' => ['Websites'],
  'Service' => ['Web services'],
  'StillImage' => ['Imagery']
}.freeze
# Lookup from a whitespace-stripped v1 layer_geom_type_s value to the value
# written to gbl_resourceType_sm (see convert_non_crosswalked_fields, which
# leaves the field unset when there is no entry).
# NOTE(review): .freeze is shallow, so the inner arrays remain mutable.
RESOURCE_TYPE_MAP =
{
  'Point' => ['Point data'],
  'Line' => ['Line data'],
  'Polygon' => ['Polygon data'],
  'Raster' => ['Raster data'],
  'Table' => ['Table data']
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(v1_hash:, collection_id_map: {}) ⇒ V1AardvarkMigrator

Returns a new instance of V1AardvarkMigrator.

Parameters:

  • v1_hash (Hash)

    parsed json in the v1 schema

  • collection_id_map (Hash) (defaults to: {})

    a hash mapping collection names to ids for converting dct_isPartOf_sm



13
14
15
16
17
# File 'lib/geo_combine/migrators/v1_aardvark_migrator.rb', line 13

# Builds a migrator for a single v1 record.
#
# @param v1_hash [Hash] parsed json in the v1 schema
# @param collection_id_map [Hash] a hash mapping collection names to ids,
#   used when converting dct_isPartOf_sm
def initialize(v1_hash:, collection_id_map: {})
  @v1_hash = v1_hash
  # Work on a shallow copy. convert_keys renames keys in place on @v2_hash, so
  # sharing one Hash object would also rewrite the caller's input (and what
  # #v1_hash returns), and would raise on a frozen input.
  @v2_hash = v1_hash.dup
  @collection_id_map = collection_id_map
end

Instance Attribute Details

#v1_hash ⇒ Object (readonly)

Returns the value of attribute v1_hash.



9
10
11
# File 'lib/geo_combine/migrators/v1_aardvark_migrator.rb', line 9

# Reader for the v1 record stored in @v1_hash by the constructor.
#
# @return [Object] the current value of @v1_hash
def v1_hash
  @v1_hash
end

Instance Method Details

#convert_keys ⇒ Object

Namespace and URI changes to fields



35
36
37
38
39
# File 'lib/geo_combine/migrators/v1_aardvark_migrator.rb', line 35

# Applies the namespace and URI renames from SCHEMA_FIELD_MAP to the record.
# Keys without an entry in the map are kept unchanged. The rename is done in
# place on @v2_hash, which is also the return value.
def convert_keys
  @v2_hash.transform_keys! { |field| SCHEMA_FIELD_MAP.fetch(field, field) }
end

#convert_non_crosswalked_fields ⇒ Object

Convert non-crosswalked fields via lookup tables.



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/geo_combine/migrators/v1_aardvark_migrator.rb', line 54

# Converts the fields that have no one-to-one rename, using lookup tables.
#
# Reads dc_type_s, layer_geom_type_s and dct_isPartOf_sm from @v1_hash and
# writes gbl_resourceClass_sm, gbl_resourceType_sm, dcat_bbox and
# dct_isPartOf_sm on @v2_hash.
def convert_non_crosswalked_fields
  # Keys may or may not include whitespace, so we normalize them.
  # Resource class is required so we default to "Other"; resource type is not required.
  # Lookup results are copied so the record never shares an array with the
  # constants (their inner arrays are not frozen).
  resource_class = RESOURCE_CLASS_MAP[@v1_hash['dc_type_s']&.gsub(/\s+/, '')]
  @v2_hash['gbl_resourceClass_sm'] = resource_class ? resource_class.dup : ['Other']
  resource_type = RESOURCE_TYPE_MAP[@v1_hash['layer_geom_type_s']&.gsub(/\s+/, '')]
  @v2_hash['gbl_resourceType_sm'] = resource_type.dup if resource_type

  # If locn_geometry is in the ENVELOPE format, also add it as dcat_bbox
  geometry = @v2_hash['locn_geometry']
  @v2_hash['dcat_bbox'] = geometry if geometry&.match?(/ENVELOPE/)

  # If the user specified a collection id map, use it to convert the collection names to ids.
  # Array() tolerates both a missing value and a single un-wrapped collection name.
  is_part_of = Array(@v1_hash['dct_isPartOf_sm']).map { |name| @collection_id_map[name] }.compact
  if is_part_of.empty?
    @v2_hash.delete('dct_isPartOf_sm')
  else
    @v2_hash['dct_isPartOf_sm'] = is_part_of
  end
end

#convert_single_to_multi_valued_fields ⇒ Object

Fields that need to be converted from single to multi-valued



42
43
44
45
46
47
48
49
50
# File 'lib/geo_combine/migrators/v1_aardvark_migrator.rb', line 42

# Wraps scalar values in a one-element array for multi-valued fields.
#
# A field is treated as multi-valued when its name ends in _sm or _im. Values
# that are already arrays, and all other fields, are copied through unchanged.
# @v2_hash is replaced by a newly built hash.
def convert_single_to_multi_valued_fields
  @v2_hash = @v2_hash.each_with_object({}) do |(key, value), converted|
    # Anchor on the suffix. The earlier pattern /.*_[s|i]m/ matched "_sm",
    # "_im" or "_|m" anywhere in the name, so a single-valued field such as
    # "b1g_image_ss" was wrapped as well.
    wrap = !value.is_a?(Array) && key.match?(/_[si]m\z/)
    converted[key] = wrap ? [value] : value
  end
end

#remove_deprecated_fields ⇒ Object

Remove fields that are no longer used



75
76
77
# File 'lib/geo_combine/migrators/v1_aardvark_migrator.rb', line 75

# Drops fields that are no longer used: every v1 name listed as a key of
# SCHEMA_FIELD_MAP, plus dc_type_s and layer_geom_type_s. @v2_hash is replaced
# by the filtered copy, which is also returned.
def remove_deprecated_fields
  obsolete = SCHEMA_FIELD_MAP.keys + ['dc_type_s', 'layer_geom_type_s']
  @v2_hash = @v2_hash.except(*obsolete)
end

#run ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/geo_combine/migrators/v1_aardvark_migrator.rb', line 19

# Runs the migration and returns the resulting record.
#
# A record whose gbl_mdVersion_s is already 'Aardvark' is returned untouched.
# Otherwise the conversion steps run in a fixed order and the result is stamped
# with gbl_mdVersion_s = 'Aardvark'.
#
# @return [Hash] the current @v2_hash
def run
  already_aardvark = @v2_hash['gbl_mdVersion_s'] == 'Aardvark'

  unless already_aardvark
    # Order matters: keys are renamed first, scalars are wrapped next, lookup
    # based fields are filled in, and only then are the leftover v1 fields dropped.
    convert_keys
    convert_single_to_multi_valued_fields
    convert_non_crosswalked_fields
    remove_deprecated_fields

    # Mark the record as converted
    @v2_hash['gbl_mdVersion_s'] = 'Aardvark'
  end

  @v2_hash
end