Module: RedshiftSanitizer

Extended by:
RedshiftSanitizer
Included in:
RedshiftSanitizer
Defined in:
lib/redshift_sanitizer.rb,
lib/redshift_sanitizer/version.rb,
lib/redshift_sanitizer/configuration.rb

Defined Under Namespace

Classes: Configuration, Error

Constant Summary collapse

VERSION =
"0.1.3"

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#configuration ⇒ Object

Returns the value of attribute configuration.



6
7
8
# File 'lib/redshift_sanitizer.rb', line 6

# Reader for the module's configuration attribute.
#
# @return [Object, nil] the object currently stored in @configuration;
#   nil when nothing has been assigned to it yet
#   (NOTE(review): presumably a Configuration instance — confirm where it is assigned)
def configuration
  @configuration
end

Instance Method Details

#clean(text, **options) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/redshift_sanitizer.rb', line 18

# Sanitizes a string value according to the module configuration.
#
# Non-String values are returned untouched. A String is transcoded to valid
# UTF-8, optionally truncated to a byte limit, has every occurrence of the
# configured delimiter, end-of-field marker and NUL character replaced by
# +configuration.replace+, and finally loses any leading/trailing quotes.
#
# @param text [Object] value to sanitize; only Strings are modified
# @param options [Hash] optional settings
# @option options [Integer] :limit maximum number of UTF-8 bytes kept before
#   the delimiter/eof/NUL replacements run (should be the Redshift VARCHAR limit)
# @return [Object] a new sanitized String, or +text+ itself when it is not a String
def clean(text, **options)
  # Ignore non-string values
  return text unless text.is_a? String

  replacement = configuration.replace

  # Transcode first: the limit below must be measured on the UTF-8 bytes that
  # are actually kept, not on the bytes of whatever encoding the input had.
  text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: replacement)

  # Limit the length if needed, value should be the redshift VARCHAR limit
  limit = options[:limit]
  if limit && text.bytesize > limit
    # byteslice may cut through a multi-byte character. The text is valid
    # UTF-8 at this point, so the only invalid bytes scrub can find are that
    # dangling fragment, which is dropped rather than replaced.
    text = text.byteslice(0, limit).scrub('')
  end

  # Replace the delimiter, the end_of_field (eof) marker and NUL characters
  text = text.gsub(configuration.delimeter, replacement)
             .gsub(configuration.eof, replacement)
             .gsub("\u0000", replacement)

  # No surrounding quote: \A and \z anchor to the whole string, so quotes that
  # merely start or end an inner line are left alone.
  text.sub(/\A["']+/, '').sub(/["']+\z/, '')
end

#configure {|configuration| ... } ⇒ Object

Yields: (configuration)



10
11
12
# File 'lib/redshift_sanitizer.rb', line 10

# Hands the current configuration object to the caller's block so it can be
# inspected or modified in place.
#
# @yieldparam settings [Object] the value returned by #configuration
# @return [Object] whatever the block returns
def configure
  settings = configuration
  yield settings
end