Class: Fluent::HekkRedshiftOutput

Inherits:
BufferedOutput
  • Object
show all
Defined in:
lib/fluent/plugin/out_hekk_redshift.rb

Constant Summary collapse

IGNORE_REDSHIFT_ERROR_REGEXP =

Matches Redshift "load into table failed" errors (invalid data format), which are ignored.

/^ERROR:  Load into table '[^']+' failed\./

Instance Method Summary collapse

Constructor Details

#initialize ⇒ HekkRedshiftOutput

Returns a new instance of HekkRedshiftOutput.



9
10
11
12
13
14
15
16
17
18
# File 'lib/fluent/plugin/out_hekk_redshift.rb', line 9

# Creates the output plugin instance.
#
# The libraries the plugin depends on are required here, at instantiation
# time, in the order listed.
def initialize
  super
  %w[aws-sdk zlib time tempfile pg json csv].each { |library| require library }
end

Instance Method Details

#configure(conf) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/fluent/plugin/out_hekk_redshift.rb', line 47

# Applies the plugin configuration and precomputes the SQL templates.
#
# After the parent class has processed +conf+, this method:
# * normalizes @path so it ends with a slash and has no leading slash,
# * turns on @utc when the 'utc' key is present in +conf+,
# * collects the Redshift connection settings into @db_conf,
# * falls back to determine_delimiter(@file_type) when no delimiter is set,
# * builds @copy_sql_template and @insert_sql_template.
#
# @param conf configuration object; only conf['utc'] is read directly here
def configure(conf)
  super

  # Normalize the path: guarantee a trailing slash, then drop a leading one.
  @path += '/' unless @path.end_with?('/')
  @path = @path[1..-1] if @path.start_with?('/')

  @utc = true if conf['utc']

  @db_conf = {
    host: @redshift_host,
    port: @redshift_port,
    dbname: @redshift_dbname,
    user: @redshift_user,
    password: @redshift_password
  }

  # An unset or empty delimiter is derived from the file type.
  if @delimiter.nil? || @delimiter.empty?
    @delimiter = determine_delimiter(@file_type)
  end
  $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")

  # The two '%s' placeholders of the copy template (S3 URI, secret key) and
  # the single one of the insert template are filled in later with String#%.
  @copy_sql_template = "copy #{table_name_with_schema} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
  @insert_sql_template = "insert into #{@redshift_copy_command_tablename}(#{@redshift_copy_command_columnname}) values('%s');"
end

#format(tag, time, record) ⇒ Object



77
78
79
80
81
82
83
84
85
# File 'lib/fluent/plugin/out_hekk_redshift.rb', line 77

# Serializes one event for the buffer.
#
# * json?    - the record itself is packed with to_msgpack.
# * msgpack? - the record is wrapped in a hash keyed by @record_log_tag and
#              then packed with to_msgpack.
# * otherwise - the value stored under @record_log_tag is emitted as a line
#               of text terminated by a newline.
#
# @param tag unused by this method
# @param time unused by this method
# @param record the event record
# @return the serialized form of the record
def format(tag, time, record)
  return record.to_msgpack if json?
  return { @record_log_tag => record }.to_msgpack if msgpack?

  "#{record[@record_log_tag]}\n"
end

#start ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
# File 'lib/fluent/plugin/out_hekk_redshift.rb', line 65

# Starts the plugin: creates the S3 client from the configured credentials
# and stores the bucket named by @s3_bucket in @bucket.
#
# The :s3_endpoint option is only passed when @s3_endpoint is set.
def start
  super

  s3_options = { access_key_id: @aws_key_id, secret_access_key: @aws_sec_key }
  s3_options[:s3_endpoint] = @s3_endpoint if @s3_endpoint

  @s3 = AWS::S3.new(s3_options)
  @bucket = @s3.buckets[@s3_bucket]
end

#write(chunk) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/fluent/plugin/out_hekk_redshift.rb', line 87

# Uploads one buffer chunk to S3 as a gzip file and hands the matching COPY
# statement to insert_sql_to_redshift.
#
# Steps:
# 1. Write the chunk into a temporary gzip file (structured or flat helper,
#    depending on json? / msgpack?).
# 2. Upload that file to the S3 path produced by create_s3path.
# 3. Fill in the COPY template, escape it, embed it in the insert template
#    and pass the result to insert_sql_to_redshift.
#
# The temporary file is removed in an +ensure+ clause, so it is cleaned up
# when the helpers report no data and when the upload raises, not only on
# the success path.
#
# @param chunk the buffer chunk, passed through to the create_gz_file_* helpers
# @return [Boolean] false when the helpers report no valid data, true otherwise
def write(chunk)
  $log.debug format_log('start creating gz.')

  # Keep the tempfile in its own variable: the helpers return a falsy value
  # when there is no data, and the file must still be removed in that case.
  tempfile = Tempfile.new('s3-')
  begin
    # NOTE(review): the helpers receive the tempfile as their destination and
    # presumably return that same object on success - confirm.
    gz_file =
      if json? || msgpack?
        create_gz_file_from_structured_data(tempfile, chunk, @delimiter)
      else
        create_gz_file_from_flat_data(tempfile, chunk)
      end

    # no data -> skip
    unless gz_file
      $log.debug format_log('received no valid data. ')
      return false # for debug
    end

    # create a file path with time format
    s3path = create_s3path(@bucket, @path)

    # upload gz to s3
    @bucket.objects[s3path].write(Pathname.new(gz_file.path),
                                  :acl => :bucket_owner_full_control)
  ensure
    # close and delete the temp file on every exit path
    tempfile.close!
  end

  # copy gz on s3 to redshift
  s3_uri = "s3://#{@s3_bucket}/#{s3path}"
  copy_sql = @copy_sql_template % [s3_uri, @aws_sec_key]
  # Each single quote becomes \' so the COPY statement can sit inside the
  # quoted value of the insert template. The replacement needs a doubled
  # backslash because a bare \' in a gsub replacement means "post-match".
  sql = @insert_sql_template % [copy_sql.gsub(/'/, "\\\\'")]
  insert_sql_to_redshift(sql)
  true # for debug
end