Class: DuckDB::TableFunction

Inherits:
Object
  • Object
show all
Defined in:
lib/duckdb/table_function.rb,
lib/duckdb/table_function/bind_info.rb,
lib/duckdb/table_function/init_info.rb,
lib/duckdb/table_function/function_info.rb,
ext/duckdb/table_function.c

Overview

The DuckDB::TableFunction encapsulates a DuckDB table function.

NOTE: DuckDB::TableFunction is currently experimental.

require 'duckdb'

db = DuckDB::Database.new
conn = db.connect

# Low-level API: every callback is configured by hand.
tf = DuckDB::TableFunction.new
tf.name = 'my_function'
tf.add_parameter(DuckDB::LogicalType::BIGINT)

# Bind callback: declare the result schema.
tf.bind do |bind_info|
  bind_info.add_result_column('value', DuckDB::LogicalType::BIGINT)
end

# Init callback: set up execution state. DuckDB::TableFunction.create always
# installs one (it is required by DuckDB), so the low-level path sets it too.
tf.init do |_init_info|
  # No-op
end

# Execute callback: with the low-level API the row count is reported through
# output.size (the high-level API derives it from the block's return value).
tf.execute do |func_info, output|
  # Fill output data...
  output.size = 0  # Zero rows signals done
end

conn.register_table_function(tf)

# High-level API (recommended): bind/init are generated from the arguments and
# the block's Integer return value becomes output.size.
tf = DuckDB::TableFunction.create(
  name: 'my_function',
  parameters: [DuckDB::LogicalType::BIGINT],
  columns: { 'value' => DuckDB::LogicalType::BIGINT }
) do |func_info, output|
  # Fill output data...
  0  # Return row count (0 when done)
end

Defined Under Namespace

Classes: BindInfo, FunctionInfo, InitInfo

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

DuckDB::TableFunction.new ⇒ DuckDB::TableFunction

Creates a new table function.

tf = DuckDB::TableFunction.new
tf.name = "my_function"
# ... configure tf ...


87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'ext/duckdb/table_function.c', line 87

// Ruby-side initializer: creates the underlying DuckDB table function handle,
// clears the three callback slots and links the handle back to its C context.
// Raises DuckDB::Error when the handle cannot be created. Returns self.
static VALUE duckdb_table_function_initialize(VALUE self) {
    rubyDuckDBTableFunction *tf_ctx;

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, tf_ctx);

    tf_ctx->table_function = duckdb_create_table_function();
    if (tf_ctx->table_function == NULL) {
        rb_raise(eDuckDBError, "Failed to create table function");
    }

    // No callbacks are attached until bind/init/execute are called.
    tf_ctx->bind_proc = tf_ctx->init_proc = tf_ctx->execute_proc = Qnil;

    // extra_info carries the C struct pointer rather than the Ruby object:
    // the struct is xmalloc'd, so it stays put during GC compaction while
    // self may move. No destructor callback is registered (NULL).
    duckdb_table_function_set_extra_info(tf_ctx->table_function, tf_ctx, NULL);

    return self;
}

Class Method Details

.add_table_adapter(klass, adapter) ⇒ void

This method returns an undefined value.

Registers a table adapter for a Ruby class.

The adapter is used by DuckDB::Connection#expose_as_table to convert instances of klass into a DuckDB table function. The adapter must respond to call(object, name, columns: nil) and return a DuckDB::TableFunction.

Implementing a Table Adapter

An adapter is any object that responds to call(object, name, columns: nil). The columns: keyword argument allows callers to override the column schema; the adapter should fall back to its own schema detection when it is nil.

The execute block passed to DuckDB::TableFunction.create must:

  • Write one batch of rows into output per call

  • Return the number of rows written as an Integer

  • Return 0 to signal that all data has been exhausted

Examples:

Minimal adapter for CSV objects

# Example table adapter that exposes a CSV object as a DuckDB table function,
# emitting one row per execute call.
class CSVTableAdapter
  # Builds the table function for +csv+ under +name+.
  # When +columns+ is nil the schema is inferred from the CSV headers.
  def call(csv, name, columns: nil)
    schema = columns || infer_columns(csv)

    DuckDB::TableFunction.create(name:, columns: schema) do |_func_info, output|
      row = csv.readline

      # Exhausted: rewind so the table can be scanned again, then signal the end.
      unless row
        csv.rewind
        next 0
      end

      # Each entry is indexed at [1] for the value written into row 0.
      row.each_with_index do |pair, column_index|
        output.set_value(column_index, 0, pair[1])
      end
      1 # wrote one row
    end
  end

  private

  # Maps every header of the first row to VARCHAR, rewinding afterwards so the
  # peeked row is not lost.
  def infer_columns(csv)
    header_names = csv.first.headers
    csv.rewind
    header_names.to_h { |header| [header, DuckDB::LogicalType::VARCHAR] }
  end
end

# Register and use:
DuckDB::TableFunction.add_table_adapter(CSV, CSVTableAdapter.new)
con.execute('SET threads=1')
con.expose_as_table(csv, 'csv_table')
con.query('SELECT * FROM csv_table()').to_a

Parameters:

  • klass (Class)

    the Ruby class to register an adapter for (e.g. CSV)

  • adapter (#call)

    the adapter object



174
175
176
# File 'lib/duckdb/table_function.rb', line 174

# Stores +adapter+ under the key +klass+ in the class-level @table_adapters
# registry.
#
# NOTE(review): @table_adapters is initialized outside this excerpt —
# presumably a Hash, in which case an existing entry for +klass+ is
# overwritten; confirm.
def add_table_adapter(klass, adapter)
  @table_adapters[klass] = adapter
end

.create(name:, columns:, parameters: nil) {|func_info, output| ... } ⇒ TableFunction

Creates a new table function with a declarative API.

rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity

Examples:

Simple range function

tf = TableFunction.create(
  name: 'my_range',
  parameters: [LogicalType::BIGINT],
  columns: { 'value' => LogicalType::BIGINT }
) do |func_info, output|
  # Generate data...
  0  # Signal done
end

Function that returns data

tf = TableFunction.create(
  name: 'my_function',
  columns: { 'value' => LogicalType::BIGINT }
) do |func_info, output|
  vec = output.get_vector(0)
  # Fill vector...
  3  # Return row count
end

Parameters:

  • name (String)

    The name of the table function

  • parameters (Array<LogicalType>, Hash<String, LogicalType>) (defaults to: nil)

    Function parameters (optional)

  • columns (Hash<String, LogicalType>)

    Output columns (required)

Yields:

  • (func_info, output)

    The execute block that generates data

Yield Parameters:

  • func_info (FunctionInfo)

    Function execution context

  • output (DataChunk)

    Output data chunk to fill

Yield Returns:

  • (Integer)

    Number of rows generated (0 when done)

Returns:

  • (TableFunction)

    the configured table function

Raises:

  • (ArgumentError)

    if name or columns is missing, no block is given, or parameters is neither an Array nor a Hash


79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/duckdb/table_function.rb', line 79

# Builds a fully wired table function from a declarative description.
#
# @param name [String] name of the table function
# @param columns [Hash] output column name => logical type
# @param parameters [Array, Hash, nil] positional (Array) or named (Hash) parameter types
# @yield [func_info, output] execute block; its return value is converted
#   with Integer() and assigned to output.size
# @return the newly created table function
# @raise [ArgumentError] when name/columns/block is missing or parameters
#   is neither an Array nor a Hash
def create(name:, columns:, parameters: nil, &)
  raise ArgumentError, 'name is required' unless name
  raise ArgumentError, 'columns are required' unless columns
  raise ArgumentError, 'block is required' unless block_given?

  new.tap do |table_function|
    table_function.name = name

    # Positional parameters come as an Array, named ones as a Hash.
    case parameters
    when nil, false
      nil # no parameters declared
    when Array
      parameters.each { |logical_type| table_function.add_parameter(logical_type) }
    when Hash
      parameters.each do |param_name, logical_type|
        table_function.add_named_parameter(param_name, logical_type)
      end
    else
      raise ArgumentError, 'parameters must be Array or Hash'
    end

    # Bind: publish the declared result columns.
    table_function.bind do |bind_info|
      columns.each { |col_name, col_type| bind_info.add_result_column(col_name, col_type) }
    end

    # Init: nothing to prepare, but the callback is required by DuckDB.
    table_function.init { |_init_info| }

    # Execute: the caller's block reports how many rows it produced.
    table_function.execute do |func_info, output|
      output.size = Integer(yield(func_info, output))
    end
  end
end

.table_adapter_for(klass) ⇒ #call?

Returns the table adapter registered for the given class, or nil if none.

Examples:

adapter = DuckDB::TableFunction.table_adapter_for(CSV)

Parameters:

  • klass (Class)

    the Ruby class to look up

Returns:

  • (#call, nil)

    the registered adapter, or nil if not found



186
187
188
# File 'lib/duckdb/table_function.rb', line 186

# Looks up +klass+ in the class-level @table_adapters registry and returns
# whatever is stored there.
#
# NOTE(review): @table_adapters is initialized outside this excerpt —
# presumably a Hash without a default, so an unregistered class yields nil;
# confirm.
def table_adapter_for(klass)
  @table_adapters[klass]
end

Instance Method Details

#add_named_parameter(name, logical_type) ⇒ self

Adds a named parameter to the table function.

tf.add_named_parameter("limit", DuckDB::LogicalType::BIGINT)

Returns:

  • (self)


165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'ext/duckdb/table_function.c', line 165

// Registers a named parameter (name + logical type) on the table function.
// Raises DuckDB::Error when the underlying handle is NULL. Returns self.
static VALUE rbduckdb_table_function_add_named_parameter(VALUE self, VALUE name, VALUE logical_type) {
    rubyDuckDBTableFunction *tf_ctx;
    rubyDuckDBLogicalType *type_ctx;
    const char *c_param_name;

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, tf_ctx);

    // A NULL handle is reported as a destroyed table function.
    if (tf_ctx->table_function == NULL) {
        rb_raise(eDuckDBError, "Table function is destroyed");
    }

    // Convert the name first, then unwrap the logical type (same order as before).
    c_param_name = StringValueCStr(name);
    type_ctx = get_struct_logical_type(logical_type);

    duckdb_table_function_add_named_parameter(tf_ctx->table_function,
                                              c_param_name,
                                              type_ctx->logical_type);

    return self;
}

#add_parameter(logical_type) ⇒ self

Adds a positional parameter to the table function.

tf.add_parameter(DuckDB::LogicalType::BIGINT)
tf.add_parameter(DuckDB::LogicalType::VARCHAR)

Returns:

  • (self)


141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'ext/duckdb/table_function.c', line 141

// Appends a positional parameter of the given logical type to the table function.
// Raises DuckDB::Error when the underlying handle is NULL. Returns self.
static VALUE rbduckdb_table_function_add_parameter(VALUE self, VALUE logical_type) {
    rubyDuckDBTableFunction *tf_ctx;
    rubyDuckDBLogicalType *type_ctx;

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, tf_ctx);

    // A NULL handle is reported as a destroyed table function.
    if (tf_ctx->table_function == NULL) {
        rb_raise(eDuckDBError, "Table function is destroyed");
    }

    // Unwrap the Ruby LogicalType object to reach the DuckDB logical type.
    type_ctx = get_struct_logical_type(logical_type);
    duckdb_table_function_add_parameter(tf_ctx->table_function, type_ctx->logical_type);

    return self;
}

#bind {|bind_info| ... } ⇒ self

Sets the bind callback for the table function. The callback is called when the function is used in a query.

table_function.bind do |bind_info|
  bind_info.add_result_column('id', DuckDB::LogicalType::BIGINT)
  bind_info.add_result_column('name', DuckDB::LogicalType::VARCHAR)
end

Yields:

  • (bind_info)

Returns:

  • (self)


195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# File 'ext/duckdb/table_function.c', line 195

// Stores the given block as the bind callback and installs the C bind
// trampoline on the DuckDB table function.
// Raises ArgumentError without a block and DuckDB::Error when the handle is
// NULL. Returns self.
static VALUE rbduckdb_table_function_set_bind(VALUE self) {
    rubyDuckDBTableFunction *tf_ctx;

    // The block check comes before anything else, so a missing block is
    // reported even when the handle is also gone.
    if (!rb_block_given_p()) {
        rb_raise(rb_eArgError, "block is required");
    }

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, tf_ctx);

    if (tf_ctx->table_function == NULL) {
        rb_raise(eDuckDBError, "Table function is destroyed");
    }

    // Keep the block in the context struct, then register the C callback.
    tf_ctx->bind_proc = rb_block_proc();
    duckdb_table_function_set_bind(tf_ctx->table_function, table_function_bind_callback);

    return self;
}

#execute {|function_info, output| ... } ⇒ self

Sets the execute callback for the table function. The callback is invoked during query execution to generate output rows.

table_function.execute do |func_info, output|
  output.size = 10
  vec = output.get_vector(0)
  # Write data...
end

Yields:

  • (function_info, output)


326
327
328
329
330
331
332
333
334
335
336
337
338
339
# File 'ext/duckdb/table_function.c', line 326

// Stores the given block as the execute callback and installs the C execute
// trampoline on the DuckDB table function.
// Raises ArgumentError without a block and DuckDB::Error when the handle is
// NULL. Returns self.
static VALUE rbduckdb_table_function_set_execute(VALUE self) {
    rubyDuckDBTableFunction *ctx;

    if (!rb_block_given_p()) {
        rb_raise(rb_eArgError, "block is required for execute");
    }

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, ctx);

    // Same guard as the bind/init setters: never hand a NULL handle to DuckDB.
    if (!ctx->table_function) {
        rb_raise(eDuckDBError, "Table function is destroyed");
    }

    ctx->execute_proc = rb_block_proc();
    duckdb_table_function_set_function(ctx->table_function, table_function_execute_callback);

    return self;
}

#init {|init_info| ... } ⇒ self

Sets the init callback for the table function. The callback is invoked once during query initialization to set up execution state.

table_function.init do |init_info|
  # Initialize execution state
end

Yields:

  • (init_info)


260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
# File 'ext/duckdb/table_function.c', line 260

// Stores the given block as the init callback and installs the C init
// trampoline on the DuckDB table function.
// Raises ArgumentError without a block and DuckDB::Error when the handle is
// NULL. Returns self.
static VALUE rbduckdb_table_function_set_init(VALUE self) {
    rubyDuckDBTableFunction *tf_ctx;

    // The block check comes first, matching the other callback setters.
    if (!rb_block_given_p()) {
        rb_raise(rb_eArgError, "block is required for init");
    }

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, tf_ctx);

    if (tf_ctx->table_function == NULL) {
        rb_raise(eDuckDBError, "Table function is destroyed");
    }

    // Keep the block in the context struct, then register the C callback.
    tf_ctx->init_proc = rb_block_proc();
    duckdb_table_function_set_init(tf_ctx->table_function, table_function_init_callback);

    return self;
}

#name=(name) ⇒ Object

Sets the name of the table function.

tf.name = "my_function"


116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'ext/duckdb/table_function.c', line 116

// Assigns the SQL-visible name of the table function.
// Raises DuckDB::Error when the underlying handle is NULL.
// Returns the name argument, as an attribute writer does.
static VALUE rbduckdb_table_function_set_name(VALUE self, VALUE name) {
    rubyDuckDBTableFunction *tf_ctx;
    const char *c_name;

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, tf_ctx);

    // A NULL handle is reported as a destroyed table function.
    if (tf_ctx->table_function == NULL) {
        rb_raise(eDuckDBError, "Table function is destroyed");
    }

    // Convert the Ruby value to a C string before handing it to DuckDB.
    c_name = StringValueCStr(name);
    duckdb_table_function_set_name(tf_ctx->table_function, c_name);

    return name;
}

#name=(name) ⇒ Object

Sets the name of the table function.

tf.name = "my_function"


116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'ext/duckdb/table_function.c', line 116

// Assigns the SQL-visible name of the table function.
// Raises DuckDB::Error when the underlying handle is NULL.
// Returns the name argument, as an attribute writer does.
static VALUE rbduckdb_table_function_set_name(VALUE self, VALUE name) {
    rubyDuckDBTableFunction *tf_ctx;
    const char *c_name;

    TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, tf_ctx);

    // A NULL handle is reported as a destroyed table function.
    if (tf_ctx->table_function == NULL) {
        rb_raise(eDuckDBError, "Table function is destroyed");
    }

    // Convert the Ruby value to a C string before handing it to DuckDB.
    c_name = StringValueCStr(name);
    duckdb_table_function_set_name(tf_ctx->table_function, c_name);

    return name;
}