Class: XGBoost::DMatrix

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/xgboost/dmatrix.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data, label: nil, weight: nil, missing: Float::NAN) ⇒ DMatrix

Returns a new instance of DMatrix.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/xgboost/dmatrix.rb', line 7

# Creates a DMatrix from in-memory data, or adopts an existing native handle.
#
# @param data [Object] either an ::FFI::AutoPointer (stored as the handle
#   as-is; all other options are then ignored), an object recognized by the
#   matrix?/daru?/numo?/rover? predicates, or otherwise an enumerable of
#   equally sized rows
# @param label [Object, nil] forwarded to #label= when truthy
# @param weight [Object, nil] forwarded to #weight= when truthy
# @param missing [Float] forwarded to handle_missing (non-Numo inputs) and to
#   XGDMatrixCreateFromMat
# @raise [ArgumentError] when the rows of a plain enumerable differ in size
# @raise [Error] when a Daru vector's db_type is neither "INTEGER" nor "DOUBLE"
def initialize(data, label: nil, weight: nil, missing: Float::NAN)
  # An AutoPointer is an existing native matrix handle: adopt it and skip all
  # data loading and metadata assignment below.
  if data.is_a?(::FFI::AutoPointer)
    @handle = data
    return
  end

  # Work out the dimensions and, for every input except Numo/Rover, a flat
  # Ruby array of the values.
  if matrix?(data)
    nrow, ncol = data.row_count, data.column_count
    flat_data = data.to_a.flatten
  elsif daru?(data)
    nrow, ncol = data.shape
    flat_data = data.map_rows(&:to_a).flatten
    feature_names = data.each_vector.map(&:name)
    feature_types =
      data.each_vector.map(&:db_type).map do |db_type|
        case db_type
        when "INTEGER" then "int"
        when "DOUBLE" then "float"
        else raise Error, "Unknown feature type: #{db_type}"
        end
      end
  elsif numo?(data)
    nrow, ncol = data.shape
  elsif rover?(data)
    nrow, ncol = data.shape
    feature_names = data.keys
    # Converted so that the numo? check below handles the copy into C memory.
    data = data.to_numo
  else
    nrow = data.count
    ncol = data.first.count
    unless data.all? { |row| row.size == ncol }
      raise ArgumentError, "Rows have different sizes"
    end
    flat_data = data.flatten
  end

  # Copy the values into a native float buffer of nrow * ncol elements.
  c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
  if numo?(data)
    c_data.write_bytes(data.cast_to(Numo::SFloat).to_string)
  else
    handle_missing(flat_data, missing)
    c_data.write_array_of_float(flat_data)
  end

  # The created handle is wrapped so XGDMatrixFree is used as its releaser.
  out = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGDMatrixCreateFromMat(c_data, nrow, ncol, missing, out)
  @handle = ::FFI::AutoPointer.new(out.read_pointer, FFI.method(:XGDMatrixFree))

  # Fall back to generated names "f0", "f1", ... when the input supplied none.
  feature_names ||= ncol.times.map { |i| "f#{i}" }
  self.feature_names = feature_names
  self.feature_types = feature_types if feature_types

  self.label = label if label
  self.weight = weight if weight
end

Instance Attribute Details

#handle ⇒ Object (readonly)

Returns the value of attribute handle.



5
6
7
# File 'lib/xgboost/dmatrix.rb', line 5

# Returns the value of the @handle instance variable (set in #initialize,
# either from the AutoPointer passed in or from the newly created matrix).
def handle
  @handle
end

Instance Method Details

#data_split_mode ⇒ Object



110
111
112
113
114
# File 'lib/xgboost/dmatrix.rb', line 110

# Queries the data split mode of the matrix via XGDMatrixDataSplitMode.
#
# @return [Symbol] :row when the value read back is 0, :col otherwise
def data_split_mode
  mode_ptr = ::FFI::MemoryPointer.new(:uint64)
  status = FFI.XGDMatrixDataSplitMode(handle, mode_ptr)
  check_call status
  mode_ptr.read_uint64.zero? ? :row : :col
end

#feature_names ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/xgboost/dmatrix.rb', line 126

# Reads the "feature_name" string info stored on the matrix.
#
# @return [Object, nil] the result of from_cstr_to_rbstr, or nil when that
#   result is empty
def feature_names
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  strings_ptr = ::FFI::MemoryPointer.new(:pointer)
  status = FFI.XGDMatrixGetStrFeatureInfo(handle, "feature_name", count_ptr, strings_ptr)
  check_call(status)

  names = from_cstr_to_rbstr(strings_ptr, count_ptr)
  names unless names.empty?
end

#feature_names=(feature_names) ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/xgboost/dmatrix.rb', line 141

# Sets, or clears, the "feature_name" string info on the matrix.
#
# @param feature_names [Object, nil] nil clears the stored names; any other
#   value is first passed through validate_feature_info
# @raise [ArgumentError] when the validated names contain duplicates, a
#   non-String entry, or one of the characters "[", "]" or "<"
def feature_names=(feature_names)
  # nil clears the field: a nil pointer with a length of 0 is sent.
  if feature_names.nil?
    check_call FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_name", nil, 0)
    return
  end

  # Validate the names against the column count and split mode.
  feature_names =
    validate_feature_info(feature_names, num_col, data_split_mode == :col, "feature names")
  raise ArgumentError, "feature_names must be unique" if feature_names.uniq.length != feature_names.length

  # Prohibit the use of symbols that may affect parsing, e.g. []<
  has_invalid_name =
    feature_names.any? do |name|
      !name.is_a?(String) || ["[", "]", "<"].any? { |symbol| name.include?(symbol) }
    end
  if has_invalid_name
    raise ArgumentError, "feature_names must be string, and may not contain [, ] or <"
  end

  name_ptrs = feature_names.map { |name| string_pointer(name) }
  c_feature_names = array_of_pointers(name_ptrs)
  check_call FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_name", c_feature_names, feature_names.length)
end

#feature_types ⇒ Object



179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/xgboost/dmatrix.rb', line 179

# Reads the "feature_type" string info stored on the matrix.
#
# @return [Object, nil] the result of from_cstr_to_rbstr, or nil when that
#   result is empty
def feature_types
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  strings_ptr = ::FFI::MemoryPointer.new(:pointer)
  status = FFI.XGDMatrixGetStrFeatureInfo(handle, "feature_type", count_ptr, strings_ptr)
  check_call(status)

  types = from_cstr_to_rbstr(strings_ptr, count_ptr)
  types unless types.empty?
end

#feature_types=(feature_types) ⇒ Object



194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/xgboost/dmatrix.rb', line 194

# Sets, or clears, the "feature_type" string info on the matrix.
#
# @param feature_types [Object, nil] nil clears the stored types; any other
#   value is first passed through validate_feature_info
def feature_types=(feature_types)
  # nil clears the field: a nil pointer with a length of 0 is sent.
  if feature_types.nil?
    check_call FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_type", nil, 0)
    return
  end

  # Validate the types against the column count and split mode.
  validated_types =
    validate_feature_info(feature_types, num_col, data_split_mode == :col, "feature types")

  type_ptrs = validated_types.map { |type| string_pointer(type) }
  c_feature_types = array_of_pointers(type_ptrs)
  check_call FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_type", c_feature_types, validated_types.length)
end

#group=(group) ⇒ Object



78
79
80
81
82
# File 'lib/xgboost/dmatrix.rb', line 78

# Sets the "group" unsigned-integer info field on the matrix.
#
# @param group [Array<Integer>] values written to the "group" field
#   (presumably per-query group sizes for ranking; confirm against the
#   XGBoost documentation)
def group=(group)
  # XGDMatrixSetUIntInfo takes an array of C unsigned ints, so the buffer is
  # allocated and filled as :uint rather than :int. The element size is the
  # same; this only changes how out-of-signed-range values are handled.
  c_data = ::FFI::MemoryPointer.new(:uint, group.size)
  c_data.write_array_of_uint(group)
  check_call FFI.XGDMatrixSetUIntInfo(handle, "group", c_data, group.size)
end

#label ⇒ Object



84
85
86
# File 'lib/xgboost/dmatrix.rb', line 84

# Reads the "label" info of the matrix by delegating to float_info.
#
# Returns whatever float_info("label") returns (helper defined outside this
# view; presumably the stored label values, to be confirmed there).
def label
  float_info("label")
end

#label=(label) ⇒ Object



70
71
72
# File 'lib/xgboost/dmatrix.rb', line 70

# Stores +label+ under the "label" info field by delegating to
# set_float_info (helper defined outside this view).
def label=(label)
  set_float_info("label", label)
end

#num_col ⇒ Object



98
99
100
101
102
# File 'lib/xgboost/dmatrix.rb', line 98

# Queries the number of columns via XGDMatrixNumCol.
#
# @return [Integer] the uint64 value written to the out-parameter
def num_col
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  status = FFI.XGDMatrixNumCol(handle, count_ptr)
  check_call status
  count_ptr.read_uint64
end

#num_nonmissing ⇒ Object



104
105
106
107
108
# File 'lib/xgboost/dmatrix.rb', line 104

# Queries the number of non-missing entries via XGDMatrixNumNonMissing.
#
# @return [Integer] the uint64 value written to the out-parameter
def num_nonmissing
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  status = FFI.XGDMatrixNumNonMissing(handle, count_ptr)
  check_call status
  count_ptr.read_uint64
end

#num_row ⇒ Object



92
93
94
95
96
# File 'lib/xgboost/dmatrix.rb', line 92

# Queries the number of rows via XGDMatrixNumRow.
#
# @return [Integer] the uint64 value written to the out-parameter
def num_row
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  status = FFI.XGDMatrixNumRow(handle, count_ptr)
  check_call status
  count_ptr.read_uint64
end

#save_binary(fname, silent: true) ⇒ Object



66
67
68
# File 'lib/xgboost/dmatrix.rb', line 66

# Saves the matrix through XGDMatrixSaveBinary.
#
# @param fname [String] passed to the native call as the file name
# @param silent [Boolean] sent to the native call as 1 when truthy, 0 otherwise
def save_binary(fname, silent: true)
  silent_flag = silent ? 1 : 0
  check_call FFI.XGDMatrixSaveBinary(handle, fname, silent_flag)
end

#slice(rindex) ⇒ Object



116
117
118
119
120
121
122
123
124
# File 'lib/xgboost/dmatrix.rb', line 116

# Builds a new DMatrix from the entries selected by +rindex+, using
# XGDMatrixSliceDMatrix.
#
# @param rindex [Array<Integer>] indices written to a native int buffer
#   (presumably row indices, going by the parameter name; confirm)
# @return [DMatrix] a DMatrix wrapping the handle produced by the native call
def slice(rindex)
  index_ptr = ::FFI::MemoryPointer.new(:int, rindex.count)
  index_ptr.write_array_of_int(rindex)
  result_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGDMatrixSliceDMatrix(handle, index_ptr, rindex.size, result_ptr)

  # Wrapped so XGDMatrixFree is used as the releaser for the sliced handle.
  sliced_handle = ::FFI::AutoPointer.new(result_ptr.read_pointer, FFI.method(:XGDMatrixFree))
  DMatrix.new(sliced_handle)
end

#weight ⇒ Object



88
89
90
# File 'lib/xgboost/dmatrix.rb', line 88

# Reads the "weight" info of the matrix by delegating to float_info.
#
# Returns whatever float_info("weight") returns (helper defined outside this
# view; presumably the stored weight values, to be confirmed there).
def weight
  float_info("weight")
end

#weight=(weight) ⇒ Object



74
75
76
# File 'lib/xgboost/dmatrix.rb', line 74

# Stores +weight+ under the "weight" info field by delegating to
# set_float_info (helper defined outside this view).
def weight=(weight)
  set_float_info("weight", weight)
end