Class: XGBoost::Booster

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/xgboost/booster.rb

Instance Method Summary collapse

Constructor Details

#initialize(params: nil, cache: nil, model_file: nil) ⇒ Booster

Returns a new instance of Booster.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/xgboost/booster.rb', line 5

# Creates a booster, optionally seeded with cached matrices, a saved model
# and an initial set of parameters.
#
# @param params [Enumerable, nil] parameters forwarded to #set_param; nothing is set when nil
# @param cache [Array<DMatrix>, nil] matrices handed to XGBoosterCreate (defaults to none)
# @param model_file [String, nil] path passed to XGBoosterLoadModel when given
# @raise [TypeError] if any cache item is not a DMatrix
def initialize(params: nil, cache: nil, model_file: nil)
  cache ||= []
  cache.each do |d|
    raise TypeError, "invalid cache item: #{d.class.name}" unless d.is_a?(DMatrix)
  end

  dmats = array_of_pointers(cache.map { |d| d.handle })
  out = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGBoosterCreate(dmats, cache.length, out)
  # Wrap the native handle so XGBoosterFree is registered as its releaser.
  @handle = ::FFI::AutoPointer.new(out.read_pointer, FFI.method(:XGBoosterFree))

  cache.each do |d|
    assign_dmatrix_features(d)
  end

  check_call FFI.XGBoosterLoadModel(handle, model_file) if model_file

  # set_param(nil) would take its non-Enumerable branch and register a
  # parameter whose name and value are both "", so only forward real params.
  set_param(params) if params
end

Instance Method Details

#[](key_name) ⇒ Object

Raises:

  • (TypeError)


29
30
31
32
33
34
35
36
37
# File 'lib/xgboost/booster.rb', line 29

# Looks up a booster attribute by name.
#
# @param key_name [String] attribute name
# @return [Object] whatever #attr returns for the key
# @raise [TypeError] if key_name is not a String
def [](key_name)
  # TODO slice
  raise TypeError, "expected string" unless key_name.is_a?(String)

  attr(key_name)
end

#[]=(key_name, raw_value) ⇒ Object



39
40
41
# File 'lib/xgboost/booster.rb', line 39

# Sets a single booster attribute by delegating to #set_attr.
#
# @param key_name [Object] attribute name
# @param raw_value [Object] attribute value, passed through unchanged
def []=(key_name, raw_value)
  single_attr = {key_name => raw_value}
  set_attr(**single_attr)
end

#attr(key) ⇒ Object



55
56
57
58
59
60
# File 'lib/xgboost/booster.rb', line 55

# Reads one attribute through XGBoosterGetAttr.
#
# @param key [#to_s] attribute name
# @return [String, nil] the attribute text, or nil when the success flag is 0
def attr(key)
  value_ptr = ::FFI::MemoryPointer.new(:pointer)
  found = ::FFI::MemoryPointer.new(:int)
  check_call FFI.XGBoosterGetAttr(handle, key.to_s, value_ptr, found)
  return nil if found.read_int == 0

  value_ptr.read_pointer.read_string
end

#attributes ⇒ Object



62
63
64
65
66
67
68
# File 'lib/xgboost/booster.rb', line 62

# Collects every attribute name reported by XGBoosterGetAttrNames together
# with its value from #attr.
#
# @return [Hash] attribute name => attribute value
def attributes
  count = ::FFI::MemoryPointer.new(:uint64)
  names_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGBoosterGetAttrNames(handle, count, names_ptr)
  from_cstr_to_rbstr(names_ptr, count).each_with_object({}) do |name, result|
    result[name] = attr(name)
  end
end

#best_iteration ⇒ Object



132
133
134
# File 'lib/xgboost/booster.rb', line 132

# Reads the :best_iteration attribute and converts it with to_i.
#
# @return [Integer, nil] nil when the attribute is not set
def best_iteration
  stored = attr(:best_iteration)
  stored.nil? ? nil : stored.to_i
end

#best_iteration=(iteration) ⇒ Object



136
137
138
# File 'lib/xgboost/booster.rb', line 136

# Stores the given iteration under the :best_iteration attribute.
#
# @param iteration [Object] value forwarded to #set_attr
def best_iteration=(iteration)
  attrs = {best_iteration: iteration}
  set_attr(**attrs)
end

#best_score ⇒ Object



140
141
142
# File 'lib/xgboost/booster.rb', line 140

# Reads the :best_score attribute and converts it with to_f.
#
# @return [Float, nil] nil when the attribute is not set
def best_score
  stored = attr(:best_score)
  stored.nil? ? nil : stored.to_f
end

#best_score=(score) ⇒ Object



144
145
146
# File 'lib/xgboost/booster.rb', line 144

# Stores the given score under the :best_score attribute.
#
# @param score [Object] value forwarded to #set_attr
def best_score=(score)
  attrs = {best_score: score}
  set_attr(**attrs)
end

#dump(fmap: "", with_stats: false, dump_format: "text") ⇒ Object

Returns an array of strings.



180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/xgboost/booster.rb', line 180

# Dumps the model through XGBoosterDumpModelExWithFeatures.
#
# @param fmap [String] accepted for interface compatibility; this method never reads it
# @param with_stats [Boolean] passed to the native call as 1 or 0
# @param dump_format [String] format name passed to the native call
# @return [Array<String>] the strings produced by the native call
def dump(fmap: "", with_stats: false, dump_format: "text")
  out_len = ::FFI::MemoryPointer.new(:uint64)
  out_result = ::FFI::MemoryPointer.new(:pointer)

  names = feature_names || []
  # Default every feature to "float" when no types are recorded.
  types = feature_types || Array.new(names.size, "float")

  # Names and types both come from get_feature_info, so both need the same
  # string_pointer conversion before being packed into a pointer array.
  fnames = array_of_pointers(names.map { |fname| string_pointer(fname) })
  ftypes = array_of_pointers(types.map { |ftype| string_pointer(ftype) })

  check_call FFI.XGBoosterDumpModelExWithFeatures(handle, names.size, fnames, ftypes, with_stats ? 1 : 0, dump_format, out_len, out_result)

  out_result.read_pointer.get_array_of_string(0, out_len.read_uint64)
end

#dump_model(fout, fmap: "", with_stats: false, dump_format: "text") ⇒ Object



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/xgboost/booster.rb', line 160

# Writes the result of #dump to a file opened in binary write mode.
#
# For "json" the dumped strings are wrapped in "[\n" ... "\n]" and joined
# with ",\n"; for any other format each string is preceded by a
# "booster[i]:" header line.
#
# @param fout [String] destination path
# @param fmap [String] forwarded to #dump
# @param with_stats [Boolean] forwarded to #dump
# @param dump_format [String] forwarded to #dump; "json" selects the JSON layout
def dump_model(fout, fmap: "", with_stats: false, dump_format: "text")
  trees = dump(fmap: fmap, with_stats: with_stats, dump_format: dump_format)
  File.open(fout, "wb") do |io|
    case dump_format
    when "json"
      last_index = trees.size - 1
      io.print("[\n")
      trees.each_with_index do |tree, idx|
        io.print(tree)
        io.print(",\n") unless idx >= last_index
      end
      io.print("\n]")
    else
      trees.each_with_index do |tree, idx|
        io.print("booster[#{idx}]:\n")
        io.print(tree)
      end
    end
  end
end

#eval_set(evals, iteration) ⇒ Object



106
107
108
109
110
111
112
113
114
115
# File 'lib/xgboost/booster.rb', line 106

# Evaluates the given matrices for one iteration via XGBoosterEvalOneIter.
#
# @param evals [Enumerable] entries where index 0 responds to #handle and index 1 is the name
# @param iteration [Integer] iteration number passed to the native call
# @return [String] the string written by the native call
def eval_set(evals, iteration)
  matrix_ptrs = array_of_pointers(evals.map { |entry| entry[0].handle })
  name_ptrs = array_of_pointers(evals.map { |entry| string_pointer(entry[1]) })
  message = ::FFI::MemoryPointer.new(:pointer)

  status = FFI.XGBoosterEvalOneIter(handle, iteration, matrix_ptrs, name_ptrs, evals.size, message)
  check_call(status)

  message.read_pointer.read_string
end

#feature_names ⇒ Object



84
85
86
# File 'lib/xgboost/booster.rb', line 84

# Fetches the "feature_name" field through get_feature_info.
def feature_names
  field = "feature_name"
  get_feature_info(field)
end

#feature_names=(features) ⇒ Object



88
89
90
# File 'lib/xgboost/booster.rb', line 88

# Stores the given features under the "feature_name" field through set_feature_info.
#
# @param features [Object] value forwarded unchanged to set_feature_info
def feature_names=(features)
  field = "feature_name"
  set_feature_info(features, field)
end

#feature_types ⇒ Object



76
77
78
# File 'lib/xgboost/booster.rb', line 76

# Fetches the "feature_type" field through get_feature_info.
def feature_types
  field = "feature_type"
  get_feature_info(field)
end

#feature_types=(features) ⇒ Object



80
81
82
# File 'lib/xgboost/booster.rb', line 80

# Stores the given features under the "feature_type" field through set_feature_info.
#
# @param features [Object] value forwarded unchanged to set_feature_info
def feature_types=(features)
  field = "feature_type"
  set_feature_info(features, field)
end

#fscore(fmap: "") ⇒ Object



193
194
195
196
# File 'lib/xgboost/booster.rb', line 193

# Shortcut for #score with the importance type fixed to "weight".
#
# @param fmap [String] forwarded to #score
def fscore(fmap: "")
  score(importance_type: "weight", fmap: fmap)
end

#num_boosted_rounds ⇒ Object



148
149
150
151
152
# File 'lib/xgboost/booster.rb', line 148

# Reads the round count reported by XGBoosterBoostedRounds.
#
# @return [Integer]
def num_boosted_rounds
  count_ptr = ::FFI::MemoryPointer.new(:int)
  status = FFI.XGBoosterBoostedRounds(handle, count_ptr)
  check_call(status)
  count_ptr.read_int
end

#num_features ⇒ Object



154
155
156
157
158
# File 'lib/xgboost/booster.rb', line 154

# Reads the feature count reported by XGBoosterGetNumFeature.
#
# @return [Integer]
def num_features
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  status = FFI.XGBoosterGetNumFeature(handle, count_ptr)
  check_call(status)
  count_ptr.read_uint64
end

#predict(data, ntree_limit: nil) ⇒ Object



117
118
119
120
121
122
123
124
125
126
# File 'lib/xgboost/booster.rb', line 117

# Runs XGBoosterPredict on the given matrix.
#
# The flat float output is returned as-is when there is one value per row,
# and grouped into one array per row when there are several values per row.
#
# @param data [#handle, #num_row] matrix to predict on
# @param ntree_limit [Integer, nil] passed to the native call; nil is sent as 0
# @return [Array<Float>, Array<Array<Float>>] empty array when nothing is predicted
def predict(data, ntree_limit: nil)
  ntree_limit ||= 0
  out_len = ::FFI::MemoryPointer.new(:uint64)
  out_result = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGBoosterPredict(handle, data.handle, 0, ntree_limit, 0, out_len, out_result)

  length = out_len.read_uint64
  # Nothing predicted (e.g. a matrix without rows): return early rather than
  # dividing by a zero row count below.
  return [] if length.zero?

  out = out_result.read_pointer.read_array_of_float(length)
  num_class = out.size / data.num_row
  num_class > 1 ? out.each_slice(num_class).to_a : out
end

#reset ⇒ Object



50
51
52
53
# File 'lib/xgboost/booster.rb', line 50

# Calls XGBoosterReset on the booster handle.
#
# @return [self] to allow chaining
def reset
  status = FFI.XGBoosterReset(handle)
  check_call(status)
  self
end

#save_config ⇒ Object



43
44
45
46
47
48
# File 'lib/xgboost/booster.rb', line 43

# Fetches the string written by XGBoosterSaveJsonConfig.
#
# @return [String] the reported number of bytes, tagged as UTF-8
def save_config
  size = ::FFI::MemoryPointer.new(:uint64)
  buffer = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGBoosterSaveJsonConfig(handle, size, buffer)
  raw = buffer.read_pointer.read_string(size.read_uint64)
  raw.force_encoding(Encoding::UTF_8)
end

#save_model(fname) ⇒ Object



128
129
130
# File 'lib/xgboost/booster.rb', line 128

# Saves the model through XGBoosterSaveModel.
#
# @param fname [String] destination passed to the native call
def save_model(fname)
  status = FFI.XGBoosterSaveModel(handle, fname)
  check_call(status)
end

#score(fmap: "", importance_type: "weight") ⇒ Object



198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/xgboost/booster.rb', line 198

# Computes per-feature importance by parsing the text output of #dump.
#
# * "weight": number of split lines that use the feature.
# * "gain" / "cover": the matching statistic averaged over the feature's splits.
# * "total_gain" / "total_cover": the matching statistic summed over the feature's splits.
#
# Only lines containing a "[" are treated as splits; all other lines,
# including blank ones, are skipped.
#
# @param fmap [String] forwarded to #dump
# @param importance_type [String, Symbol] one of the types listed above
# @return [Hash{String => Integer, Float}] feature id => importance
def score(fmap: "", importance_type: "weight")
  importance_type = importance_type.to_s

  if importance_type == "weight"
    trees = dump(fmap: fmap, with_stats: false)
    counts = {}
    trees.each do |tree|
      tree.split("\n").each do |line|
        arr = line.split("[")
        # An empty line splits into [], so test for "fewer than two parts"
        # rather than "exactly one part".
        next if arr.size < 2

        fid = arr[1].split("]")[0].split("<")[0]
        counts[fid] = (counts[fid] || 0) + 1
      end
    end
    counts
  else
    average_over_splits = true
    if importance_type == "total_gain"
      importance_type = "gain"
      average_over_splits = false
    elsif importance_type == "total_cover"
      importance_type = "cover"
      average_over_splits = false
    end

    trees = dump(fmap: fmap, with_stats: true)

    # Statistics appear in the dump as "<name>=<value>" pairs separated by commas.
    stat_key = "#{importance_type}="
    counts = {}
    totals = {}
    trees.each do |tree|
      tree.split("\n").each do |line|
        arr = line.split("[")
        next if arr.size < 2

        parts = arr[1].split("]")
        stat = parts[1].split(stat_key)[1].split(",")[0].to_f
        fid = parts[0].split("<")[0]

        counts[fid] = (counts[fid] || 0) + 1
        totals[fid] = (totals[fid] || 0) + stat
      end
    end

    if average_over_splits
      totals.each_key do |fid|
        totals[fid] = totals[fid] / counts[fid]
      end
    end

    totals
  end
end

#set_attr(**kwargs) ⇒ Object



70
71
72
73
74
# File 'lib/xgboost/booster.rb', line 70

# Sets each given attribute through XGBoosterSetAttr.
#
# Names and non-nil values are converted with to_s; a nil value is passed
# to the native call as nil.
#
# @param kwargs [Hash] attribute name => value
def set_attr(**kwargs)
  kwargs.each_pair do |name, value|
    text = value.nil? ? nil : value.to_s
    check_call FFI.XGBoosterSetAttr(handle, name.to_s, text)
  end
end

#set_param(params, value = nil) ⇒ Object



92
93
94
95
96
97
98
99
100
# File 'lib/xgboost/booster.rb', line 92

# Sets booster parameters through XGBoosterSetParam.
#
# An Enumerable is iterated as name/value pairs; anything else is treated
# as a single parameter name paired with +value+. Names and values are
# converted with to_s.
#
# @param params [Enumerable, Object] pairs to set, or a single parameter name
# @param value [Object] value used only when params is not Enumerable
def set_param(params, value = nil)
  unless params.is_a?(Enumerable)
    return check_call(FFI.XGBoosterSetParam(handle, params.to_s, value.to_s))
  end

  params.each do |name, setting|
    check_call(FFI.XGBoosterSetParam(handle, name.to_s, setting.to_s))
  end
end

#update(dtrain, iteration) ⇒ Object



102
103
104
# File 'lib/xgboost/booster.rb', line 102

# Runs one training iteration through XGBoosterUpdateOneIter.
#
# @param dtrain [#handle] training matrix
# @param iteration [Integer] iteration number passed to the native call
def update(dtrain, iteration)
  status = FFI.XGBoosterUpdateOneIter(handle, iteration, dtrain.handle)
  check_call(status)
end