Class: XGBoost::DMatrix
- Inherits: Object
  - Object
    - XGBoost::DMatrix
- Includes: Utils
- Defined in: lib/xgboost/dmatrix.rb
Instance Attribute Summary collapse
- #handle ⇒ Object (readonly): Returns the value of attribute handle.
Instance Method Summary collapse
- #data_split_mode ⇒ Object
- #feature_names ⇒ Object
- #feature_names=(feature_names) ⇒ Object
- #feature_types ⇒ Object
- #feature_types=(feature_types) ⇒ Object
- #group=(group) ⇒ Object
- #initialize(data, label: nil, weight: nil, missing: Float::NAN) ⇒ DMatrix (constructor): A new instance of DMatrix.
- #label ⇒ Object
- #label=(label) ⇒ Object
- #num_col ⇒ Object
- #num_nonmissing ⇒ Object
- #num_row ⇒ Object
- #save_binary(fname, silent: true) ⇒ Object
- #slice(rindex) ⇒ Object
- #weight ⇒ Object
- #weight=(weight) ⇒ Object
Constructor Details
#initialize(data, label: nil, weight: nil, missing: Float::NAN) ⇒ DMatrix
Returns a new instance of DMatrix.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/xgboost/dmatrix.rb', line 7
# Creates a DMatrix, either by adopting an existing native handle or by
# copying tabular data into a newly created native matrix.
#
# @param data [::FFI::AutoPointer, Object] an existing handle (stored as-is),
#   or tabular data: anything accepted by the matrix?/daru?/numo?/rover?
#   helpers, otherwise a collection of equally sized rows
# @param label [Object, nil] forwarded to #label= when truthy
# @param weight [Object, nil] forwarded to #weight= when truthy
# @param missing [Float] passed to handle_missing for non-Numo input and to
#   XGDMatrixCreateFromMat
# @raise [ArgumentError] if row input is empty or its rows differ in size
# @raise [Error] if a Daru vector has a db_type other than INTEGER or DOUBLE
def initialize(data, label: nil, weight: nil, missing: Float::NAN)
  # An existing handle (e.g. the one #slice builds) is stored without copying.
  if data.is_a?(::FFI::AutoPointer)
    @handle = data
    return
  end

  if matrix?(data)
    nrow = data.row_count
    ncol = data.column_count
    flat_data = data.to_a.flatten
  elsif daru?(data)
    nrow, ncol = data.shape
    flat_data = data.map_rows(&:to_a).flatten
    feature_names = data.each_vector.map(&:name)
    feature_types =
      data.each_vector.map(&:db_type).map do |v|
        case v
        when "INTEGER"
          "int"
        when "DOUBLE"
          "float"
        else
          raise Error, "Unknown feature type: #{v}"
        end
      end
  elsif numo?(data)
    nrow, ncol = data.shape
  elsif rover?(data)
    nrow, ncol = data.shape
    feature_names = data.keys
    # After conversion the numo? check further down picks this data up.
    data = data.to_numo
  else
    nrow = data.count
    # Without this guard an empty collection fails on `data.first.count`
    # with a NoMethodError on nil instead of a meaningful error.
    raise ArgumentError, "Data must not be empty" if nrow.zero?

    ncol = data.first.count
    unless data.all? { |r| r.size == ncol }
      raise ArgumentError, "Rows have different sizes"
    end
    flat_data = data.flatten
  end

  c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
  if numo?(data)
    # Numo input is cast to single-precision floats and copied as raw bytes;
    # handle_missing is not applied on this path.
    c_data.write_bytes(data.cast_to(Numo::SFloat).to_string)
  else
    handle_missing(flat_data, missing)
    c_data.write_array_of_float(flat_data)
  end

  out = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGDMatrixCreateFromMat(c_data, nrow, ncol, missing, out)
  # XGDMatrixFree is registered as the AutoPointer's release callback.
  @handle = ::FFI::AutoPointer.new(out.read_pointer, FFI.method(:XGDMatrixFree))

  # Default names are "f0", "f1", ... when the input supplied none.
  self.feature_names = feature_names || ncol.times.map { |i| "f#{i}" }
  self.feature_types = feature_types if feature_types
  self.label = label if label
  self.weight = weight if weight
end
Instance Attribute Details
#handle ⇒ Object (readonly)
Returns the value of attribute handle.
5 6 7 |
# File 'lib/xgboost/dmatrix.rb', line 5
# Returns the value of attribute handle.
#
# @return [Object] the current value of @handle
def handle
  @handle
end
Instance Method Details
#data_split_mode ⇒ Object
110 111 112 113 114 |
# File 'lib/xgboost/dmatrix.rb', line 110
# Reports the split mode returned by XGDMatrixDataSplitMode for this matrix.
#
# @return [Symbol] :row when the native call writes 0, :col for any other value
def data_split_mode
  mode_ptr = ::FFI::MemoryPointer.new(:uint64)
  check_call FFI.XGDMatrixDataSplitMode(handle, mode_ptr)
  return :row if mode_ptr.read_uint64 == 0

  :col
end
#feature_names ⇒ Object
126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/xgboost/dmatrix.rb', line 126
# Reads the "feature_name" string info stored on the matrix.
#
# @return [Object, nil] the result of from_cstr_to_rbstr, or nil when that
#   result is empty
def feature_names
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  strings_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_call(
    FFI.XGDMatrixGetStrFeatureInfo(handle, "feature_name", count_ptr, strings_ptr)
  )
  # Conversion of the native string array is delegated to the helper.
  names = from_cstr_to_rbstr(strings_ptr, count_ptr)
  names unless names.empty?
end
#feature_names=(feature_names) ⇒ Object
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
# File 'lib/xgboost/dmatrix.rb', line 141
# Stores the "feature_name" string info on the matrix, or clears it for nil.
#
# @param feature_names [Object, nil] the names to store; nil clears the field
# @raise [ArgumentError] if names are not unique, or if any name is not a
#   String or contains "[", "]" or "<"
def feature_names=(feature_names)
  if feature_names.nil?
    # Clearing: a nil array with length 0 is sent to the native call.
    check_call(FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_name", nil, 0))
    return
  end

  # validate feature name
  columns = num_col
  column_split = data_split_mode == :col
  feature_names = validate_feature_info(feature_names, columns, column_split, "feature names")

  unless feature_names.uniq.length == feature_names.length
    raise ArgumentError, "feature_names must be unique"
  end

  # prohibit the use symbols that may affect parsing. e.g. []<
  forbidden = ["[", "]", "<"]
  invalid = feature_names.any? do |name|
    !name.is_a?(String) || forbidden.any? { |symbol| name.include?(symbol) }
  end
  if invalid
    raise ArgumentError, "feature_names must be string, and may not contain [, ] or <"
  end

  name_pointers = feature_names.map { |name| string_pointer(name) }
  c_feature_names = array_of_pointers(name_pointers)
  check_call(
    FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_name", c_feature_names, feature_names.length)
  )
end
#feature_types ⇒ Object
179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
# File 'lib/xgboost/dmatrix.rb', line 179
# Reads the "feature_type" string info stored on the matrix.
#
# @return [Object, nil] the result of from_cstr_to_rbstr, or nil when that
#   result is empty
def feature_types
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  strings_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_call(
    FFI.XGDMatrixGetStrFeatureInfo(handle, "feature_type", count_ptr, strings_ptr)
  )
  # Conversion of the native string array is delegated to the helper.
  types = from_cstr_to_rbstr(strings_ptr, count_ptr)
  types unless types.empty?
end
#feature_types=(feature_types) ⇒ Object
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 |
# File 'lib/xgboost/dmatrix.rb', line 194
# Stores the "feature_type" string info on the matrix, or clears it for nil.
#
# @param feature_types [Object, nil] the types to store; nil clears the field
def feature_types=(feature_types)
  if feature_types.nil?
    # Clearing: a nil array with length 0 is sent to the native call.
    check_call(FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_type", nil, 0))
    return
  end

  columns = num_col
  column_split = data_split_mode == :col
  feature_types = validate_feature_info(feature_types, columns, column_split, "feature types")

  type_pointers = feature_types.map { |type| string_pointer(type) }
  c_feature_types = array_of_pointers(type_pointers)
  check_call(
    FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_type", c_feature_types, feature_types.length)
  )
end
#group=(group) ⇒ Object
78 79 80 81 82 |
# File 'lib/xgboost/dmatrix.rb', line 78
# Stores the given values in the matrix's "group" unsigned-integer info field.
#
# The buffer is allocated and written as C unsigned ints so that its element
# type matches the XGDMatrixSetUIntInfo call it is passed to.
#
# @param group [Array<Integer>] values to store; its size is sent as the length
def group=(group)
  c_data = ::FFI::MemoryPointer.new(:uint, group.size)
  c_data.write_array_of_uint(group)
  check_call FFI.XGDMatrixSetUIntInfo(handle, "group", c_data, group.size)
end
#label ⇒ Object
84 85 86 |
# File 'lib/xgboost/dmatrix.rb', line 84
# Reads the "label" field of the matrix through the float_info helper.
#
# @return [Object] whatever float_info returns for "label"
def label
  float_info("label")
end
#label=(label) ⇒ Object
70 71 72 |
# File 'lib/xgboost/dmatrix.rb', line 70
# Writes the "label" field of the matrix through the set_float_info helper.
#
# @param label [Object] value handed unchanged to set_float_info
def label=(label)
  set_float_info("label", label)
end
#num_col ⇒ Object
98 99 100 101 102 |
# File 'lib/xgboost/dmatrix.rb', line 98
# Returns the value reported by XGDMatrixNumCol for this matrix.
#
# @return [Integer] the unsigned 64-bit value written by the native call
def num_col
  result_ptr = ::FFI::MemoryPointer.new(:uint64)
  status = FFI.XGDMatrixNumCol(handle, result_ptr)
  check_call(status)
  result_ptr.read_uint64
end
#num_nonmissing ⇒ Object
104 105 106 107 108 |
# File 'lib/xgboost/dmatrix.rb', line 104
# Returns the value reported by XGDMatrixNumNonMissing for this matrix.
#
# @return [Integer] the unsigned 64-bit value written by the native call
def num_nonmissing
  result_ptr = ::FFI::MemoryPointer.new(:uint64)
  status = FFI.XGDMatrixNumNonMissing(handle, result_ptr)
  check_call(status)
  result_ptr.read_uint64
end
#num_row ⇒ Object
92 93 94 95 96 |
# File 'lib/xgboost/dmatrix.rb', line 92
# Returns the value reported by XGDMatrixNumRow for this matrix.
#
# @return [Integer] the unsigned 64-bit value written by the native call
def num_row
  result_ptr = ::FFI::MemoryPointer.new(:uint64)
  status = FFI.XGDMatrixNumRow(handle, result_ptr)
  check_call(status)
  result_ptr.read_uint64
end
#save_binary(fname, silent: true) ⇒ Object
66 67 68 |
# File 'lib/xgboost/dmatrix.rb', line 66
# Saves the matrix by calling XGDMatrixSaveBinary with the given file name.
#
# @param fname [Object] file name handed unchanged to the native call
# @param silent [Boolean] sent to the native call as 1 when truthy, else 0
def save_binary(fname, silent: true)
  silent_flag = silent ? 1 : 0
  status = FFI.XGDMatrixSaveBinary(handle, fname, silent_flag)
  check_call(status)
end
#slice(rindex) ⇒ Object
116 117 118 119 120 121 122 123 124 |
# File 'lib/xgboost/dmatrix.rb', line 116
# Builds a new DMatrix from the result of XGDMatrixSliceDMatrix.
#
# @param rindex [Array<Integer>] indices written to a C int buffer and passed
#   to the native call
# @return [DMatrix] a DMatrix wrapping the handle produced by the native call
def slice(rindex)
  index_buffer = ::FFI::MemoryPointer.new(:int, rindex.count)
  index_buffer.write_array_of_int(rindex)

  result_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGDMatrixSliceDMatrix(handle, index_buffer, rindex.size, result_ptr)

  # XGDMatrixFree is registered as the release callback for the new handle.
  sliced_handle = ::FFI::AutoPointer.new(result_ptr.read_pointer, FFI.method(:XGDMatrixFree))
  DMatrix.new(sliced_handle)
end
#weight ⇒ Object
88 89 90 |
# File 'lib/xgboost/dmatrix.rb', line 88
# Reads the "weight" field of the matrix through the float_info helper.
#
# @return [Object] whatever float_info returns for "weight"
def weight
  float_info("weight")
end
#weight=(weight) ⇒ Object
74 75 76 |
# File 'lib/xgboost/dmatrix.rb', line 74
# Writes the "weight" field of the matrix through the set_float_info helper.
#
# @param weight [Object] value handed unchanged to set_float_info
def weight=(weight)
  set_float_info("weight", weight)
end