Class: Evoc::TxStore
- Inherits:
-
Object
- Object
- Evoc::TxStore
- Includes:
- Enumerable, Logging
- Defined in:
- lib/evoc/tx_store.rb
Constant Summary collapse
- @@case_id =
CLASS VARIABLES
nil
Instance Attribute Summary collapse
-
#int_2_name ⇒ Object
Returns the value of attribute int_2_name.
-
#items ⇒ Object
readonly
Returns the value of attribute items.
-
#name_2_int ⇒ Object
Returns the value of attribute name_2_int.
-
#tx_index_mapping ⇒ Object
readonly
Returns the value of attribute tx_index_mapping.
-
#txes ⇒ Object
readonly
Returns the value of attribute txes.
Instance Method Summary collapse
-
#<<(tx) ⇒ Object
self << tx.
- #[](*indexes) ⇒ Object
-
#clear ⇒ Object
clear out the currently loaded transactions.
-
#clone_by_indexes(tx_indexes) ⇒ Object
Return a new tx_store containing the specified tx ids.
-
#clone_with_subset(start_index, stop_index, max_size = nil) ⇒ Object
#get_cloned_subset.
-
#each(&block) ⇒ Object
implementing #each gives us access to all Enumerable methods select, find_all etc.
- #first ⇒ Object
-
#get_tx(id:, id_type: :index) ⇒ Object
Retrieve a transaction using the given identifier.
-
#initialize(path: nil, case_id: nil, granularity: 'mixed') ⇒ TxStore
constructor
A new instance of TxStore.
- #ints2names(ints) ⇒ Object
- #last ⇒ Object
-
#load_transactions(path: nil, before: nil, after: nil, granularity: 'mixed') ⇒ Object
read in a JSON file of transactions.
- #names2ints(names) ⇒ Object
- #pretty_print ⇒ Object
-
#relevant_unchanged_items(query) ⇒ Object
Return the list of items that have changed with at least one item from the query.
- #size ⇒ Object
-
#to_json ⇒ Object
return a (string) json representation of the tx_store.
- #to_s ⇒ Object
-
#transactions_of(item, identifier: :index) ⇒ Object
Given an item, find the transactions in which the item has been modified. Parameters: item: the item to check; identifier: how to represent the found transactions, either :index or :id.
-
#transactions_of_list(items, strict: false, identifier: :index) ⇒ Object
Returns the relevant transactions of the query, that is, all the transactions where at least one item from the query was changed.
-
#valid_date?(json_object, after, before) ⇒ Boolean
A looser version of #between?: nil comparisons are also allowed, and if both <after> and <before> are nil the date is considered valid.
Methods included from Logging
configure_logger_for, #logger, logger_for, set_level
Constructor Details
#initialize(path: nil, case_id: nil, granularity: 'mixed') ⇒ TxStore
Returns a new instance of TxStore.
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/evoc/tx_store.rb', line 10

# Builds an empty store and optionally fills it from a transaction file.
#
# @param path [String, nil] file handed to #load_transactions; nothing is
#   loaded when nil
# @param case_id [Object, nil] stored in the class variable @@case_id, but
#   only if that variable is still unset (nil/false)
# @param granularity [String] forwarded unchanged to #load_transactions
def initialize(path: nil, case_id: nil, granularity: 'mixed')
  @txes = []

  # Pick the backing hash implementation once. Four containers are kept:
  #   @items            item          -> transactions touching that item
  #   @tx_index_mapping tx.index      -> position of the tx in @txes
  #   @name_2_int       item name     -> integer id
  #   @int_2_name       integer id    -> item name
  if Evoc::Env::GOOGLE_HASH
    @items            = GoogleHashSparseIntToRuby.new
    @tx_index_mapping = GoogleHashSparseIntToInt.new
    @name_2_int       = GoogleHashSparseRubyToInt.new
    @int_2_name       = GoogleHashSparseIntToRuby.new
  else
    @items            = Hash.new
    @tx_index_mapping = Hash.new
    @name_2_int       = Hash.new
    @int_2_name       = Hash.new
  end

  load_transactions(path: path, granularity: granularity) unless path.nil?

  # Initialize class variables (first non-nil case_id wins)
  @@case_id ||= case_id
end
Instance Attribute Details
#int_2_name ⇒ Object
Returns the value of attribute int_2_name.
5 6 7 |
# File 'lib/evoc/tx_store.rb', line 5

# Reader for the integer-id -> item-name dictionary.
#
# @return [Object] the value of the @int_2_name instance variable
def int_2_name
  @int_2_name
end
#items ⇒ Object (readonly)
Returns the value of attribute items.
4 5 6 |
# File 'lib/evoc/tx_store.rb', line 4

# Reader for the item -> transactions mapping.
#
# @return [Object] the value of the @items instance variable
def items
  @items
end
#name_2_int ⇒ Object
Returns the value of attribute name_2_int.
5 6 7 |
# File 'lib/evoc/tx_store.rb', line 5

# Reader for the item-name -> integer-id dictionary.
#
# @return [Object] the value of the @name_2_int instance variable
def name_2_int
  @name_2_int
end
#tx_index_mapping ⇒ Object (readonly)
Returns the value of attribute tx_index_mapping.
4 5 6 |
# File 'lib/evoc/tx_store.rb', line 4

# Reader for the tx.index -> position-in-@txes mapping.
#
# @return [Object] the value of the @tx_index_mapping instance variable
def tx_index_mapping
  @tx_index_mapping
end
#txes ⇒ Object (readonly)
Returns the value of attribute txes.
4 5 6 |
# File 'lib/evoc/tx_store.rb', line 4

# Reader for the list of stored transactions.
#
# @return [Object] the value of the @txes instance variable
def txes
  @txes
end
Instance Method Details
#<<(tx) ⇒ Object
self << tx
adds <tx> and updates @items with which items are changed in which tx
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/evoc/tx_store.rb', line 41

# Appends +tx+ to the store and records which items it changed.
#
# Steps:
# 1. a tx without an index gets the current store size as its index;
# 2. unless every item already is an Integer, the items are replaced by their
#    integer ids (new names are added to the name <-> int dictionaries);
# 3. the tx is registered under each of its items in @items;
# 4. the tx's position in @txes is recorded under tx.index.
#
# Returns +self+ so that appends can be chained (`store << a << b`). The
# previous implementation returned the internal @txes array, which made a
# chained append bypass all of the bookkeeping above.
#
# @param tx [#items] the transaction to add
# @return [self]
# @raise [Evoc::Exceptions::NotATransaction] if +tx+ does not respond to #items
def <<(tx)
  raise Evoc::Exceptions::NotATransaction.new(tx) unless tx.respond_to?(:items)

  tx.index = size if tx.index.nil?

  ##
  # BUILD INTEGER REPRESENTATION
  # internally, items (e.g., files/methods) are stored as unique integers
  # but a dictionary is kept updated with item -> integer mappings
  unless tx.items.all? { |item| item.is_a?(Integer) }
    tx.items = tx.items.map do |item|
      unless name_2_int.key?(item)
        int = name_2_int.size
        name_2_int[item] = int
        int_2_name[int] = item
      end
      name_2_int[item]
    end
  end

  ##
  # BUILD ITEM <-> TX MAPPING
  tx.items.each do |item|
    if @items.key?(item)
      @items[item] << tx
    else
      @items[item] = [tx]
    end
  end

  @tx_index_mapping[tx.index] = @txes.size
  @txes << tx
  self
end
#[](*indexes) ⇒ Object
98 99 100 |
# File 'lib/evoc/tx_store.rb', line 98

# Element reference on the stored transactions; the arguments are handed
# unchanged to the underlying @txes array (single position, start/length
# pair, or range).
#
# @param indexes [Array] arguments for the array lookup
# @return [Object] whatever the array lookup on @txes returns
def [](*indexes)
  @txes.slice(*indexes)
end
#clear ⇒ Object
clear out the currently loaded transactions
112 113 114 115 |
# File 'lib/evoc/tx_store.rb', line 112

# Clears out the currently loaded transactions.
#
# Empties @txes, @items and @tx_index_mapping. The index mapping has to go
# too: it stores positions into @txes, so entries surviving a clear would
# point at whatever transaction later occupies that position and #get_tx
# could return the wrong transaction.
#
# The name <-> int dictionaries (@name_2_int / @int_2_name) are left
# untouched.
#
# NOTE(review): assumes the mapping container responds to #clear like @items
# does (true for Hash) - confirm for the GoogleHash backend.
#
# @return [Object] the (now empty) @items container, as before
def clear
  @txes.clear
  @tx_index_mapping.clear
  @items.clear
end
#clone_by_indexes(tx_indexes) ⇒ Object
Return a new tx_store containing the specified tx ids
330 331 332 333 334 335 336 337 338 |
# File 'lib/evoc/tx_store.rb', line 330

# Returns a new tx_store containing the transactions whose #index is listed
# in +tx_indexes+, in the order they appear in this store.
#
# The new store shares this store's name <-> int dictionaries (the same way
# #clone_with_subset does). The copied transactions already carry integer
# items, so without the dictionaries the subset could not translate them back
# to names.
#
# The transaction objects themselves are shared, not copied.
#
# @param tx_indexes [#include?] the tx indexes to keep
# @return [TxStore] the new store
def clone_by_indexes(tx_indexes)
  subset = TxStore.new
  subset.name_2_int = name_2_int
  subset.int_2_name = int_2_name
  each do |tx|
    subset << tx if tx_indexes.include?(tx.index)
  end
  subset
end
#clone_with_subset(start_index, stop_index, max_size = nil) ⇒ Object
#get_cloned_subset
Returns a clone of <self> with transactions equal to the index range defined by
from and including <start_index> to and including <stop_index>
also exclude transactions with size larger than <max_size>
302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 |
# File 'lib/evoc/tx_store.rb', line 302

# Returns a store holding the transactions at positions
# <start_index>..<stop_index> (both inclusive) of this store, optionally
# dropping transactions whose #size exceeds <max_size>.
#
# * all three arguments nil  -> returns +self+ (no copy is made)
# * only <max_size> given    -> the whole store is filtered by size; this case
#   is handled explicitly instead of slicing with a nil..nil range
# * exactly one of <start_index>/<stop_index> given -> ArgumentError
#
# The returned store shares this store's name <-> int dictionaries and the
# transaction objects themselves.
#
# @param start_index [Integer, nil] first position to include
# @param stop_index [Integer, nil] last position to include
# @param max_size [Integer, nil] largest transaction size to keep
# @return [TxStore] the subset (or +self+ when nothing was requested)
# @raise [ArgumentError] if only one bound is given or the range does not
#   exist in this store
def clone_with_subset(start_index, stop_index, max_size = nil)
  return self if start_index.nil? && stop_index.nil? && max_size.nil?

  # if only one of start_index and stop_index is provided, raise exception
  if start_index.nil? ^ stop_index.nil?
    raise ArgumentError, "You must provide both a start and end index"
  end

  all_txes = txes
  # past the check above, start_index is nil only if stop_index is nil too
  selected = start_index.nil? ? all_txes : all_txes[start_index..stop_index]
  if selected.nil?
    raise ArgumentError, "#{start_index}..#{stop_index} was not a valid range on tx_store with size #{size}"
  end
  selected = selected.select { |tx| tx.size <= max_size } unless max_size.nil?

  clone = TxStore.new
  clone.name_2_int = name_2_int
  clone.int_2_name = int_2_name
  selected.each { |tx| clone << tx }
  clone
end
#each(&block) ⇒ Object
implementing #each gives us access to all Enumerable methods select, find_all etc
80 81 82 83 84 85 86 87 88 |
# File 'lib/evoc/tx_store.rb', line 80

# Yields every stored transaction in @txes order. Implementing #each is what
# gives the store access to the Enumerable methods (select, find_all, ...).
#
# Without a block an Enumerator is returned; the previous implementation fell
# through to a bare `yield` in that case and raised LocalJumpError on a
# non-empty store.
#
# @yieldparam tx [Object] a stored transaction
# @return [Array, Enumerator] the @txes array when a block is given
#   (unchanged from before), otherwise an Enumerator over the transactions
def each(&block)
  return enum_for(:each) { @txes.size } unless block_given?

  @txes.each(&block)
end
#first ⇒ Object
90 91 92 |
# File 'lib/evoc/tx_store.rb', line 90

# Returns the first stored transaction, or the first +n+ when a count is
# given.
#
# This method overrides Enumerable#first; accepting the optional count keeps
# `store.first(n)` working instead of raising ArgumentError.
#
# @param args [Array] optionally a single Integer count, passed on to the
#   @txes array
# @return [Object, Array, nil] first transaction (nil when empty), or an
#   Array of at most n transactions
def first(*args)
  @txes.first(*args)
end
#get_tx(id:, id_type: :index) ⇒ Object
Retrieve a transaction using the given identifier
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/evoc/tx_store.rb', line 121

# Retrieves a transaction using the given identifier.
#
# * id_type :index - +id+ is looked up in @tx_index_mapping, which yields the
#   position of the transaction in @txes.
# * id_type :id    - @txes is searched for the first transaction whose #id
#   equals +id+.
#
# Any other id_type finds nothing and therefore raises.
#
# The type check uses Integer rather than Fixnum: Fixnum was folded into
# Integer in Ruby 2.4 and the constant no longer exists in current Rubies.
#
# @param id [Integer, Object] the identifier to look up
# @param id_type [Symbol] :index (default) or :id
# @return [Object] the matching transaction
# @raise [ArgumentError] if +id+ is not an Integer for an :index lookup, or
#   if no transaction matches
def get_tx(id:, id_type: :index)
  found =
    case id_type
    when :index
      raise ArgumentError, "Index must be an Integer, #{id} was #{id.class}" unless id.is_a?(Integer)

      position = @tx_index_mapping[id]
      position && @txes[position]
    when :id
      @txes.find { |candidate| candidate.id == id }
    end

  raise ArgumentError, "No transaction with #{id_type} #{id}" if found.nil?

  found
end
#ints2names(ints) ⇒ Object
33 34 35 |
# File 'lib/evoc/tx_store.rb', line 33

# Translates integer item ids into the entries stored for them in the
# int -> name dictionary.
#
# @param ints [#map] integer item ids
# @return [Array] one dictionary entry per id, in input order (nil where the
#   dictionary has no entry, for a plain Hash dictionary)
def ints2names(ints)
  dictionary = int_2_name
  ints.map { |int| dictionary[int] }
end
#last ⇒ Object
94 95 96 |
# File 'lib/evoc/tx_store.rb', line 94

# Returns the last stored transaction, or the last +n+ when a count is given
# (the optional count mirrors what the underlying array accepts).
#
# @param args [Array] optionally a single Integer count, passed on to the
#   @txes array
# @return [Object, Array, nil] last transaction (nil when empty), or an Array
#   of at most n transactions
def last(*args)
  @txes.last(*args)
end
#load_transactions(path: nil, before: nil, after: nil, granularity: 'mixed') ⇒ Object
read in a JSON file of transactions
TRANSACTIONS ARE LOADED IN REVERSE ORDER!
This implies that the oldest transaction gets index 0 in the txes array and the newest has index txes.size-1 (given that the json file is sorted from newest/top to oldest/bottom).
196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 |
# File 'lib/evoc/tx_store.rb', line 196

# Reads a file holding one JSON commit object per line and adds a transaction
# for every usable commit.
#
# TRANSACTIONS ARE LOADED IN REVERSE ORDER: the last line of the file is added
# first. Given a file sorted newest (top) to oldest (bottom), the oldest
# transaction ends up at position 0 of @txes.
#
# Per line:
# * lines that are not valid JSON are logged and skipped;
# * commits without a "date" (valid_date? raises NoDateInJsonObject) are
#   logged and skipped;
# * commits outside the <after>/<before> window are skipped silently;
# * commits without "changes", with only nil changes, or whose items are all
#   filtered away by the granularity are logged and skipped.
#
# Fixes relative to the previous version:
# * <after>/<before> are passed to valid_date? in the order it declares
#   (json_object, after, before); they used to be swapped;
# * warnings name the commit's "sha" (they used to index the raw file String);
# * rescued errors are logged through their #message;
# * nil entries in "changes" are dropped for every granularity, and items the
#   'file_all' pattern cannot match are dropped instead of raising.
#
# @param path [String, nil] file to read; files ending in '.gz' are read
#   through Zlib::GzipReader. Nothing happens when nil.
# @param before [Object, nil] upper date bound handed to valid_date?
# @param after [Object, nil] lower date bound handed to valid_date?
# @param granularity [String] 'mixed', 'file', 'method' or 'file_all'
# @return [nil]
# @raise [ArgumentError] for an unknown granularity (raised when the first
#   commit with usable changes is processed)
def load_transactions(path: nil, before: nil, after: nil, granularity: 'mixed')
  return if path.nil?

  json = nil
  if File.extname(path) == '.gz'
    Zlib::GzipReader.open(path) { |gz| json = gz.read }
  else
    json = File.read(path, external_encoding: 'iso-8859-1', internal_encoding: 'utf-8')
  end

  STDERR.puts "Loading transactions using strategy: #{granularity}"
  json.lines.reverse_each do |json_line|
    begin
      json_object = JSON.parse(json_line)
      next unless valid_date?(json_object, after, before)

      id = json_object["sha"]
      changes = json_object["changes"]
      unless changes
        logger.warn "#{id} did not have a \"changes\" field"
        next
      end

      changes = changes.compact
      if changes.empty?
        logger.warn "#{id} \"changes\" field only contained nil value(s)"
        next
      end

      items = select_items_by_granularity(changes, granularity)
      tx = Evoc::Tx.new(id: id, date: json_object["date"], items: items)
      if tx.items.empty?
        logger.warn "#{id} with granularity #{granularity} filtered out all artifacts"
        next
      end

      self << tx
    rescue JSON::ParserError => e
      logger.warn e.message
      next # skip to next line
    rescue Evoc::Exceptions::NoDateInJsonObject => e
      logger.warn e.message
      next
    end
  end
  STDERR.puts "Loaded #{size} transactions from #{path}"
end

# Reduces the (nil-free) "changes" of one commit to the items used at the
# requested granularity.
#
# * 'mixed'    - the changes as they are
# * 'file'     - the unique "<file>:" prefixes (text up to and including the
#                first ':'); changes without such a prefix are dropped
# * 'method'   - only the changes that have a "<file>:" prefix, grouped per
#                prefix in order of first appearance
# * 'file_all' - the unique file names (text before the first ':' or the
#                whole item if it has none)
#
# @param changes [Array<String>] change identifiers without nil entries
# @param granularity [String] one of the four levels above
# @return [Array<String>]
# @raise [ArgumentError] for any other granularity
def select_items_by_granularity(changes, granularity)
  case granularity
  when 'mixed'
    changes
  when 'file'
    prefixes = changes.map { |item| /^(?<parsable_file>.+?):/.match(item).to_s }
    prefixes.uniq.reject(&:empty?)
  when 'method'
    by_file = changes.group_by { |item| /^(?<parsable_file>.+?):/.match(item).to_s }
    by_file.reject { |file, _methods| file.empty? }.values.flatten
  when 'file_all'
    files = changes.map do |item|
      match = /^(?<file>[^:]+?)(?::|\z)/.match(item)
      match && match[:file]
    end
    files.compact.uniq
  else
    raise ArgumentError, "Granularity level must be one of 'mixed', 'file', 'method' or 'file_all', was called with #{granularity}"
  end
end
private :select_items_by_granularity
#names2ints(names) ⇒ Object
29 30 31 |
# File 'lib/evoc/tx_store.rb', line 29

# Translates item names into the entries stored for them in the
# name -> int dictionary.
#
# @param names [#map] item names
# @return [Array] one dictionary entry per name, in input order (nil where
#   the dictionary has no entry, for a plain Hash dictionary)
def names2ints(names)
  dictionary = name_2_int
  names.map { |name| dictionary[name] }
end
#pretty_print ⇒ Object
368 369 370 |
# File 'lib/evoc/tx_store.rb', line 368

# Emits one CSV row per transaction through Kernel#CSV, walking the stored
# transactions from last to first. Each row holds the transaction's items as
# stored.
#
# @return [Array] the reversed copy of the transaction list
def pretty_print
  newest_first = txes.reverse
  newest_first.each do |tx|
    CSV { |csv| csv << tx.items }
  end
end
#relevant_unchanged_items(query) ⇒ Object
Return the list of items that have changed with at least one item from the query
176 177 178 |
# File 'lib/evoc/tx_store.rb', line 176

# Collects the items that changed together with at least one item of the
# query, excluding the query's own items.
#
# For every transaction index reported by #transactions_of_list the
# transaction's items minus the query are taken, and the per-transaction
# lists are combined with Array#array_union (a project extension defined
# elsewhere).
#
# @param query [Array] the items of interest
# @return [Object] the result of #array_union over the per-transaction lists
def relevant_unchanged_items(query)
  co_changed = transactions_of_list(query).map do |tx_index|
    get_tx(id: tx_index).items - query
  end
  co_changed.array_union
end
#size ⇒ Object
180 181 182 |
# File 'lib/evoc/tx_store.rb', line 180

# Number of transactions currently held in the store.
#
# @return [Integer]
def size
  @txes.length
end
#to_json ⇒ Object
return a (string) json representation of the tx_store
353 354 355 356 357 358 359 360 361 362 363 364 365 366 |
# File 'lib/evoc/tx_store.rb', line 353

# Returns a (string) JSON representation of the tx_store: a pretty-printed
# array with one object per transaction id, ordered by descending tx.index.
#
# Each object has the keys "sha" (tx.id), "date" (tx.date) and
# "changes" => {"all" => [items...]}; the items are emitted as stored. The
# index is used for ordering only and is left out of the output, as there
# might be "holes" in it (after filtering etc).
#
# If several transactions share an id, the last one iterated wins.
#
# Arguments are accepted and ignored so that the JSON generator, which calls
# #to_json with a state argument on nested objects, can serialise a store
# that is embedded in another structure.
#
# @return [String] the JSON document
def to_json(*_args)
  commits = {}
  each do |tx|
    commits[tx.id] = {
      sha: tx.id,
      date: tx.date,
      index: tx.index,
      changes: { all: tx.items.to_a }
    }
  end

  ordered = commits.values.sort_by { |commit| commit[:index] }.reverse
  ordered.each { |commit| commit.delete(:index) }
  JSON.pretty_generate(ordered)
end
#to_s ⇒ Object
102 103 104 105 106 107 108 |
# File 'lib/evoc/tx_store.rb', line 102 def to_s history = "" self.txes.reverse.each do |tx| history << tx.items.map {|i| self.int_2_name[i] }.join(',') + "\n" end history end |
#transactions_of(item, identifier: :index) ⇒ Object
Given an item, find the transactions in which the item has been modified. Parameters: item: the item to check; identifier: how to represent the found transactions, either :index or :id.
144 145 146 147 148 149 150 151 152 153 154 155 |
# File 'lib/evoc/tx_store.rb', line 144

# Finds the transactions in which +item+ was changed.
#
# @param item [Object] the item to look up in the item -> transactions map
# @param identifier [Symbol] message sent to each found transaction to
#   represent it in the result, e.g. :index (default) or :id
# @return [Array] one identifier per matching transaction; empty when the
#   store holds no transactions or the item is unknown
def transactions_of(item, identifier: :index)
  known = !size.zero? && items.key?(item)
  matching = known ? items[item] : []
  matching.map(&identifier)
end
#transactions_of_list(items, strict: false, identifier: :index) ⇒ Object
Returns the relevant transactions of the query, that is, all the transactions where at least one item from the query was changed.
Parameters: items (the query): a list of items; strict (optional): if set to true, all the items of the query must have been changed in a transaction for it to be included.
166 167 168 169 170 171 172 |
# File 'lib/evoc/tx_store.rb', line 166

# Returns the transactions relevant to a list of items.
#
# #transactions_of is evaluated for every item; the per-item results are then
# combined with Array#array_intersection when +strict+ is set and with
# Array#array_union otherwise (both are project extensions defined
# elsewhere).
#
# @param items [#map] the items of the query
# @param strict [Boolean] combine by intersection instead of union
# @param identifier [Symbol] forwarded to #transactions_of
# @return [Object] the combined result
def transactions_of_list(items, strict: false, identifier: :index)
  per_item = items.map { |item| transactions_of(item, identifier: identifier) }
  strict ? per_item.array_intersection : per_item.array_union
end
#valid_date?(json_object, after, before) ⇒ Boolean
A looser version of #between?: nil comparisons are also allowed, and if both <after> and <before> are nil the date is considered valid.
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 |
# File 'lib/evoc/tx_store.rb', line 272

# Checks the "date" of a parsed commit against an optional window; a looser
# version of #between? that tolerates missing bounds.
#
# * no bounds      -> valid
# * both bounds    -> date.between?(after, before)
# * only <after>   -> date > after
# * only <before>  -> date < before
#
# @param json_object [Hash] parsed commit; "date" and "sha" are read
# @param after [Object, nil] lower bound
# @param before [Object, nil] upper bound
# @return [Boolean]
# @raise [Evoc::Exceptions::NoDateInJsonObject] if the commit has no "date"
def valid_date?(json_object, after, before)
  date = json_object["date"]
  unless date
    raise Evoc::Exceptions::NoDateInJsonObject.new, "#{json_object["sha"]} had no \"date\" field."
  end

  has_lower = !after.nil?
  has_upper = !before.nil?

  within =
    if has_lower && has_upper
      date.between?(after, before)
    elsif has_lower
      date > after
    elsif has_upper
      date < before
    else
      true
    end

  within ? true : false
end