Class: Evoc::TxStore

Inherits:
Object
  • Object
show all
Includes:
Enumerable, Logging
Defined in:
lib/evoc/tx_store.rb

Constant Summary collapse

@@case_id =

CLASS VARIABLES

nil

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Logging

configure_logger_for, #logger, logger_for, set_level

Constructor Details

#initialize(path: nil, case_id: nil, granularity: 'mixed') ⇒ TxStore

Returns a new instance of TxStore.



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/evoc/tx_store.rb', line 10

# Build an empty store and, when +path+ is given, populate it from that
# history file.
#
# path::        path to a JSON history file (optional)
# case_id::     stored in the class variable @@case_id, but only if no
#               earlier value has been set
# granularity:: forwarded unchanged to #load_transactions
def initialize(path: nil, case_id: nil, granularity: 'mixed')
  # Evoc::Env::GOOGLE_HASH selects between the google_hash containers and
  # plain Ruby hashes for every lookup table below.
  use_google_hash = Evoc::Env::GOOGLE_HASH

  @txes = []
  @items = use_google_hash ? GoogleHashSparseIntToRuby.new : {}
  # tx.index -> position of that tx inside @txes
  @tx_index_mapping = use_google_hash ? GoogleHashSparseIntToInt.new : {}
  # item name <-> integer id dictionaries, filled while transactions are added
  @name_2_int = use_google_hash ? GoogleHashSparseRubyToInt.new : {}
  @int_2_name = use_google_hash ? GoogleHashSparseIntToRuby.new : {}

  load_transactions(path: path, granularity: granularity) unless path.nil?

  # Class-level state: the first non-nil case_id wins.
  @@case_id ||= case_id
end

Instance Attribute Details

#int_2_nameObject

Returns the value of attribute int_2_name.



5
6
7
# File 'lib/evoc/tx_store.rb', line 5

# Reader for @int_2_name, the dictionary that maps an internal integer id
# back to the item it was assigned to.
def int_2_name
  @int_2_name
end

#itemsObject (readonly)

Returns the value of attribute items.



4
5
6
# File 'lib/evoc/tx_store.rb', line 4

# Reader for @items, the lookup from an item to the list of transactions
# that contain it.
def items
  @items
end

#name_2_intObject

Returns the value of attribute name_2_int.



5
6
7
# File 'lib/evoc/tx_store.rb', line 5

# Reader for @name_2_int, the dictionary that maps an item to the integer id
# used for it inside the store.
def name_2_int
  @name_2_int
end

#tx_index_mappingObject (readonly)

Returns the value of attribute tx_index_mapping.



4
5
6
# File 'lib/evoc/tx_store.rb', line 4

# Reader for @tx_index_mapping, which maps a transaction's index attribute
# to its position in the @txes array.
def tx_index_mapping
  @tx_index_mapping
end

#txesObject (readonly)

Returns the value of attribute txes.



4
5
6
# File 'lib/evoc/tx_store.rb', line 4

# Reader for @txes, the array of stored transactions in insertion order.
def txes
  @txes
end

Instance Method Details

#<<(tx) ⇒ Object

self << tx

adds <tx> and updates @items with which items are changed in which tx



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/evoc/tx_store.rb', line 41

# Append a transaction to the store.
#
# * assigns tx.index (the current store size) when the tx has none
# * rewrites tx.items to integer ids unless every item already is an Integer,
#   extending the name <-> integer dictionaries for unseen items
# * records the tx under each of its items in @items
# * records where the tx lives in @txes via @tx_index_mapping
#
# Returns the store itself so that appends can be chained
# (store << a << b); returning the internal array would let a chained append
# bypass all of the bookkeeping above.
#
# Raises Evoc::Exceptions::NotATransaction when +tx+ does not respond to
# #items.
def <<(tx)
  raise Evoc::Exceptions::NotATransaction.new(tx) unless tx.respond_to?(:items)

  tx.index = size if tx.index.nil?

  # Internally items (e.g. files/methods) are stored as unique integers; the
  # dictionaries keep the item <-> integer mapping.
  unless tx.items.all? { |item| item.is_a?(Integer) }
    tx.items = tx.items.map do |item|
      unless name_2_int.key?(item)
        new_id = name_2_int.size
        name_2_int[item] = new_id
        int_2_name[new_id] = item
      end
      name_2_int[item]
    end
  end

  # item -> transactions lookup
  tx.items.each do |item|
    if @items.key?(item)
      @items[item] << tx
    else
      @items[item] = [tx]
    end
  end

  @tx_index_mapping[tx.index] = @txes.size
  @txes << tx
  self
end

#[](*indexes) ⇒ Object



98
99
100
# File 'lib/evoc/tx_store.rb', line 98

# Element reference: forwards all arguments to the underlying @txes array,
# so it accepts whatever Array#[] accepts.
def [](*indexes)
  @txes.slice(*indexes)
end

#clearObject

clear out the currently loaded transactions



112
113
114
115
# File 'lib/evoc/tx_store.rb', line 112

# Clear out the currently loaded transactions.
#
# Empties the transaction list, the tx.index -> position mapping and the
# item -> transactions lookup. The mapping has to be reset together with
# @txes, otherwise an old index would keep pointing at a position that is
# later occupied by a different transaction.
# The name <-> integer dictionaries are left untouched.
#
# Returns the (now empty) @items container.
def clear
  @txes.clear
  @tx_index_mapping.clear
  @items.clear
end

#clone_by_indexes(tx_indexes) ⇒ Object

Return a new tx_store containing the specified tx ids



330
331
332
333
334
335
336
337
338
# File 'lib/evoc/tx_store.rb', line 330

# Return a new TxStore holding only the transactions whose index is listed
# in +tx_indexes+ (in the order they appear in this store).
#
# The new store shares this store's name <-> integer dictionaries, exactly
# like #clone_with_subset does: the copied transactions already carry
# integer items, which are only meaningful together with these dictionaries.
def clone_by_indexes(tx_indexes)
  subset = TxStore.new
  subset.name_2_int = name_2_int
  subset.int_2_name = int_2_name
  each do |tx|
    subset << tx if tx_indexes.include?(tx.index)
  end
  subset
end

#clone_with_subset(start_index, stop_index, max_size = nil) ⇒ Object

#get_cloned_subset

Returns a clone of <self> with transactions equal to the index range defined by

from and including <start_index> to and including <stop_index>

also exclude transactions with size larger than <max_size>



302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# File 'lib/evoc/tx_store.rb', line 302

# Return a store restricted to the transactions at positions
# <start_index>..<stop_index> (both inclusive) of #txes, optionally dropping
# transactions whose size exceeds <max_size>.
#
# * all three arguments nil: returns self (no copy is made)
# * only max_size given:     filters over all transactions
# * otherwise both indexes must be given
#
# The returned store shares this store's name <-> integer dictionaries.
#
# Raises ArgumentError when exactly one of the indexes is given, or when the
# range does not select anything valid from #txes.
def clone_with_subset(start_index, stop_index, max_size = nil)
  return self if start_index.nil? && stop_index.nil? && max_size.nil?

  # exactly one of the two indexes given
  if start_index.nil? ^ stop_index.nil?
    raise ArgumentError, "You must provide both a start and end index"
  end

  # No indexes means "everything"; spelled out rather than slicing with
  # nil..nil, which not every Ruby version accepts as an array index.
  selected = start_index.nil? ? txes : txes[start_index..stop_index]
  if selected.nil?
    raise ArgumentError, "#{start_index}..#{stop_index} was not a valid range on tx_store with size #{size}"
  end

  selected = selected.select { |tx| tx.size <= max_size } unless max_size.nil?

  subset = TxStore.new
  subset.name_2_int = name_2_int
  subset.int_2_name = int_2_name
  selected.each { |tx| subset << tx }
  subset
end

#each(&block) ⇒ Object

implementing #each gives us access to all Enumerable methods select, find_all etc



80
81
82
83
84
85
86
87
88
# File 'lib/evoc/tx_store.rb', line 80

# Iterate over the stored transactions in insertion order.
# Implementing #each is what makes the Enumerable methods (select, find_all,
# ...) available on the store.
#
# With a block: yields every transaction and returns the @txes array.
# Without a block: returns an Enumerator over the transactions instead of
# failing with LocalJumpError.
def each(&block)
  @txes.each(&block)
end

#firstObject



90
91
92
# File 'lib/evoc/tx_store.rb', line 90

# The transaction at position 0 of @txes, or nil when the store is empty.
def first
  @txes[0]
end

#get_tx(id:, id_type: :index) ⇒ Object

Retrieve a transaction using the given identifier



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/evoc/tx_store.rb', line 121

# Retrieve a transaction using the given identifier.
#
# id::      the identifier to look for
# id_type:: :index - +id+ is a tx index, resolved through @tx_index_mapping
#           :id    - +id+ is compared against each transaction's #id
#
# Returns the matching transaction.
# Raises ArgumentError when an :index lookup is given a non-Integer, or when
# no transaction matches (this includes an unrecognised id_type).
def get_tx(id:, id_type: :index)
  tx = nil
  case id_type
  when :index
    # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and no
    # longer exists in Ruby 3.2+.
    raise ArgumentError, "Index must be an Integer, #{id} was #{id.class}" unless id.is_a?(Integer)
    position = @tx_index_mapping[id]
    tx = @txes[position] if position
  when :id
    tx = @txes.find { |candidate| candidate.id == id }
  end
  raise ArgumentError, "No transaction with #{id_type} #{id}" if tx.nil?

  tx
end

#ints2names(ints) ⇒ Object



33
34
35
# File 'lib/evoc/tx_store.rb', line 33

# Translate a list of internal integer ids into the items they stand for,
# using the int_2_name dictionary. The result has one entry per input id.
def ints2names(ints)
  dictionary = int_2_name
  ints.map { |id| dictionary[id] }
end

#lastObject



94
95
96
# File 'lib/evoc/tx_store.rb', line 94

# The transaction at the final position of @txes, or nil when the store is
# empty.
def last
  @txes[-1]
end

#load_transactions(path: nil, before: nil, after: nil, granularity: 'mixed') ⇒ Object

read in a JSON file of transactions

TRANSACTIONS ARE LOADED IN REVERSE ORDER!

this implies that the oldest transaction gets index 0 in the txes array and the newest has index txes.size-1 (given that the json file is sorted from newest/top to oldest/bottom)

Parameters:

  • path (String) (defaults to: nil)

    the path to the json history file

  • granularity (String) (defaults to: 'mixed')

    one of 'mixed', 'file', 'method' or 'file_all'



196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
# File 'lib/evoc/tx_store.rb', line 196

# Read a history file with one JSON object per line and add a transaction
# for every usable line.
#
# TRANSACTIONS ARE LOADED IN REVERSE ORDER: the last line of the file is
# added first.
#
# path::        path to the history file; a '.gz' extension is read through
#               Zlib. Nothing happens when nil.
# before::      upper date bound handed to #valid_date? (nil = unbounded)
# after::       lower date bound handed to #valid_date? (nil = unbounded)
# granularity:: 'mixed', 'file', 'method' or 'file_all'
#
# Lines that are not valid JSON, have no "date", have no usable "changes",
# or end up without items at the chosen granularity are skipped with a
# warning. Raises ArgumentError for an unknown granularity.
def load_transactions(path: nil, before: nil, after: nil, granularity: 'mixed')
  return if path.nil?

  json = nil
  if File.extname(path) == '.gz'
    Zlib::GzipReader.open(path) { |gz| json = gz.read }
  else
    json = File.read(path, external_encoding: 'iso-8859-1', internal_encoding: 'utf-8')
  end

  STDERR.puts "Loading transactions using strategy: #{granularity}"
  json.lines.reverse.each do |json_line|
    begin
      json_object = JSON.parse(json_line)
      # valid_date? takes (json_object, after, before) - in that order
      next unless valid_date?(json_object, after, before)

      sha = json_object["sha"]
      changes = json_object["changes"]
      unless changes
        logger.warn "#{sha} did not have a \"changes\" field"
        next
      end
      if changes.compact.empty?
        logger.warn "#{sha} \"changes\" field only contained nil value(s)"
        next
      end

      items = items_for_granularity(changes, granularity)
      tx = Evoc::Tx.new(id: sha, date: json_object["date"], items: items)
      if tx.items.empty?
        logger.warn "#{sha} with granularity #{granularity} filtered out all artifacts"
        next
      end
      self << tx
    rescue JSON::ParserError => e
      logger.warn e.message
      next # skip to next line
    rescue Evoc::Exceptions::NoDateInJsonObject => e
      logger.warn e.message
      next
    end
  end
  STDERR.puts "Loaded #{self.size} transactions from #{path}"
end

# Internal helper for #load_transactions: reduce the raw "changes" entries of
# one commit to the items kept at the requested granularity.
#
# 'mixed'::    every non-nil entry
# 'file'::     the unique "<file>:" prefixes of the entries that have one
# 'method'::   only the entries that have a "<file>:" prefix
# 'file_all':: the unique file part (text before the first ':') of every
#              entry; nil entries and entries without a file part are ignored
def items_for_granularity(changes, granularity)
  parsable_prefix = /^(?<parsable_file>.+?):/
  case granularity
  when 'mixed'
    changes.compact
  when 'file'
    # group all items by parsable files, and return only the unique set of filenames
    changes.group_by { |i| parsable_prefix.match(i).to_s }.keys.reject(&:empty?)
  when 'method'
    changes.group_by { |i| parsable_prefix.match(i).to_s } # group items by parsable files
           .reject { |prefix, _| prefix.empty? }           # filter out the non-parsable files
           .values                                         # get the methods
           .flatten                                        # flatten the list of list of methods
  when 'file_all'
    file_names = changes.compact.map do |i|
      match = /^(?<file>[^:]+?)(?::|\z)/.match(i)
      match && match[:file]
    end
    file_names.compact.uniq
  else
    raise ArgumentError, "Granularity level must be one of 'mixed', 'file', 'method' or 'file_all', was called with #{granularity}"
  end
end

#names2ints(names) ⇒ Object



29
30
31
# File 'lib/evoc/tx_store.rb', line 29

# Translate a list of items into their internal integer ids, using the
# name_2_int dictionary. The result has one entry per input name.
def names2ints(names)
  dictionary = name_2_int
  names.map { |name| dictionary[name] }
end

#pretty_printObject



368
369
370
# File 'lib/evoc/tx_store.rb', line 368

# Emit the items of every transaction as one CSV row, walking the
# transactions from the last stored to the first. Rows are written through
# the block form of Kernel#CSV.
def pretty_print
  last_to_first = txes.reverse
  last_to_first.each do |tx|
    CSV { |csv| csv << tx.items }
  end
end

#relevant_unchanged_items(query) ⇒ Object

Return the list of items that have changed with at least one item from the query



176
177
178
# File 'lib/evoc/tx_store.rb', line 176

# Return the list of items that have changed together with at least one item
# from the query, leaving out the query's own items.
#
# For each transaction reported by #transactions_of_list the query items are
# subtracted from the transaction's items; the per-transaction remainders are
# then combined with Array#array_union.
def relevant_unchanged_items(query)
  remainders = transactions_of_list(query).map do |tx_index|
    get_tx(id: tx_index).items - query
  end
  remainders.array_union
end

#sizeObject



180
181
182
# File 'lib/evoc/tx_store.rb', line 180

# Number of transactions currently held in @txes.
def size
  @txes.length
end

#to_jsonObject

return a (string) json representation of the tx_store



353
354
355
356
357
358
359
360
361
362
363
364
365
366
# File 'lib/evoc/tx_store.rb', line 353

# Return a (string) JSON representation of the tx_store.
#
# The result is a pretty-printed JSON array with one object per transaction
# id, ordered by descending tx index:
#   { "sha": <tx.id>, "date": <tx.date>, "changes": { "all": [<tx.items>] } }
# Items are emitted exactly as stored on the transaction.
#
# Any arguments are accepted and ignored: JSON generators call
# #to_json(state) on objects nested in other structures, which a
# zero-argument method would reject with ArgumentError.
def to_json(*_args)
  commits = {}
  each do |tx|
    commits[tx.id] = {
      sha: tx.id,
      date: tx.date,
      index: tx.index,
      changes: { all: tx.items.to_a }
    }
  end
  # Sort by index, but leave the index out of the output as there might be
  # "holes" (after filtering etc).
  highest_index_first = commits.values.sort_by { |commit| commit[:index] }.reverse
  JSON.pretty_generate(highest_index_first.map { |commit| commit.reject { |key, _| key == :index } })
end

#to_sObject



102
103
104
105
106
107
108
# File 'lib/evoc/tx_store.rb', line 102

# Render the store as text: one line per transaction, walking the
# transactions from the last stored to the first. Each line holds the
# transaction's items translated through int_2_name, joined by commas.
def to_s
  rendered_lines = txes.reverse.map do |tx|
    item_names = tx.items.map { |item| int_2_name[item] }
    item_names.join(',') + "\n"
  end
  rendered_lines.inject("") { |text, line| text << line }
end

#transactions_of(item, identifier: :index) ⇒ Object

Given an item, find the transactions in which that item was modified. Parameters: item: the item to check; identifier: how to represent the found transactions, either :index or :id.



144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/evoc/tx_store.rb', line 144

# Given an item, find the transactions in which it occurs.
#
# item::       the item to look up
# identifier:: name of the method called on each found transaction to
#              represent it in the result (:index by default)
#
# Returns an empty list when the store is empty or the item is unknown.
def transactions_of(item, identifier: :index)
  nothing_to_find = size.zero? || !items.key?(item)
  matching = nothing_to_find ? [] : items[item]
  matching.map(&identifier)
end

#transactions_of_list(items, strict: false, identifier: :index) ⇒ Object

Returns the relevant transactions of the query That is: all the transactions where at least one item from the query were changed

Parameters: items: a list of items (the query); strict (optional): if set to true, a transaction is only included when all the items of the query were changed in it; identifier (optional): how to represent the found transactions, either :index or :id.



166
167
168
169
170
171
172
# File 'lib/evoc/tx_store.rb', line 166

# Returns the transactions relevant to a list of items.
#
# The transactions of every single item are looked up with #transactions_of
# and the per-item lists are then combined:
#   strict: false - with Array#array_union
#   strict: true  - with Array#array_intersection
#
# identifier is passed through to #transactions_of.
def transactions_of_list(items, strict: false, identifier: :index)
  per_item = items.map { |item| transactions_of(item, identifier: identifier) }
  return per_item.array_intersection if strict

  per_item.array_union
end

#valid_date?(json_object, after, before) ⇒ Boolean

A looser version of #between? that also allows nil bounds: if both <after> and <before> are nil, the date is considered valid.

Returns:

  • (Boolean)


272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/evoc/tx_store.rb', line 272

# Check the "date" entry of +json_object+ against optional bounds.
#
# * both bounds nil:   always valid
# * both bounds given: valid when date.between?(after, before)
# * only after given:  valid when date > after
# * only before given: valid when date < before
#
# Returns true or false.
# Raises Evoc::Exceptions::NoDateInJsonObject when the object has no "date".
def valid_date?(json_object, after, before)
  date = json_object["date"]
  unless date
    raise Evoc::Exceptions::NoDateInJsonObject.new, "#{json_object["sha"]} had no \"date\" field."
  end

  within_bounds =
    if after.nil? && before.nil?
      true
    elsif after.nil?
      date < before
    elsif before.nil?
      date > after
    else
      date.between?(after, before)
    end
  within_bounds ? true : false
end