Class: Evoc::SVD

Inherits:
Object
  • Object
show all
Defined in:
lib/evoc/svd.rb

Overview

CLASS SVD

public fields:

index: 
   hash of {index -> file}, where file is at "index" in the co_change_matrix and svd matrix

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(tx_store = nil) ⇒ SVD

Returns a new instance of SVD.



11
12
13
14
15
16
# File 'lib/evoc/svd.rb', line 11

# Creates a new SVD instance.
#
# When +tx_store+ is an Evoc::TxStore, the co-change matrix and the two
# index lookup tables are built from it and the matrix is decomposed right
# away. For any other argument (including the default nil) nothing is
# computed and every attribute stays nil.
#
# @param tx_store [Evoc::TxStore, nil] transaction history to decompose
def initialize(tx_store = nil)
  return unless tx_store.is_a?(Evoc::TxStore)

  lookup_and_matrix = txstore_2_co_change_matrix(tx_store)
  @index2item, @item2index, @co_change_matrix = lookup_and_matrix
  @u, @s, @v = svd(@co_change_matrix)
end

Instance Attribute Details

#co_change_matrix ⇒ Object

Returns the value of attribute co_change_matrix.



9
10
11
# File 'lib/evoc/svd.rb', line 9

# Reader for the co-change matrix.
#
# The constructor assigns it from #txstore_2_co_change_matrix when it is
# given an Evoc::TxStore; otherwise it is never assigned and this returns nil.
def co_change_matrix
  @co_change_matrix
end

#s ⇒ Object

Returns the value of attribute s.



9
10
11
# File 'lib/evoc/svd.rb', line 9

# Reader for the second of the three values the constructor takes from
# #svd(co_change_matrix); nil when no decomposition was computed.
# NOTE(review): presumably the singular values of the decomposition -
# confirm against the gesvd documentation of the matrix library in use.
def s
  @s
end

#u ⇒ Object

Returns the value of attribute u.



9
10
11
# File 'lib/evoc/svd.rb', line 9

# Reader for the first of the three values the constructor takes from
# #svd(co_change_matrix); nil when no decomposition was computed.
# #clusters iterates over the columns of this value.
# NOTE(review): presumably the left singular vectors - confirm against the
# gesvd documentation of the matrix library in use.
def u
  @u
end

#v ⇒ Object

Returns the value of attribute v.



9
10
11
# File 'lib/evoc/svd.rb', line 9

# Reader for the third of the three values the constructor takes from
# #svd(co_change_matrix); nil when no decomposition was computed.
# NOTE(review): presumably the right singular vectors (possibly transposed) -
# confirm against the gesvd documentation of the matrix library in use.
def v
  @v
end

Instance Method Details

#clusters(query, threshold = 0) ⇒ Object

Find the clusters in the current svd given a change-vector/query

threshold: the absolute value that an element of the U matrix must exceed to be considered part of a cluster


43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/evoc/svd.rb', line 43

# Finds the clusters in the current decomposition for a change vector.
#
# Every column of +u+ yields one cluster entry. An element belongs to the
# column's cluster when its absolute value is strictly greater than
# +threshold+; its sign decides whether it lands in the :pos or :neg half.
# Members that are also query items go to :query_match (item only), all
# other members go to :clustered as [item, value] pairs.
#
# @param query [Enumerable] items to look for; items unknown to the index
#   are ignored when matching but still count towards +query.size+
# @param threshold [Numeric] lower bound (exclusive) on the absolute value
# @return [Array] a pair [perfect_match, clusters] where
#   - perfect_match lists [column_index, sign] for every column half whose
#     number of matched query items reached +query.size+
#   - clusters maps column_index to
#     {pos: {query_match: [], clustered: []}, neg: {query_match: [], clustered: []}}
def clusters(query, threshold = 0)
  found = {}
  perfect = []
  # Row positions of the query items; unknown items give nil and are dropped.
  wanted_rows = query.map { |q_item| item2index(q_item) }.compact
  column_no = 0
  u.each_column do |column|
    # Fresh, empty cluster for this column.
    bucket = { pos: { query_match: [], clustered: [] },
               neg: { query_match: [], clustered: [] } }
    found[column_no] = bucket
    column.each_with_index do |weight, row_no|
      # Elements at or below the threshold are not part of the cluster.
      next unless weight.abs > threshold

      sign = weight > 0 ? :pos : :neg
      side = bucket[sign]
      if wanted_rows.include?(row_no)
        side[:query_match] << index2item(row_no)
        # Recorded at the moment the match count reaches the query size.
        perfect << [column_no, sign] if side[:query_match].size == query.size
      else
        side[:clustered] << [index2item(row_no), weight]
      end
    end
    column_no += 1
  end
  [perfect, found]
end

#index2item(index) ⇒ Object



26
27
28
# File 'lib/evoc/svd.rb', line 26

# Looks up the item stored at a matrix position.
#
# @param index [Integer] row/column position in the co-change matrix
# @return [Object, nil] the item at that position, or nil when the index
#   is not present in the lookup table
def index2item(index)
  @index2item[index]
end

#indexes ⇒ Object



18
19
20
# File 'lib/evoc/svd.rb', line 18

# Returns all keys of the index -> item lookup table, i.e. every matrix
# position that has an item assigned. Raises NoMethodError when the table
# was never built (the constructor only builds it for an Evoc::TxStore).
def indexes
  @index2item.keys
end

#item2index(item) ⇒ Object



30
31
32
# File 'lib/evoc/svd.rb', line 30

# Looks up the matrix position of an item.
#
# @param item [Object] an item as it appears in the transactions
# @return [Integer, nil] the item's row/column position, or nil when the
#   item is not present in the lookup table (#clusters relies on the nil)
def item2index(item)
  @item2index[item]
end

#items ⇒ Object



22
23
24
# File 'lib/evoc/svd.rb', line 22

# Returns all keys of the item -> index lookup table, i.e. every item that
# has a position in the co-change matrix. Raises NoMethodError when the
# table was never built (the constructor only builds it for an Evoc::TxStore).
def items
  @item2index.keys
end

#svd(co_change_matrix) ⇒ Object



34
35
36
# File 'lib/evoc/svd.rb', line 34

# Decomposes a matrix by delegating to its +gesvd+ method.
#
# @param co_change_matrix [#gesvd] the matrix to decompose
# @return [Object] whatever +gesvd+ returns, unchanged; the constructor
#   destructures it into three values (u, s, v)
def svd(co_change_matrix)
  # No intermediate locals: the call result is the method's value.
  co_change_matrix.gesvd
end

#txstore_2_co_change_matrix(tx_store) ⇒ Object

Returns the co-change matrix of the currently loaded files. Each row/column combination specifies how many times the two files changed together; the diagonal holds how many times each file changed. Example:

     f1  f2  f3
f1    2   1   1
f2    1   3   1
f3    1   1   1



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/evoc/svd.rb', line 84

# Builds the co-change matrix and the index lookup tables for a store of
# transactions.
#
# Cell [i, j] of the matrix holds the number of times the items at
# positions i and j occurred in the same transaction; the diagonal holds
# each item's own occurrence count. Positions are handed out in the order
# in which items are first seen while iterating the store.
#
# @param tx_store [#each] yields transactions, each responding to +items+
# @return [Array] a triple [index2item, item2index, co_change_matrix]:
#   - index2item [Hash] position => item
#   - item2index [Hash] item => position
#   - co_change_matrix [NMatrix] float64 matrix created as NMatrix.new(n, 0, ...)
#     where n is the number of distinct items
def txstore_2_co_change_matrix(tx_store)
  # co_changes[a][b] = number of times a and b appeared together.
  # The default blocks replace the manual "create on first sight" checks.
  co_changes = Hash.new { |counts, item| counts[item] = Hash.new(0) }
  tx_store.each do |tx|
    tx.items.each do |file_id|
      row = co_changes[file_id]
      tx.items.each { |co_changed_file_id| row[co_changed_file_id] += 1 }
    end
  end

  # Positions follow the insertion order of the items.
  item2index = {}
  co_changes.each_key.with_index { |item, index| item2index[item] = index }
  index2item = item2index.invert

  # Fill the pairwise weights.
  n = co_changes.size
  co_change_matrix = NMatrix.new(n, 0, dtype: :float64)
  co_changes.each do |file_id, counts|
    this_file = item2index[file_id]
    counts.each do |co_changed_file, sum_co_changes|
      co_change_matrix[item2index[co_changed_file], this_file] = sum_co_changes
    end
  end

  [index2item, item2index, co_change_matrix]
end