Class: CollaborativeFilter::SimpleBooster

Inherits:
Object
  • Object
show all
Defined in:
lib/boosters/simple_booster.rb

Instance Method Summary collapse

Instance Method Details

#generate_profiles ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/boosters/simple_booster.rb', line 26

def generate_profiles
  all_items = @datasets.inject([]) { |o,(dn,ds)| o.concat ds.items; o }.uniq
  gene_lists = @genes.map { |gn,gene| gene.all(all_items) }

  # Generating a profile for each user.
  # In essence ...
  # user_id => [ {'superhero' => -1, 'horror' => +2 },
  #              {'spiderman' => 2, 'atomic robo' => 1 } ]
  #
  # Iterate through each dataset, as we take all of them into account.
  CollaborativeFilter.log "  Generating user_profs: #{Time.now}"
  user_profs = @datasets.inject({}) { |profiles,(ds_name,ds)|
    CollaborativeFilter.log "   Starting new dataset: #{Time.now}"

    ds.users.each_with_index do |user_id,user_idx|
      profiles[user_id] ||= []

      # Grab the User's ratings from their column in the input matrix
      user_ratings = ds.m.col(user_idx).to_a
      user_ratings.each_index do |item_idx|

        # user_ratings is an array with an entry for each item for the user
        score = user_ratings[item_idx]

        next if score == 0

        # we have a master list of all items in all datasets which we need
        # for the profiles to span datasets.  Find this item's index in there.
        all_items_idx = all_items.index(ds.items[item_idx])

        # iterate through each gene type (genres, franchises, etc)
        gene_lists.each_index do |gene_type_idx|

          # find the value of the gene for this particular item (e.g. this item's genre is horror)
          # this value is always an array and can contain more than one value
          gis = gene_lists[gene_type_idx][all_items_idx]

          profiles[user_id][gene_type_idx] ||= {}

          adj = (score - @options[:crossover]) / gis.size

          gis.each do |gi|
            # we keep a tuple for each gene value (genre => horror) the first element
            # is the count of how many items we've noted and last is the total adjustment
            # they are later used to make an average
            profiles[user_id][gene_type_idx][gi] ||= [0,0]
            profiles[user_id][gene_type_idx][gi][0] += 1
            profiles[user_id][gene_type_idx][gi][1] += adj
          end
        end
      end
    end
    profiles
  }.to_a.map { |user_id,genes| 
    # Grab each of those tuples we made above ([count, total]) and turn each one into an
    # average multiplied by the 'factor' option.  Meaning...  If you rated Superman 2 points
    # above the crossover, and Spiderman 1 point above the threshold, we have a tuple that
    # looks like [2,3].  3 / 2 = 1.5.  Then we multiply by the factor (say 0.5) meaning we
    # only want to half weight on the factors.  So, on average you've rated Superheros 1.5
    # above the crossover, however, since our factor is 0.5, we're going to record 0.75 as
    # the modifier.  This limits the power of the content booster.
    #
    # The more genes you have the lower you'll want to set the factor, as each of them modify
    # the recommendations in turn.  I should probably change the factor to be configurable
    # per gene.
    [user_id, genes.map { |m| m.to_a.map { |gi,(qty,tot)| [gi, (tot/qty) * @options[:factor] ] } } ]
  }

  CollaborativeFilter.log "  Boosting recommendations: #{Time.now}"

  new_recs = []
  @recs.each_index do |user_idx|
    # Grab a user's raw recs and their profile which we generated above
    user_id, user_recs = @recs[user_idx]
    user_id,  = user_profs[user_idx]

    # Iterate through each of the individual items in the recommendations
    new_user_recs = user_recs.map { |item_id, score| 


      .each_index do |gene_type_idx|
        # Grab this item's genes for this particular gene type from the master list
        item_gene = gene_lists[gene_type_idx][all_items.index(item_id)]

        # item_gene will always be an array, if it's empty we can move on
        next if item_gene.empty?

        # an item can have multiple genes for a gene type, we just use the average
        item_mod = item_gene.inject([0,0]) { |o,g| 

          # find the user's modifier for this gene
          mod = [gene_type_idx].detect { |ig| ig.first == g }
          next o unless mod
          [o[0] + mod.last, o[1] + 1]
        }
        # move on unless we have at least modifier
        next unless item_mod[1] > 0
        score += item_mod[0] / item_mod[1]
      end

      # if the score is at or above the threshold, add it to our new recs list
      next if score < @options[:threshold]
      [item_id, score > 5 ? 5 : score]
    }.compact
    new_recs << [user_id, new_user_recs]
  end

  new_recs
end

#run(recs, datasets, genes, options) ⇒ Object



21
22
23
24
# File 'lib/boosters/simple_booster.rb', line 21

# Stores the recommendations, datasets, genes and options on the instance,
# then returns whatever generate_profiles produces from them.
def run(recs,datasets,genes,options)
  @recs = recs
  @datasets = datasets
  @genes = genes
  @options = options

  generate_profiles
end