Class: Lita::Handlers::Markov::Engine

Inherits:
Object
  • Object
show all
Defined in:
lib/lita/handlers/markov/engine.rb

Defined Under Namespace

Classes: EmptyDictionaryError

Constant Summary collapse

DEFAULT_DATABASE_URL =

Default development database URL

'mysql2://root@localhost/lita-markov'
STRING_SEPARATOR =
/\s+/
NON_WORD_CHARACTERS =

Don’t allow anything besides letters, digits, whitespace, and punctuation

/[^\w\d'"“”’:+-]/
HYPERLINKS =
/http[^\s]+/
SIMPLE_CODE_BLOCK =
/`[^`]+`/
EXTENDED_CODE_BLOCK =
/```.+```/m
REPEATED_WHITESPACE =
/\s+/

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(database_url = nil) ⇒ Engine

Returns a new instance of Engine.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/lita/handlers/markov/engine.rb', line 13

# Build an engine backed by a Sequel database connection.
#
# database_url - Connection URL handed to Sequel.connect. When nil (the
#                default) DEFAULT_DATABASE_URL is used instead.
#
# Also makes sure the :dictionary table exists; create_table? only creates
# it when it is missing.
def initialize(database_url = nil)
  @depth = 2

  # NOTE(review): this reads the engine's own `handler` reader, so it stores
  # whatever that returns at construction time (nil unless set elsewhere).
  @handler = handler

  @db = Sequel.connect(database_url || DEFAULT_DATABASE_URL)

  @db.create_table?(:dictionary) do
    # The user the states are associated with
    column :user,          String,  null: false
    # Word(s) the user has "said"
    column :current_state, String,  null: false
    # Word that follows that word
    column :next_state,    String,  null: false
    # Frequency that the next word follows the current state/word
    column :frequency,     Integer, null: false

    primary_key [:user, :current_state, :next_state]
  end
end

Instance Attribute Details

#db ⇒ Object (readonly)

Returns the value of attribute db.



11
12
13
# File 'lib/lita/handlers/markov/engine.rb', line 11

# Reader for the @db instance variable, which initialize sets to the
# connection returned by Sequel.connect.
def db
  @db
end

#handler ⇒ Object

Returns the value of attribute handler.



10
11
12
# File 'lib/lita/handlers/markov/engine.rb', line 10

# Reader for the @handler instance variable.
# NOTE(review): initialize assigns @handler from this same reader, so the
# value is nil unless something outside this excerpt writes it — confirm.
def handler
  @handler
end

Instance Method Details

#add_entry(user, current_state, next_state) ⇒ Object

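Records that next_state followed current_state for user: stores the transition with a frequency of 1, or increments the frequency if the transition is already stored.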



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/lita/handlers/markov/engine.rb', line 51

# Record that `next_state` followed `current_state` for `user`.
#
# user          - Username the transition belongs to.
# current_state - Word(s) the user has said.
# next_state    - Word that followed current_state.
#
# An existing row has its frequency incremented; otherwise a new row is
# inserted with a frequency of 1. Returns the number of rows updated, or the
# result of the insert when a new row was created.
def add_entry user, current_state, next_state
  dictionary = @db[:dictionary]
  entry = {
    user: user,
    current_state: current_state,
    next_state: next_state
  }

  @db.transaction do
    # Let the database do the increment (frequency = frequency + 1) in a
    # single UPDATE. Reading the value first and writing back value + 1 can
    # lose increments when two writers interleave, and costs extra queries.
    updated = dictionary.where(entry).update(frequency: Sequel.expr(:frequency) + 1)

    # No row matched, so this is the first time the transition is seen.
    updated > 0 ? updated : dictionary.insert(entry.merge(frequency: 1))
  end
end

#generate_sentence_for(user, length = 30) ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/lita/handlers/markov/engine.rb', line 136

# Generate a sentence for a user by walking that user's dictionary.
#
# user   - Username whose dictionary is walked.
# length - Target number of words (default 30). Generation may stop early,
#          but only once more than half of the target has been produced.
#
# Returns the generated words joined by single spaces. When generation ends
# on a '.', the period is attached directly to the last word.
def generate_sentence_for(user, length = 30)
  words          = []
  closed         = false
  minimum_length = length / 2

  start_new_sentence user, words

  until words.length >= length
    # The current state is the last @depth words produced so far
    offset    = words.length - @depth
    state     = words[offset, @depth].join ' '
    following = get_next_state user, state

    if following.nil?
      # Dead end: stop if there is enough text already, otherwise carry on
      # from a random next-state of the same user
      break if words.length > minimum_length

      following = get_random_next_state user
    end

    words << following
    next unless following == '.'

    if words.length > minimum_length
      # Long enough, so the period ends the whole thing
      closed = true
      break
    end

    # Too short to stop: glue the period onto the previous word and begin a
    # fresh sentence after it
    period   = words.pop
    previous = words.pop
    words.push(previous + period)
    start_new_sentence user, words
  end

  text = words[0..-2].join(' ')
  text << ' ' unless closed
  text << words.last

  text
end

#get_next_state(user, current_state) ⇒ Object



102
103
104
105
106
107
108
109
# File 'lib/lita/handlers/markov/engine.rb', line 102

# Pick a next state for the given user and current state.
#
# user          - Username whose dictionary rows are considered.
# current_state - State to look up.
#
# Loads the next_state/frequency pairs stored for that user and state and
# returns whatever sample_states picks from them.
def get_next_state(user, current_state)
  matching   = @db[:dictionary].where(user: user, current_state: current_state)
  candidates = matching.select(:next_state, :frequency).all

  sample_states candidates
end

#get_random_next_state(user) ⇒ Object



111
112
113
114
115
116
117
118
# File 'lib/lita/handlers/markov/engine.rb', line 111

# Pick a next state for the user without regard to the current state.
#
# user - Username whose dictionary rows are considered.
#
# Loads every next_state/frequency pair stored for the user and returns
# whatever sample_states picks from them.
def get_random_next_state(user)
  for_user   = @db[:dictionary].where(user: user)
  candidates = for_user.select(:next_state, :frequency).all

  sample_states(candidates)
end

#ingest(user, string) ⇒ Object

user   - Username of the user.
string - String of words that the user has just said (ideally a sentence).



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/lita/handlers/markov/engine.rb', line 33

# Learn the word transitions contained in something a user said.
#
# user   - Username of the user.
# string - String of words that the user has just said (ideally a sentence).
#
# The string is sanitized and split into words; input with fewer than @depth
# words is ignored. Every run of @depth consecutive words is stored as a
# state together with the word that follows it.
def ingest user, string
  string = sanitize_string string
  words  = separate_string string

  return if words.length < @depth

  # Capitalize the first word and add a period at the end
  # (String#capitalize also lowercases the rest of that first word)
  words = [words[0].capitalize] + words.slice(1..-1) + ['.']

  # Slide a window of `@depth + 1` words over the sentence: the leading
  # @depth words form the state, the final word is what follows it. Deriving
  # both from the window keeps this in step with @depth rather than assuming
  # a depth of 2, and the block parameter no longer shadows `words`.
  words.each_cons(@depth + 1) do |window|
    current_state = window[0...-1].join(' ')
    next_state    = window.last

    add_entry user, current_state, next_state
  end
end

#random_capitalized_word(user) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/lita/handlers/markov/engine.rb', line 72

# Pick a word to open a sentence with.
#
# user - Username whose stored states are considered.
#
# A random state beginning with an uppercase ASCII letter is preferred; when
# the user has none, any of the user's states may be chosen. Returns the
# first word of the chosen state.
#
# Raises EmptyDictionaryError if the user has no states at all.
def random_capitalized_word(user)
  candidates  = @db[:dictionary].where(user: user).map(:current_state)
  capitalized = candidates.select { |candidate| /^[A-Z]/ =~ candidate }

  pool   = capitalized.empty? ? candidates : capitalized
  chosen = pool.sample

  raise EmptyDictionaryError, 'No data for user' if chosen.nil?

  chosen.split(' ').first
end

#random_second_word(user, first_word) ⇒ Object



92
93
94
95
96
97
98
99
100
# File 'lib/lita/handlers/markov/engine.rb', line 92

# Pick a word that the user has actually said right after `first_word`.
#
# user       - Username whose stored states are considered.
# first_word - Word the chosen state must start with.
#
# Returns the last word of a randomly chosen matching state.
#
# Raises EmptyDictionaryError if the user has no state starting with
# first_word.
def random_second_word(user, first_word)
  candidates = @db[:dictionary]
    .where(Sequel.like(:current_state, first_word+'%'))
    .where(user: user)
    .map(:current_state)
    # The LIKE above is only a coarse prefix filter: it also matches longer
    # words sharing the prefix ("The" matches "Then go"). Keep only states
    # whose first word is exactly first_word.
    .select { |state| state.split(' ').first == first_word }

  if candidates.empty?
    raise EmptyDictionaryError, "No states for user starting with #{first_word.inspect}"
  end

  candidates.sample.split(' ').last
end

#sample_states(states) ⇒ Object



120
121
122
123
124
125
126
# File 'lib/lita/handlers/markov/engine.rb', line 120

# Pick one next state at random, weighted by frequency.
#
# states - Enumerable of Hash-like rows with :next_state and :frequency.
#
# Returns the :next_state of the chosen row, or nil when there is nothing to
# choose from (no rows, or a total frequency of zero).
def sample_states(states)
  total = states.inject(0) { |sum, state| sum + state[:frequency] }
  return nil unless total > 0

  # Draw a position in 0...total and walk the cumulative frequencies until it
  # is covered. This gives each row a chance proportional to its frequency
  # without building an array containing one element per occurrence.
  remaining = rand(total)
  states.each do |state|
    remaining -= state[:frequency]
    return state[:next_state] if remaining < 0
  end

  nil
end

#sanitize_string(string) ⇒ Object



200
201
202
203
204
205
206
207
208
# File 'lib/lita/handlers/markov/engine.rb', line 200

# Clean up raw chat text before it is split into words.
#
# string - Text to clean.
#
# Returns a new string with hyperlinks and code blocks removed, disallowed
# characters turned into spaces, runs of whitespace collapsed to one space
# and surrounding whitespace stripped.
def sanitize_string(string)
  # Remove any hyperlinks
  without_links = string.gsub(HYPERLINKS, ''.freeze)

  # Remove code blocks (single-backtick spans first, then triple-backtick)
  without_code = without_links
    .gsub(SIMPLE_CODE_BLOCK, ''.freeze)
    .gsub(EXTENDED_CODE_BLOCK, ''.freeze)

  # Convert non-word characters into whitespace
  words_only = without_code.gsub(NON_WORD_CHARACTERS, ' '.freeze)

  # Convert repeated whitespace into just single spaces
  words_only.gsub(REPEATED_WHITESPACE, ' '.freeze).strip
end

#separate_string(string) ⇒ Object



183
184
185
186
187
188
189
190
# File 'lib/lita/handlers/markov/engine.rb', line 183

# Split a string into its words.
#
# string - Text to split.
#
# Splits on STRING_SEPARATOR, trims each piece and drops the empty pieces
# that leading whitespace leaves behind. Returns an Array of Strings.
def separate_string(string)
  pieces = string.split(STRING_SEPARATOR)
  pieces.map(&:strip).reject(&:empty?)
end

#start_new_sentence(user, sentence) ⇒ Object



128
129
130
131
132
133
134
# File 'lib/lita/handlers/markov/engine.rb', line 128

# Append the two opening words of a new sentence to `sentence`.
#
# user     - Username whose dictionary supplies the words.
# sentence - Array of words generated so far; it is modified in place.
#
# The first word comes from random_capitalized_word and the second from
# random_second_word for that first word. Returns the sentence Array.
def start_new_sentence(user, sentence)
  opener   = random_capitalized_word(user)
  follower = random_second_word(user, opener)

  sentence.push(opener, follower)
end