Module: Spellchecker::DetectDuplicate
- Defined in:
- lib/spellchecker/detect_duplicate.rb
Constant Summary collapse
- MIN_LENGTH =
2
- SKIP_WORDS =
Set.new( %w[very many truly yeah much far yada yare blah bla etc win toco really super peri long had have happened good goody ever dub bye mommy wild that right well huge large dan tan yum yummy agar kori lai please mumble extremely highly root whoa knock check woof bounce bouncy million tut wow mola paw hubba histrio cha nom chop same extra more bang big go no pom la ah ha oh ew] ).freeze
- SKIP_PHRASES =
Set.new(['try and', 'and try', 'and again', 'again and', 'hand in', 'over and', 'and over', 'more and', 'and more', 'test and', 'and test', 'after month', 'bigger and', 'and bigger', 'hours and', 'and hours', 'month after', 'and deeper', 'deeper and', 'step by', 'by step', 'and purred', 'pages of', 'and lots', 'and on', 'face to', 'louder and', 'and louder', 'and thousands', 'day by', 'years and', 'such and', 'and so', 'and such', 'one by', 'side to', 'thousands of', 'back to', 'bit by', 'years of', 'days of', 'weeks of']).freeze
- SKIP_PHRASE_WORDS =
Set.new(%w[and])
Class Method Summary collapse
- .call(token) ⇒ Spellchecker::Mistake?
- .find_duplicate(t1, t2, t3, t4) ⇒ Array(String, String)?
- .from_to_phrase?(t1, t2, t3) ⇒ Boolean
-
.quoted?(t1, _t2, t3, t4) ⇒ Boolean
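Returns true when the token before t1 is a double quote and t3 or t4 is a double quote. (The source comment at this position is the directive rubocop:enable Metrics/AbcSize.)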
-
.repetition?(t1, t2, t3, t4) ⇒ Boolean
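Returns true when the same word occurs at least three times among the tokens from t1.prev to t4.next in one of the checked positions. (The source comment at this position is the directive rubocop:disable Metrics/AbcSize.)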
- .skip_phrase?(t1, t2, t3, t4) ⇒ Boolean
Class Method Details
.call(token) ⇒ Spellchecker::Mistake?
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/spellchecker/detect_duplicate.rb', line 37

# Checks whether +token+ starts a duplicated word or a duplicated two-word
# phrase and, unless one of the exclusions below applies, reports it.
#
# The token and its three successors (reached through +#next+) form the
# window that is inspected.
#
# @param token [#text, #downcased, #next, #position] first token of the window
# @return [Spellchecker::Mistake, nil] a mistake of type
#   +MistakeTypes::DUPLICATE+, or nil when nothing is reported
def call(token)
  t1 = token
  # Too-short tokens and explicitly whitelisted words are never reported.
  return if t1.text.length < MIN_LENGTH
  return if SKIP_WORDS.include?(t1.downcased)

  t2 = t1.next
  t3 = t2.next
  t4 = t3.next

  text, correction = find_duplicate(t1, t2, t3, t4)
  return unless text

  # Exclusions, checked in the same order as before.
  return if t2.capital? || t3.capital?
  return if SKIP_PHRASES.include?(correction.downcase)
  return unless Dictionaries::EnglishWords.include?(t2.text)
  return if skip_phrase?(t1, t2, t3, t4)
  return if repetition?(t1, t2, t3, t4)
  return if from_to_phrase?(t1, t2, t3)
  return if quoted?(t1, t2, t3, t4)

  Mistake.new(text: text, correction: correction,
              position: token.position, type: MistakeTypes::DUPLICATE)
end
.find_duplicate(t1, t2, t3, t4) ⇒ Array(String, String)?
68 69 70 71 72 73 74 |
# File 'lib/spellchecker/detect_duplicate.rb', line 68

# Looks for a word that is immediately repeated (t1 t2) or a two-word
# phrase that is immediately repeated (t1 t2 t3 t4). Tokens are compared by
# their +#downcased+ value; the returned strings are built from +#text+.
#
# @param t1 [#text, #downcased]
# @param t2 [#text, #downcased]
# @param t3 [#text, #downcased]
# @param t4 [#text, #downcased]
# @return [Array(String, String), nil] +[duplicated_text, correction]+, or
#   nil when neither pattern matches
def find_duplicate(t1, t2, t3, t4)
  if t1.downcased == t2.downcased
    # "word word" -> keep a single "word"
    ["#{t1.text} #{t2.text}", t1.text]
  elsif t1.downcased == t3.downcased && t2.downcased == t4.downcased
    # "a b a b" -> keep a single "a b"
    phrase = "#{t1.text} #{t2.text}"
    ["#{phrase} #{t3.text} #{t4.text}", phrase]
  end
end
.from_to_phrase?(t1, t2, t3) ⇒ Boolean
100 101 102 |
# File 'lib/spellchecker/detect_duplicate.rb', line 100

# Detects the pattern "from X to X": the token before +t1+ is "from",
# +t2+ is "to", and +t1+ and +t3+ are the same word (all compared by
# +#downcased+).
#
# @param t1 [#downcased, #prev]
# @param t2 [#downcased]
# @param t3 [#downcased]
# @return [Boolean]
def from_to_phrase?(t1, t2, t3)
  t1.prev.downcased == 'from' &&
    t2.downcased == 'to' &&
    t1.downcased == t3.downcased
end
.quoted?(t1, _t2, t3, t4) ⇒ Boolean
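Returns true when the token before t1 is a double quote and t3 or t4 is a double quote. (The source comment at this position is the directive rubocop:enable Metrics/AbcSize.)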
96 97 98 |
# File 'lib/spellchecker/detect_duplicate.rb', line 96

# Tells whether the candidate sits inside straight double quotes: the token
# before +t1+ is +"+ and either +t3+ or +t4+ is +"+.
#
# @param t1 [#prev] first token; only its predecessor's text is read
# @param _t2 [Object] unused, kept for a signature parallel to the other checks
# @param t3 [#text]
# @param t4 [#text]
# @return [Boolean]
def quoted?(t1, _t2, t3, t4)
  opening = t1.prev.text == '"'
  opening && (t3.text == '"' || t4.text == '"')
end
.repetition?(t1, t2, t3, t4) ⇒ Boolean
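Returns true when the same word occurs at least three times among the tokens from t1.prev to t4.next in one of the checked positions. (The source comment at this position is the directive rubocop:disable Metrics/AbcSize.)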
84 85 86 87 88 89 90 91 92 93 |
# File 'lib/spellchecker/detect_duplicate.rb', line 84

# Tells whether the duplicate is part of a longer run: every branch below
# matches a case where one word appears at least three times among the
# tokens from +t1.prev+ to +t4.next+. All comparisons use +#downcased+.
#
# @param t1 [#downcased, #prev]
# @param t2 [#downcased]
# @param t3 [#downcased]
# @param t4 [#downcased, #next]
# @return [Boolean]
def repetition?(t1, t2, t3, t4)
  w1, w2, w3, w4 = [t1, t2, t3, t4].map(&:downcased)

  # t1, t3 and the token after t4 are the same word.
  return true if w1 == w3 && w1 == t4.next.downcased

  before = t1.prev.downcased
  # The token before t1, t2 and t4 are the same word.
  return true if before == w2 && w2 == w4
  # The token before t1, t1 and t3 are the same word.
  return true if before == w1 && w1 == w3
  # t1 and t2 are equal and a third copy sits at t3, before t1, or at t4.
  return true if w1 == w2 && (w1 == w3 || w1 == before || w1 == w4)

  false
end
.skip_phrase?(t1, t2, t3, t4) ⇒ Boolean
76 77 78 79 80 81 |
# File 'lib/spellchecker/detect_duplicate.rb', line 76

# Tells whether the repeated word of an alternating pattern is one of
# +SKIP_PHRASE_WORDS+: either +t1+ equals +t3+ or +t2+ equals +t4+ (by
# +#downcased+), and that shared word is in the set.
#
# @param t1 [#downcased]
# @param t2 [#downcased]
# @param t3 [#downcased]
# @param t4 [#downcased]
# @return [Boolean]
def skip_phrase?(t1, t2, t3, t4)
  # The two checks are mirror images: positions (1, 3) and positions (2, 4).
  [[t1, t3], [t2, t4]].any? do |first, second|
    first.downcased == second.downcased && SKIP_PHRASE_WORDS.include?(first.downcased)
  end
end