Class: String

Inherits:
Object
  • Object
show all
Defined in:
lib/searchlink/semver.rb,
lib/searchlink/string.rb,
lib/searchlink/curl/html.rb,
lib/searchlink/searches/hook.rb

Overview

Hookmark String helpers

Instance Method Summary collapse

Instance Method Details

#add_query_stringString

Format and append a query string

Returns:

  • (String)

    The formatted query string



112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/searchlink/string.rb', line 112

# Append the globally configured query parameters (SL.query) to this URL.
#
# The parameters are joined with "&". They are attached with "&" when the
# receiver already contains a "?key=value" style query, otherwise with "?".
# NOTE(review): keys and values are interpolated as-is (no URL encoding).
#
# @return [String] self when SL.query is empty, otherwise a new string
def add_query_string
  params = SL.query
  return self if params.empty?

  pairs = params.map { |key, value| "#{key}=#{value}" }
  joiner = self =~ /\?[^= ]+=\S+/ ? "&" : "?"

  "#{self}#{joiner}#{pairs.join('&')}"
end

#add_query_string!String

Destructive version of #add_query_string

Returns:

  • (String)

    The formatted query string

See Also:



129
130
131
# File 'lib/searchlink/string.rb', line 129

# In-place variant of #add_query_string: the receiver's contents are
# replaced with the result.
#
# @return [String] self
# @see #add_query_string
def add_query_string!
  with_query = add_query_string
  replace(with_query)
end

#append_affiliate_string(aff_string) ⇒ String

Append an affiliate string to a URL

Parameters:

  • aff_string (String)

    The affiliate string

Returns:

  • (String)

    The URL with the affiliate string

See Also:



263
264
265
266
# File 'lib/searchlink/string.rb', line 263

# Append an affiliate string to a URL.
#
# Any leading "?" or "&" on aff_string is swapped for the joiner that fits
# the receiver: "&" when the URL already contains a "?", otherwise "?".
#
# @param aff_string [String] the affiliate string
# @return [String] the URL with the affiliate string appended
def append_affiliate_string(aff_string)
  joiner = self =~ /\?/ ? "&" : "?"
  suffix = aff_string.sub(/^[?&]?/, joiner)
  "#{self}#{suffix}"
end

#append_affiliate_string!(aff_string) ⇒ String

Destructively append an affiliate string to a URL

Parameters:

  • aff_string (String)

    The affiliate string

Returns:

  • (String)

    The URL with the affiliate string

See Also:



275
276
277
# File 'lib/searchlink/string.rb', line 275

# In-place variant of #append_affiliate_string.
#
# @param aff_string [String] the affiliate string
# @return [String] self, now holding the URL with the affiliate string
# @see #append_affiliate_string
def append_affiliate_string!(aff_string)
  tagged = append_affiliate_string(aff_string)
  replace(tagged)
end

#cleanString

Remove newlines, escape quotes, and remove Google Analytics strings

Returns:

  • (String)

    cleaned URL/String



237
238
239
240
241
242
243
# File 'lib/searchlink/string.rb', line 237

# Clean a URL/title string: collapse newlines into spaces, escape double
# quotes as the HTML entity &quot;, turn pipes into hyphens, and strip
# utm_* tracking parameters that are followed by further parameters.
#
# The entity is written with its terminating semicolon; the bare form
# "&quot" is not a well-formed entity reference.
#
# NOTE(review): the utm pattern only matches when another "&..." parameter
# follows the tracking parameters, so a trailing utm parameter is left in
# place — confirm whether that is intended.
#
# @return [String] cleaned URL/String
def clean
  gsub(/\n+/, " ")
    .gsub(/"/, "&quot;")
    .gsub(/\|/, "-")
    .gsub(/([&?]utm_[scm].+=[^&\s!,.)\]]++?)+(&.*)/, '\2')
    .sub(/\?&/, "").strip
end

#close_punctuationString

Complete incomplete punctuation pairs

Returns:

  • (String)

    string with all punctuation properly paired



329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
# File 'lib/searchlink/string.rb', line 329

# Complete incomplete punctuation pairs.
#
# Scans the whitespace-separated words for opening characters
# (“ ‘ [ ( <) and appends the matching closers for any that are still open
# at the end of the string. Trailing characters other than letters,
# closers, periods or ellipses are replaced with "..." first.
#
# A closing character with no tracked opener is ignored (previously this
# raised a TypeError because the nil index was passed to delete_at).
#
# @return [String] string with punctuation pairs closed; self when the
#   string contains no opening character
def close_punctuation
  return self unless self =~ /[“‘\[(<]/

  pairs = {
    "“" => "”",
    "‘" => "’",
    "[" => "]",
    "(" => ")",
    "<" => ">"
  }

  open_stack = []

  split(/\s+/).each do |word|
    pairs.each do |opener, closer|
      open_stack.push(opener) if word.include?(opener)
      next unless word.include?(closer)

      # Close the most recently opened matching pair, if there is one.
      idx = open_stack.rindex(opener)
      open_stack.delete_at(idx) if idx
    end
  end

  # Closers are emitted innermost-first.
  tail = open_stack.reverse.map { |opener| pairs[opener] }.join

  gsub(/[^a-z)\]’”.…]+$/i, "...").strip + tail
end

#close_punctuation!Object

Destructive punctuation close

See Also:



319
320
321
# File 'lib/searchlink/string.rb', line 319

# In-place variant of #close_punctuation.
#
# @return [String] self
# @see #close_punctuation
def close_punctuation!
  closed = close_punctuation
  replace(closed)
end

#code_indentString

Indent each line of string with 4 spaces

Returns:

  • (String)

    indented string



629
630
631
# File 'lib/searchlink/string.rb', line 629

# Indent each line of the string with 4 spaces.
#
# Lines are split on "\n" and re-joined with "\n", so trailing newlines
# are not preserved.
#
# @return [String] indented string
def code_indent
  indented = split(/\n/).map { |line| "    #{line}" }
  indented.join("\n")
end

Count the links in a string

Returns:

  • (Integer)

    The number of links



28
29
30
# File 'lib/searchlink/string.rb', line 28

# Count the inline Markdown links ("[text](url)") in the string.
#
# @return [Integer] the number of links
def count_links
  inline_link = /\[(.*?)\]\((.*?)\)/
  scan(inline_link).size
end

#distance(t) ⇒ Object



541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
# File 'lib/searchlink/string.rb', line 541

# Edit distance between this string and +t+, computed with a full
# dynamic-programming table where each insertion, deletion and
# substitution costs 1.
#
# @param t [String] the string to compare against
# @return [Integer] the number of single-character edits
def distance(t)
  rows = length
  cols = t.length
  return rows if cols.zero?
  return cols if rows.zero?

  # grid[i][j] holds the distance between the first i characters of self
  # and the first j characters of t; row 0 and column 0 are the base cases.
  grid = Array.new(rows + 1) { |i| [i] + Array.new(cols) }
  grid[0] = (0..cols).to_a

  1.upto(cols) do |j|
    1.upto(rows) do |i|
      grid[i][j] =
        if self[i - 1] == t[j - 1]
          grid[i - 1][j - 1] # characters agree, no edit needed
        else
          deletion = grid[i - 1][j]
          insertion = grid[i][j - 1]
          substitution = grid[i - 1][j - 1]
          [deletion, insertion, substitution].min + 1
        end
    end
  end

  grid[rows][cols]
end

#extract_query(known_queries = {}) ⇒ Object

Extract query string from search string



67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/searchlink/string.rb', line 67

# Extract a "?key=value&key2=value2" query from a search string.
#
# Only a query introduced by a "?" that directly follows a space is
# recognized. Each pair is stored in +known_queries+ (which is modified)
# and the query text is removed from the returned string.
#
# Pairs are split on the first "=" only, so values that themselves contain
# "=" are kept intact.
#
# @param known_queries [Hash] hash to merge the extracted pairs into
# @return [Array(Hash, String)] the query hash and the string with the
#   query removed and runs of spaces collapsed
def extract_query(known_queries = {})
  string = gsub(/(?<= )\?((\S+?)=(\S+?)(?=&|$|\s))+/) do |mtch|
    mtch.sub(/^\?/, "").split("&").each do |token|
      key, value = token.split("=", 2)
      known_queries[key] = value
    end

    ""
  end.gsub(/ +/, " ").strip

  [known_queries, string]
end

#extract_shortenerObject

Extract a shortener from a string



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/searchlink/string.rb', line 83

# Extract a URL-shortener suffix ("_i", "_b" or "_t", case-insensitive)
# from the end of the string and record the choice in SL.shortener.
#
# The text after the last underscore picks the service: i => :isgd,
# b => :bitly, t => :tinyurl, anything else => :none.
#
# @return [String] self when there is no suffix, otherwise the string
#   with the suffix removed
def extract_shortener
  suffix_rx = /_[ibt]$/i
  return self unless self =~ suffix_rx

  code = split(/_/).last
  # Checked in order; the first pattern found in the code wins.
  services = [[/i/i, :isgd], [/b/i, :bitly], [/t/i, :tinyurl]]
  chosen = services.find { |rx, _service| rx.match?(code) }
  SL.shortener = chosen ? chosen.last : :none

  sub(suffix_rx, "")
end

#extract_shortener!String

Destructive version of #extract_shortener

Returns:

  • (String)

    The string without the shortener

See Also:



104
105
106
# File 'lib/searchlink/string.rb', line 104

# In-place variant of #extract_shortener.
#
# @return [String] self, with the shortener suffix removed
# @see #extract_shortener
def extract_shortener!
  stripped = extract_shortener
  replace(stripped)
end

#fix_gist_fileString

Convert file-myfile-rb to myfile.rb

Returns:

  • (String)


212
213
214
# File 'lib/searchlink/string.rb', line 212

# Convert a gist file anchor such as "file-myfile-rb" to "myfile.rb":
# drops a leading "file-" and turns the last hyphen into a dot.
#
# @return [String] the reconstructed filename
def fix_gist_file
  without_prefix = sub(/^file-/, "")
  without_prefix.sub(/-([^-]+)$/, '.\1')
end

#indent_levelInteger

Count the indent level of a string

Returns:



16
17
18
19
20
21
22
23
24
# File 'lib/searchlink/string.rb', line 16

# Count the indent level of a string.
#
# One level is a tab or a run of four spaces at the start of a line. The
# level is taken from the first line in the string that begins with
# indentation. The receiver is not modified, so this is safe to call on
# frozen strings, and nested four-space indents each count as a level.
#
# @return [Integer] the indent level (0 when empty or not indented)
def indent_level
  return 0 if empty?

  indent = match(/^(?:\t| {4})+/)
  return 0 unless indent

  indent[0].scan(/\t| {4}/).length
end

#matches_all(terms) ⇒ Object

Test that self matches every word in terms

Parameters:

  • terms (String)

    The terms to test



604
605
606
607
608
# File 'lib/searchlink/string.rb', line 604

# Test that self matches every one of +terms+.
#
# The comparison runs against a copy of self with everything except ASCII
# letters, digits and spaces removed.
#
# @param terms [String, Array<Regexp>] a String (converted with
#   #to_rx_array) or a collection of patterns
# @return [Boolean] true when every pattern matches (also when there are
#   no patterns)
def matches_all(terms)
  patterns = terms.is_a?(String) ? terms.to_rx_array : terms
  patterns.all? { |rx| gsub(/[^a-z0-9 ]/i, "") =~ rx }
end

#matches_any(terms) ⇒ Object

Test if self contains any of terms

Parameters:

  • terms (String)

    The terms to test



593
594
595
596
597
# File 'lib/searchlink/string.rb', line 593

# Test whether self matches at least one of +terms+.
#
# The comparison runs against a copy of self with everything except ASCII
# letters, digits and spaces removed.
#
# @param terms [String, Array<Regexp>] a String (converted with
#   #to_rx_array) or a collection of patterns
# @return [Boolean] true when any pattern matches
def matches_any(terms)
  patterns = terms.is_a?(String) ? terms.to_rx_array : terms
  patterns.any? { |rx| gsub(/[^a-z0-9 ]/i, "") =~ rx }
end

#matches_exact(string) ⇒ Object

Test if self contains an exact match for string (case insensitive)

Parameters:

  • string (String)

    The string to match



572
573
574
575
# File 'lib/searchlink/string.rb', line 572

# Test whether self contains the words of +string+ in the same order
# (case insensitive).
#
# Both sides are reduced to ASCII letters, digits and spaces first; the
# words of +string+ may be separated by one or more spaces in self, and
# the match must begin at a word boundary.
#
# @param string [String] the string to match
# @return [Integer, nil] index of the match in the stripped copy of self,
#   or nil when there is no match
def matches_exact(string)
  haystack = gsub(/[^a-z0-9 ]/i, "")
  words = string.gsub(/[^a-z0-9 ]/i, "").split(/ +/)
  phrase = words.map { |word| Regexp.escape(word) }.join(" +")
  haystack =~ /\b#{phrase}/i
end

#matches_fuzzy(terms, separator: " ", start_word: true, threshhold: 5) ⇒ Object



527
528
529
530
531
532
533
534
535
536
537
538
539
# File 'lib/searchlink/string.rb', line 527

# Score how closely the words of self match the words of +terms+, using
# #distance on every source-word/term-word pair.
#
# Both strings are split on runs of +separator+. The pattern uses a
# non-capturing group: with a capturing group String#split also returns
# the separators themselves, which inflated the word counts and the score.
#
# @param terms [String] the terms to match
# @param separator [String] the word separator (interpolated into a
#   regular expression unescaped)
# @param start_word [Boolean] accepted for signature parity with
#   #matches_score; not used by this method
# @param threshhold [Integer] maximum distance that counts as a match
# @return [Float] matching pairs per term, times 10, rounded to 3 places;
#   0.0 when +terms+ contains no words
def matches_fuzzy(terms, separator: " ", start_word: true, threshhold: 5)
  splitter = /(?:#{separator})+/
  sources = split(splitter)
  words = terms.split(splitter)
  # Avoid dividing by zero (NaN) when there is nothing to compare against.
  return 0.0 if words.empty?

  matches = sources.sum do |src|
    words.count { |term| src.distance(term) <= threshhold }
  end

  ((matches / words.count.to_f) * 10).round(3)
end

#matches_none(terms) ⇒ Object

Test that self does not contain any of terms

Parameters:

  • terms (String)

    The terms to test



582
583
584
585
586
# File 'lib/searchlink/string.rb', line 582

# Test that self matches none of +terms+.
#
# The comparison runs against a copy of self with everything except ASCII
# letters, digits and spaces removed.
#
# @param terms [String, Array<Regexp>] a String (converted with
#   #to_rx_array) or a collection of patterns
# @return [Boolean] true when no pattern matches
def matches_none(terms)
  patterns = terms.is_a?(String) ? terms.to_rx_array : terms
  patterns.none? { |rx| gsub(/[^a-z0-9 ]/i, "") =~ rx }
end

#matches_score(terms, separator: " ", start_word: true) ⇒ Object

Score string based on number of matches, 0 - 10

Parameters:

  • terms (String)

    The terms to match

  • separator (String) (defaults to: " ")

    The word separator

  • start_word (Boolean) (defaults to: true)

    Require match to be at beginning of word



514
515
516
517
518
519
520
521
522
523
524
525
# File 'lib/searchlink/string.rb', line 514

# Score the string by the share of +terms+ it matches, on a 0-10 scale.
#
# @param terms [String] the terms to match (converted with #to_rx_array)
# @param separator [String] the word separator
# @param start_word [Boolean] require matches at the beginning of a word
# @return [Integer, Float] 0 when nothing matches, otherwise
#   matched / total * 10 rounded to 3 places
def matches_score(terms, separator: " ", start_word: true)
  patterns = terms.to_rx_array(separator: separator, start_word: start_word)
  hits = patterns.count { |rx| self =~ rx }
  return 0 if hits.zero?

  (hits.fdiv(patterns.count) * 10).round(3)
end

#nil_if_missingNil, String

Test an AppleScript response, substituting nil for ‘Missing Value’

Returns:

  • (Nil, String)

    nil if string is “missing value”



499
500
501
502
503
# File 'lib/searchlink/string.rb', line 499

# Test an AppleScript response, substituting nil when the string contains
# the text "missing value".
#
# @return [nil, String] nil if the string contains "missing value",
#   otherwise self
def nil_if_missing
  match?(/missing value/) ? nil : self
end

#normalize_triggerString

Adds ?: to any parentheticals in a regular expression to avoid match groups

Returns:

  • (String)

    modified regular expression



151
152
153
# File 'lib/searchlink/string.rb', line 151

# Normalize a trigger regular expression: turn plain capturing groups
# into non-capturing ones, strip a leading "^" or "\A" and a trailing "$"
# or "\Z", and downcase the result.
#
# Only a bare "(" is rewritten. Groups that already begin with "(?"
# (non-capturing, lookaround, named, inline flags) and escaped "\(" are
# left alone, because inserting "?:" there produces an invalid or
# different expression.
#
# NOTE(review): a group that directly follows an escaped backslash
# ("\\(") is also skipped by the lookbehind.
# NOTE(review): downcasing also lowercases escapes such as "\S" or "\W",
# which changes their meaning — confirm triggers never contain them.
#
# @return [String] modified regular expression source
def normalize_trigger
  gsub(/(?<!\\)\((?!\?)/, "(?:").gsub(/(^(\^|\\A)|(\$|\\Z)$)/, "").downcase
end

#parse_flagsObject

parse command line flags into long options



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/searchlink/string.rb', line 173

# Parse short command line flags into long options.
#
# "++xyz" enables and "--xyz" disables (adds a "no-" prefix to) each
# single-letter flag in the run. Every expanded option is followed by a
# space so that adjacent options never run together, and runs of spaces
# in the result are collapsed.
#
# @return [String] the string with short flags expanded
def parse_flags
  long_names = {
    "c" => "confirm",
    "d" => "debug",
    "i" => "inline",
    "r" => "prefix_random",
    "t" => "include_titles",
    "v" => "validate_links",
    "s" => "remove_seo"
  }

  gsub(/(\+\+|--)([dirtvsc]+)\b/) do
    m = Regexp.last_match
    negate = m[1] == "++" ? "" : "no-"
    options = m[2].each_char.map { |flag| "--#{negate}#{long_names[flag]} " }

    " #{options.join}"
  end.gsub(/ +/, " ")
end

#parse_flags!Object



203
204
205
# File 'lib/searchlink/string.rb', line 203

# In-place variant of #parse_flags.
#
# @return [String] self
# @see #parse_flags
def parse_flags!
  expanded = parse_flags
  replace(expanded)
end

#path_elementsArray

Extract the most relevant portions from a URL path

Returns:

  • (Array)

    array of relevant path elements



301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/searchlink/string.rb', line 301

# Extract the most relevant portions from a URL path.
#
# Takes the path from #url_path, drops a final element that contains a
# "." or "-", and returns the remaining segments, excluding those that
# are purely numeric or shorter than 5 characters.
#
# @return [Array<String>] array of relevant path elements
def path_elements
  trimmed = url_path
            .sub(%r{/?$}, "/")                 # force trailing slash
            .sub(%r{/[^/]+[.-][^/]+/$}, "")    # remove last path element
            .gsub(%r{(^/|/$)}, "")             # remove starting/ending slashes

  trimmed.split(%r{/}).reject { |section| section =~ /^\d+$/ || section.length < 5 }
end

#remove_entitiesObject



6
7
8
# File 'lib/searchlink/curl/html.rb', line 6

# Replace every "&nbsp;" entity with a plain space.
#
# @return [String] string without "&nbsp;" entities
def remove_entities
  gsub("&nbsp;", " ")
end

#remove_protocolString

Remove the protocol from a URL

Returns:

  • (String)

    just hostname and path of URL



284
285
286
# File 'lib/searchlink/string.rb', line 284

# Remove the protocol (http, https, ftp, sftp or file) from a URL.
#
# @return [String] just hostname and path of URL
def remove_protocol
  protocol_prefix = %r{^(https?|s?ftp|file)://}
  sub(protocol_prefix, "")
end

#remove_seo(url) ⇒ String

Remove SEO elements from a title

Parameters:

  • url

    The url of the page from which the title came

Returns:



376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
# File 'lib/searchlink/string.rb', line 376

# Remove SEO elements (site names and similar boilerplate) from a title.
#
# The title is normalized (dash and quote entities converted, HTML
# unescaped, spaces collapsed), then split on common title separators;
# parts that look like the site's host name are dropped unless the URL
# points at the site root. Finally the longest remaining part is kept.
#
# Relies on SL.config, SL.add_error and Array#longest_element, none of
# which are defined in this listing.
#
# @param url [String] the url of the page from which the title came
# @return [String] the cleaned title; self when the URL has no host or
#   processing raises; the HTML-unescaped original when the cleaned title
#   is 5 characters or shorter
def remove_seo(url)
  title = dup
  url = URI.parse(url)
  host = url.hostname
  unless host
    return self unless SL.config["debug"]

    SL.add_error("Invalid URL", "Could not remove SEO for #{url}")
    return self
  end

  path = url.path
  root_page = path =~ %r{^/?$} ? true : false

  title.gsub!(/\s*(&ndash;|&mdash;)\s*/, " - ")
  title.gsub!(/&[lr]dquo;/, '"')
  # Single curly quotes use the squo entities; CGI.unescapeHTML below does
  # not translate them.
  title.gsub!(/&[lr]squo;/, "'")
  title.gsub!(/&#8211;/, " — ")
  title = CGI.unescapeHTML(title)
  title.gsub!(/ +/, " ")

  seo_title_separators = %w[| » « — – - · :]

  begin
    re_parts = []

    # Build a loose pattern for each host segment of 3+ characters,
    # allowing one optional character between letters.
    host_parts = host.sub(/(?:www\.)?(.*?)\.[^.]+$/, '\1').split(/\./).delete_if { |p| p.length < 3 }
    h_re = !host_parts.empty? ? host_parts.map { |seg| seg.downcase.split(//).join(".?") }.join("|") : ""
    re_parts.push(h_re) unless h_re.empty?

    # p_re = path.path_elements.map{|seg| seg.downcase.split(//).join('.?') }.join('|')
    # re_parts.push(p_re) if p_re.length > 0

    site_re = "(#{re_parts.join('|')})"

    # Bounds the loop in case the title never stops matching.
    dead_switch = 0

    while title.downcase.gsub(/[^a-z]/i, "") =~ /#{site_re}/i
      break if dead_switch > 5

      seo_title_separators.each_with_index do |sep, i|
        parts = title.split(/ *#{Regexp.escape(sep)} +/)

        next if parts.length == 1

        remaining_separators = seo_title_separators[i..].map { |s| Regexp.escape(s) }.join("")
        seps = Regexp.new("^[^#{remaining_separators}]+$")

        longest = parts.longest_element.strip

        unless parts.empty?
          # Drop parts that look like the site name, except on the root page.
          parts.delete_if do |pt|
            compressed = pt.strip.downcase.gsub(/[^a-z]/i, "")
            compressed =~ /#{site_re}/ && pt =~ seps ? !root_page : false
          end
        end

        title = if parts.empty?
                  longest
                elsif parts.length < 2
                  parts.join(sep)
                elsif parts.length > 2
                  parts.longest_element.strip
                else
                  parts.join(sep)
                end
      end
      dead_switch += 1
    end
  rescue StandardError => e
    return self unless SL.config["debug"]

    SL.add_error("Error SEO processing title for #{url}", e)
    return self
  end

  seps = Regexp.new(" *[#{seo_title_separators.map { |s| Regexp.escape(s) }.join('')}] +")
  if title =~ seps
    seo_parts = title.split(seps)
    title = seo_parts.longest_element.strip if seo_parts.length.positive?
  end

  title && title.length > 5 ? title.gsub(/\s+/, " ") : CGI.unescapeHTML(self)
end

#remove_seo!(url) ⇒ Object

Destructively remove SEO elements from a title

Parameters:

  • url

    The url of the page from which the title came

See Also:



365
366
367
# File 'lib/searchlink/string.rb', line 365

# In-place variant of #remove_seo.
#
# @param url [String] the url of the page from which the title came
# @return [String] self
# @see #remove_seo
def remove_seo!(url)
  cleaned = remove_seo(url)
  replace(cleaned)
end

Scan a string for links

Returns:

  • (Hash)

    Hash of links



8
9
10
11
12
# File 'lib/searchlink/string.rb', line 8

# Scan a string for reference-style link definitions ("[title]: url"
# followed by a newline).
#
# @return [Hash{String => String}] hash mapping each stripped url to the
#   bracketed title it was defined with
def scan_links
  definition = /\[(.*?)\]:\s+(.*?)\n/
  scan(definition).each_with_object({}) do |(title, url), links|
    links[url.strip] = title
  end
end

#scrubupObject

Scrub invalid characters from string



57
58
59
# File 'lib/searchlink/string.rb', line 57

# Scrub invalid characters from the string by round-tripping it through
# UTF-16 (replacing invalid sequences) and converting non-breaking spaces
# (U+00A0) to regular spaces.
#
# @return [String] UTF-8 string
def scrubup
  utf16 = encode("utf-16", invalid: :replace)
  utf16.encode("utf-8").gsub(/\u00A0/, " ")
end

#scrubup!Object

See Also:

  • #scrub


62
63
64
# File 'lib/searchlink/string.rb', line 62

# In-place variant of #scrubup.
#
# Delegates to #scrubup rather than Ruby's built-in String#scrub, so the
# destructive and non-destructive versions produce the same result
# (including the non-breaking-space conversion).
#
# @return [String] self
# @see #scrubup
def scrubup!
  replace scrubup
end

#shorten_pathObject

Shorten path by adding ~ for home directory



636
637
638
639
# File 'lib/searchlink/string.rb', line 636

# Shorten a path by replacing a leading home directory with "~".
#
# The home directory is read from ENV["HOME"]. It is only replaced when
# it is the whole path or a leading directory of it, never in the middle
# of the path or as a partial directory name. When HOME is unset or empty
# the path is returned unchanged.
#
# @return [String] a new string with the shortened path
def shorten_path
  home = ENV["HOME"].to_s.chomp("/")
  return dup if home.empty?
  return dup unless self == home || start_with?("#{home}/")

  "~#{self[home.length..]}"
end

#slugifyString

Turn a string into a slug, removing spaces and non-alphanumeric characters

Returns:

  • (String)

    slugified string



221
222
223
# File 'lib/searchlink/string.rb', line 221

# Turn a string into a slug: lowercase it, replace every character other
# than ASCII letters, digits and underscores with a hyphen, collapse runs
# of hyphens, and drop one trailing hyphen.
#
# @return [String] slugified string
def slugify
  dashed = downcase.gsub(/[^a-z0-9_]/i, "-")
  dashed.squeeze("-").delete_suffix("-")
end

#slugify!Object

Destructive slugify

See Also:



227
228
229
# File 'lib/searchlink/string.rb', line 227

# In-place variant of #slugify.
#
# @return [String] self
# @see #slugify
def slugify!
  slug = slugify
  replace(slug)
end

#spacerString

Generate a spacer based on character widths for help dialog display

Returns:

  • (String)

    string containing tabs



160
161
162
163
164
165
166
167
168
169
170
# File 'lib/searchlink/string.rb', line 160

# Generate a spacer for help dialog display based on an estimated width:
# the string length, plus one for each "m", "w" or "v" and minus one for
# each "t".
#
# @return [String, nil] two tabs for widths 0-3, a space and a tab for
#   widths 4-12, nil for any other width
def spacer
  width = length + count("mwv") - count("t")

  if (0..3).cover?(width)
    "\t\t"
  elsif (4..12).cover?(width)
    " \t"
  end
end

#split_hookObject



8
9
10
11
12
13
14
15
# File 'lib/searchlink/searches/hook.rb', line 8

# Split a "name||url||path" record into a hash.
#
# Each field is passed through #nil_if_missing. Fields that are absent —
# including trailing empty fields, which String#split drops — come back
# as nil instead of raising NoMethodError.
#
# @return [Hash{Symbol => String, nil}] hash with :name, :url and :path
def split_hook
  name, url, path = split(/\|\|/)
  {
    name: name&.nil_if_missing,
    url: url&.nil_if_missing,
    path: path&.nil_if_missing
  }
end

#split_hooksObject



17
18
19
# File 'lib/searchlink/searches/hook.rb', line 17

# Split a "^^"-delimited list of records and convert each one with
# #split_hook.
#
# @return [Array] one #split_hook result per record
def split_hooks
  records = split("^^")
  records.map { |record| record.split_hook }
end

#to_amString

convert itunes to apple music link

Returns:

  • (String)

    apple music link



248
249
250
251
252
253
# File 'lib/searchlink/string.rb', line 248

# Convert an iTunes link to an Apple Music link.
#
# Rewrites the host "itunes.apple.com" to "geo.itunes.apple.com" and
# appends the "app=music" parameter, using "&" when the URL already has a
# query and "?" otherwise. The slash in front of the host is part of the
# match and is written back, so "https://" stays intact.
#
# @return [String] apple music link
def to_am
  geo_url = sub(%r{/itunes\.apple\.com}, "/geo.itunes.apple.com")
  suffix = geo_url =~ %r{\?[^/]+=} ? "&app=music" : "?app=music"
  geo_url + suffix
end

#to_rx_array(separator: " ", start_word: true) ⇒ Array

Break a string into an array of Regexps

Parameters:

  • separator (String) (defaults to: " ")

    The word separator

  • start_word (Boolean) (defaults to: true)

    Require matches at start of word

Returns:

  • (Array)

    array of regular expressions



619
620
621
622
623
# File 'lib/searchlink/string.rb', line 619

# Break a string into an array of case-insensitive Regexps, one per word.
#
# Runs of +separator+ are collapsed before splitting. In each word, every
# character other than an ASCII letter or digit becomes ".?" (an optional
# wildcard).
#
# @param separator [String] the word separator (interpolated into a
#   regular expression unescaped)
# @param start_word [Boolean] require matches at the start of a word
# @return [Array<Regexp>] array of regular expressions
def to_rx_array(separator: " ", start_word: true)
  prefix = start_word ? '\b' : ""
  collapsed = gsub(/(#{separator})+/, separator)
  collapsed.split(/#{separator}/).map do |term|
    pattern = term.gsub(/[^a-z0-9]/i, '.?')
    /#{prefix}#{pattern}/i
  end
end

#truncate(max) ⇒ Object

Truncate string to given length, preserving words

Parameters:

  • max (Number)

    The maximum length



477
478
479
480
481
482
483
484
485
486
487
488
489
490
# File 'lib/searchlink/string.rb', line 477

# Truncate the string to a given length, preserving whole words.
#
# Words (split on whitespace) are kept, joined by single spaces, for as
# long as the joined result still fits within +max+. The space between
# words is counted, so the result never exceeds +max+ unless the first
# word alone is longer, in which case that first word is returned whole.
#
# @param max [Integer] the maximum length
# @return [String] self when it already fits, otherwise the truncated
#   string
def truncate(max)
  return self if length <= max

  words = split(/\s+/)
  kept = []
  used = 0

  words.each do |word|
    # Every word after the first also costs one joining space.
    needed = kept.empty? ? word.length : used + 1 + word.length
    break if needed > max

    kept << word
    used = needed
  end

  kept.empty? ? words.first.to_s : kept.join(" ")
end

#truncate!(max) ⇒ Object

Truncate in place

Parameters:

  • max (Number)

    The maximum length

See Also:



468
469
470
# File 'lib/searchlink/string.rb', line 468

# In-place variant of #truncate.
#
# @param max [Integer] the maximum length
# @return [String] self
# @see #truncate
def truncate!(max)
  shortened = truncate(max)
  replace(shortened)
end

#url_decodeObject



141
142
143
# File 'lib/searchlink/string.rb', line 141

# URL-decode the string with CGI.unescape.
#
# @return [String] decoded string
def url_decode
  encoded = self
  CGI.unescape(encoded)
end

#url_encodeString

URL Encode string

Returns:

  • (String)

    url encoded string



137
138
139
# File 'lib/searchlink/string.rb', line 137

# URL-encode the string with ERB::Util.url_encode.
#
# Already-encoded double quotes (%22) are decoded first so they are not
# encoded twice.
#
# @return [String] url encoded string
def url_encode
  unquoted = gsub(/%22/, '"')
  ERB::Util.url_encode(unquoted)
end

#url_pathString

Return just the path of a URL

Returns:



293
294
295
# File 'lib/searchlink/string.rb', line 293

# Return just the path of a URL, as parsed by URI.parse.
#
# @return [String] the path component
def url_path
  uri = URI.parse(self)
  uri.path
end

#valid_version?Boolean

Test if given string is a valid semantic version number with major, minor and patch (and optionally pre)

Returns:

  • (Boolean)

    string is semantic version number



39
40
41
42
# File 'lib/searchlink/semver.rb', line 39

# Test whether the string looks like a semantic version number with
# major, minor and patch (and optionally a pre-release suffix).
#
# @return [Boolean] true when the string matches the version pattern
def valid_version?
  semver = /^\d+\.\d+\.\d+(-?([^0-9]+\d*))?$/
  semver.match?(self)
end

#word_wrap(col_width = 60, prefix = "") ⇒ Object

Word wrap a string not exceeding max width, returning a new string.

CREDIT: Gavin Kistner, Dayne Broderson



49
50
51
52
53
54
# File 'lib/searchlink/string.rb', line 49

# Word wrap a string so that no line exceeds +col_width+, returning a new
# string. CREDIT: Gavin Kistner, Dayne Broderson
#
# Unbroken runs of non-whitespace longer than +col_width+ are first split
# by inserting a space after every +col_width+ characters; the text is
# then broken into lines, each starting with +prefix+ and ending with a
# newline.
#
# @param col_width [Integer] the maximum line width (excluding the prefix)
# @param prefix [String] text to put at the start of every line
# @return [String] the wrapped copy
def word_wrap(col_width = 60, prefix = "")
  str = dup
  # Give over-long words a break point so the line pass below can wrap them.
  str.gsub!(/(\S{#{col_width}})(?=\S)/, '\1 ')
  str.gsub!(/(.{1,#{col_width}})(?:\s+|$)/) { "#{prefix}#{Regexp.last_match(1)}\n" }
  str
end

#word_wrap!(col_width = 60, prefix = "") ⇒ Object

As with #word_wrap, but modifies the string in place. CREDIT: Gavin Kistner, Dayne Broderson



41
42
43
# File 'lib/searchlink/string.rb', line 41

# In-place variant of #word_wrap: wraps a copy of the string and replaces
# the receiver's contents with the result.
#
# @param col_width [Integer] the maximum line width
# @param prefix [String] text to put at the start of every line
# @return [String] self
# @see #word_wrap
def word_wrap!(col_width = 60, prefix = "")
  wrapped = dup.word_wrap(col_width, prefix)
  replace(wrapped)
end

#yaml_valObject

Quote a YAML value if needed



33
34
35
36
# File 'lib/searchlink/string.rb', line 33

# Quote a YAML value if needed.
#
# The string is loaded as a single-quoted YAML scalar and dumped again, so
# the YAML library decides whether the value needs quoting. Single quotes
# in the value are doubled first (the escape used inside single-quoted
# YAML scalars); otherwise a value such as "it's" would not parse.
#
# @return [String] the value as it appears after "key: " in the dumped
#   YAML (first line only)
def yaml_val
  escaped = gsub("'", "''")
  yaml = YAML.safe_load("key: '#{escaped}'")
  YAML.dump(yaml).match(/key: (.*?)$/)[1]
end