Module: OllamaChat::SourceFetching

Included in:
Chat
Defined in:
lib/ollama_chat/source_fetching.rb

Instance Method Summary

  #add_image(images, source_io, source) ⇒ Object
  #embed(source) ⇒ Object
  #embed_source(source_io, source, count: nil) ⇒ Object
  #fetch_source(source, &block) ⇒ Object
  #http_options(url) ⇒ Object
  #import(source) ⇒ Object
  #import_source(source_io, source) ⇒ Object
  #summarize(source, words: nil) ⇒ Object
  #summarize_source(source_io, source, words: nil) ⇒ Object

Instance Method Details

#add_image(images, source_io, source) ⇒ Object



# File 'lib/ollama_chat/source_fetching.rb', line 45

def add_image(images, source_io, source)
  STDERR.puts "Adding #{source_io&.content_type} image #{source.to_s.inspect}."
  image = Ollama::Image.for_io(source_io, path: source.to_s)
  (images << image).uniq!
end
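
add_image wraps the fetched IO in an Ollama::Image (via Ollama::Image.for_io) and appends it to the images array, dropping duplicates with uniq!. A minimal sketch of a direct call; chat stands for an OllamaChat::Chat instance (which includes this module), the filename is made up, and the singleton content_type mimics the decoration the fetcher's IOs normally carry:

io = File.open('example.jpg', 'rb')
def io.content_type
  'image/jpeg' # the fetcher normally decorates its IOs with a content type
end
images = []
chat.add_image(images, io, 'example.jpg')
images.first # => an Ollama::Image built from the file's contents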

#embed(source) ⇒ Object



# File 'lib/ollama_chat/source_fetching.rb', line 129

def embed(source)
  if @embedding.on?
    STDOUT.puts "Now embedding #{source.to_s.inspect}."
    fetch_source(source) do |source_io|
      content = parse_source(source_io)
      content.present? or return
      source_io.rewind
      embed_source(source_io, source)
    end
    config.prompts.embed % { source: }
  else
    STDOUT.puts "Embedding is off, so I will just give a small summary of this source."
    summarize(source)
  end
end
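
embed fetches the source, parses it and, when the @embedding switch is on, hands the rewound IO to embed_source before returning the configured embed prompt filled in with the source; when embedding is off it prints a notice and falls back to summarize. A usage sketch; chat and the URL are illustrative assumptions:

# with embedding switched on, this chunks and stores the page in the
# documents database, then returns config.prompts.embed rendered for the source
prompt = chat.embed('https://example.com/article.html')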

#embed_source(source_io, source, count: nil) ⇒ Object



# File 'lib/ollama_chat/source_fetching.rb', line 83

def embed_source(source_io, source, count: nil)
  @embedding.on? or return parse_source(source_io)
  m = "Embedding #{italic { source_io&.content_type }} document #{source.to_s.inspect}."
  if count
    STDOUT.puts '%u. %s' % [ count, m ]
  else
    STDOUT.puts m
  end
  text = parse_source(source_io) or return
  text.downcase!
  splitter_config = config.embedding.splitter
  inputs = nil
  case splitter_config.name
  when 'Character'
    splitter = Documentrix::Documents::Splitters::Character.new(
      chunk_size: splitter_config.chunk_size,
    )
    inputs = splitter.split(text)
  when 'RecursiveCharacter'
    splitter = Documentrix::Documents::Splitters::RecursiveCharacter.new(
      chunk_size: splitter_config.chunk_size,
    )
    inputs = splitter.split(text)
  when 'Semantic'
    splitter = Documentrix::Documents::Splitters::Semantic.new(
      ollama:, model: config.embedding.model.name,
      chunk_size: splitter_config.chunk_size,
    )
    inputs = splitter.split(
      text,
      breakpoint: splitter_config.breakpoint.to_sym,
      percentage: splitter_config.percentage?,
      percentile: splitter_config.percentile?,
    )
  end
  inputs or return
  source = source.to_s
  if source.start_with?(?!)
    source = Kramdown::ANSI::Width.truncate(
      source[1..-1].gsub(/\W+/, ?_),
      length: 10
    )
  end
  @documents.add(inputs, source:, batch_size: config.embedding.batch_size?)
end
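
embed_source parses the IO (falling back to plain parsing when embedding is off), downcases the text, splits it with the splitter selected by config.embedding.splitter.name (Character, RecursiveCharacter, or Semantic), shortens !command sources to a short word-character tag, and adds the resulting chunks to the documents database. A sketch, assuming a RecursiveCharacter splitter is configured and chat is an OllamaChat::Chat instance; the path is made up:

chat.fetch_source('./README.md') do |io|
  # count only prefixes the progress line, useful when embedding in a loop
  chat.embed_source(io, './README.md', count: 1)
end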

#fetch_source(source, &block) ⇒ Object



# File 'lib/ollama_chat/source_fetching.rb', line 14

def fetch_source(source, &block)
  case source
  when %r(\A!(.*))
    command = $1
    OllamaChat::Utils::Fetcher.execute(command) do |tmp|
      block.(tmp)
    end
  when %r(\Ahttps?://\S+)
    links.add(source.to_s)
    OllamaChat::Utils::Fetcher.get(
      source,
      headers:      config.request_headers?.to_h,
      cache:        @cache,
      debug:        config.debug,
      http_options: http_options(OllamaChat::Utils::Fetcher.normalize_url(source))
    ) do |tmp|
      block.(tmp)
    end
  when %r(\Afile://(/\S*?)#|\A((?:\.\.|[~.]?)/\S*))
    filename = $~.captures.compact.first
    filename = File.expand_path(filename)
    OllamaChat::Utils::Fetcher.read(filename) do |tmp|
      block.(tmp)
    end
  else
    raise "invalid source"
  end
rescue => e
  STDERR.puts "Cannot fetch source #{source.to_s.inspect}: #{e.class} #{e}\n#{e.backtrace * ?\n}"
end
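
fetch_source dispatches on the shape of the source string: a leading ! runs a shell command and yields its output, http(s) URLs go through OllamaChat::Utils::Fetcher.get (with caching, request headers and the http_options below), and file:// URLs or absolute/relative/tilde paths are read from disk; anything else raises, and all errors are rescued and reported on STDERR. A sketch of the three forms; paths and URL are illustrative:

chat.fetch_source('!date')               { |io| puts io.read } # command output
chat.fetch_source('https://example.com') { |io| puts io.read } # fetched via HTTP, cached
chat.fetch_source('~/notes/todo.md')     { |io| puts io.read } # read from the filesystem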

#http_options(url) ⇒ Object



# File 'lib/ollama_chat/source_fetching.rb', line 2

def http_options(url)
  options = {}
  if ssl_no_verify = config.ssl_no_verify?
    hostname = URI.parse(url).hostname
    options |= { ssl_verify_peer: !ssl_no_verify.include?(hostname) }
  end
  if proxy = config.proxy?
    options |= { proxy: }
  end
  options
end
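
http_options assembles per-request options for the fetcher: it switches peer verification off for hostnames listed in config.ssl_no_verify? and adds config.proxy? when a proxy is set. A sketch, assuming 'localhost' appears in ssl_no_verify and a proxy is configured; the values shown are made up:

chat.http_options('https://localhost:8443/doc.html')
# e.g. { ssl_verify_peer: false, proxy: 'http://proxy.internal:3128' }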

#import(source) ⇒ Object



# File 'lib/ollama_chat/source_fetching.rb', line 58

def import(source)
  fetch_source(source) do |source_io|
    content = import_source(source_io, source) or return
    source_io.rewind
    content
  end
end
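
import fetches the source, builds the "Imported ..." text via import_source (see below) and rewinds the IO afterwards. A sketch with an illustrative URL:

chat.import('https://example.com/manual.html')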

#import_source(source_io, source) ⇒ Object



# File 'lib/ollama_chat/source_fetching.rb', line 51

def import_source(source_io, source)
  source = source.to_s
  STDOUT.puts "Importing #{italic { source_io&.content_type }} document #{source.to_s.inspect} now."
  source_content = parse_source(source_io)
  "Imported #{source.inspect}:\n\n#{source_content}\n\n"
end
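
import_source prints a progress line and returns the parsed content wrapped in an "Imported ..." preamble. A sketch with a pre-fetched IO and a made-up path:

chat.fetch_source('./CHANGES.md') do |io|
  message = chat.import_source(io, './CHANGES.md')
  # message begins with "Imported \"./CHANGES.md\":" followed by the parsed text
end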

#summarize(source, words: nil) ⇒ Object



# File 'lib/ollama_chat/source_fetching.rb', line 75

def summarize(source, words: nil)
  fetch_source(source) do |source_io|
    content = summarize_source(source_io, source, words:) or return
    source_io.rewind
    content
  end
end
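
summarize fetches the source and returns the summarize prompt built by summarize_source (see below), rewinding the IO afterwards. A sketch; the URL is illustrative and words falls back to 100 when omitted:

chat.summarize('https://example.com/longread.html', words: 200)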

#summarize_source(source_io, source, words: nil) ⇒ Object



# File 'lib/ollama_chat/source_fetching.rb', line 66

def summarize_source(source_io, source, words: nil)
  STDOUT.puts "Summarizing #{italic { source_io&.content_type }} document #{source.to_s.inspect} now."
  words = words.to_i
  words < 1 and words = 100
  source_content = parse_source(source_io)
  source_content.present? or return
  config.prompts.summarize % { source_content:, words: }
end