Class: Omamori::AIAnalysisEngine::DiffSplitter

Inherits:
Object
  • Object
show all
Defined in:
lib/omamori/ai_analysis_engine/diff_splitter.rb

Constant Summary collapse

DEFAULT_CHUNK_SIZE =

TODO: Determine an appropriate chunk size based on token limits. Gemini 1.5 Pro has a large context window (1 million tokens), but splitting might still be necessary for very large inputs or to manage cost/latency.

8000

Instance Method Summary collapse

Constructor Details

#initialize(chunk_size: DEFAULT_CHUNK_SIZE) ⇒ DiffSplitter

The chunk size is measured in characters, which serve as a proxy for tokens.



12
13
14
# File 'lib/omamori/ai_analysis_engine/diff_splitter.rb', line 12

# Creates a splitter that packs content into chunks of roughly +chunk_size+
# characters.
#
# @param chunk_size [Numeric] upper bound, in characters, that #split compares
#   accumulated chunk lengths against.
# @raise [ArgumentError] if +chunk_size+ is not Numeric. Such a value would
#   otherwise only fail later, inside #split, when it is compared against an
#   Integer length.
def initialize(chunk_size: DEFAULT_CHUNK_SIZE)
  # Fail fast: surface a bad configuration at construction time instead of in
  # the middle of processing.
  unless chunk_size.is_a?(Numeric)
    raise ArgumentError, "chunk_size must be Numeric, got #{chunk_size.inspect}"
  end

  @chunk_size = chunk_size
end

Instance Method Details

#process_in_chunks(content, gemini_client, json_schema, prompt_manager, risks_to_check, model: "gemini-1.5-pro-latest") ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/omamori/ai_analysis_engine/diff_splitter.rb', line 31

# Splits +content+ into chunks, sends each chunk to the AI client for
# analysis, and merges the per-chunk results.
#
# Progress is reported on standard output: one line announcing the chunk
# count, then one line before each chunk is processed.
#
# @param content the text handed to #split.
# @param gemini_client [#analyze] called once per chunk with the built
#   prompt, +json_schema+ and the +model:+ keyword.
# @param json_schema passed through to both the prompt builder and the client.
# @param prompt_manager [#build_prompt] called once per chunk with the chunk,
#   +risks_to_check+ and +json_schema+.
# @param risks_to_check passed through to the prompt builder.
# @param model [String] model identifier forwarded to the client.
# @return [Object] whatever #combine_results returns for the list of
#   per-chunk results (in chunk order).
def process_in_chunks(content, gemini_client, json_schema, prompt_manager, risks_to_check, model: "gemini-1.5-pro-latest")
  chunks = split(content)
  total = chunks.size

  puts "Splitting content into #{total} chunks..."

  # Chunks are processed sequentially; positions are 1-based for display.
  # TODO: Handle potential rate limits or errors between chunks
  per_chunk_results = chunks.each.with_index(1).map do |chunk, position|
    puts "Processing chunk #{position}/#{total}..."
    prompt = prompt_manager.build_prompt(chunk, risks_to_check, json_schema)
    gemini_client.analyze(prompt, json_schema, model: model)
  end

  # TODO: Combine results from all chunks
  combine_results(per_chunk_results)
end

#split(content) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/omamori/ai_analysis_engine/diff_splitter.rb', line 16

# Splits +content+ into consecutive chunks, packing whole lines together
# while the accumulated length stays within @chunk_size characters.
#
# Concatenating the returned chunks reproduces +content+ exactly.
#
# A single line longer than @chunk_size is cut into slices of at most
# @chunk_size characters so that no chunk exceeds the limit. This slicing is
# only applied when @chunk_size is a positive Integer; for any other value
# the method splits on line boundaries only.
#
# @param content [#each_line] the text to split.
# @return [Array<String>] the chunks, in order; empty when +content+ yields
#   no lines.
def split(content)
  limit = @chunk_size
  enforce_limit = limit.is_a?(Integer) && limit.positive?

  chunks = []
  current_chunk = +""

  content.each_line do |line|
    # Break up lines that could never fit into a chunk on their own.
    pieces =
      if enforce_limit && line.length > limit
        (0...line.length).step(limit).map { |offset| line[offset, limit] }
      else
        [line]
      end

    pieces.each do |piece|
      if current_chunk.length + piece.length > limit
        chunks << current_chunk unless current_chunk.empty?
        # dup so the in-place appends below never touch the yielded string.
        current_chunk = piece.dup
      else
        # Append in place; += would allocate a new string for every line.
        current_chunk << piece
      end
    end
  end

  chunks << current_chunk unless current_chunk.empty?
  chunks
end