Class: MboxMerge::Merger

Inherits:
Object
  • Object
show all
Defined in:
lib/mbox_merge.rb

Class Method Summary collapse

Class Method Details

.merge_mboxes(output_mbox, *mbox_files) ⇒ Statistics

Merges multiple mbox files into a single mbox file, sorting the emails by date and returning statistics about the merge.

Parameters:

  • output_mbox (String)

    Path to the output mbox file

  • mbox_files (Array<String>)

    List of mbox files to be merged

Returns:

  • (Statistics)

    Statistics object containing information about the merging



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/mbox_merge.rb', line 12

# Merges several mbox files into a single mbox file ordered by message date.
#
# Every input file is read as raw bytes, split on the mbox "From " separator
# and each chunk is parsed with Mail.read_from_string. A chunk that fails to
# parse is reported on standard output and skipped. The timestamp of each
# message is resolved exactly once and is used both as the sort key and in
# the separator line written to the output file.
#
# @param output_mbox [String] path of the mbox file to write; it is opened in
#   'w' mode, so existing content is replaced
# @param mbox_files [Array<String>] paths of the mbox files to merge
# @return [Statistics] built from the number of input files and the number of
#   successfully parsed emails
def self.merge_mboxes(output_mbox, *mbox_files)
    emails = []

    mbox_files.each do |mbox_file|
        # Binary read: a mailbox can mix charsets, and splitting a string that
        # is invalid in the default external encoding can raise ArgumentError
        # outside the per-message rescue, aborting the whole merge.
        File.binread(mbox_file).split(/^From /).each do |raw_message|
            # Skip the empty chunk that precedes the first separator
            next if raw_message.strip.empty?

            begin
                # split consumed the separator, so put 'From ' back
                emails << Mail.read_from_string("From #{raw_message}")
            rescue StandardError => e
                puts "Failed to parse email: #{e.message}"
            end
        end
    end

    total_emails = emails.size

    # Resolve each timestamp once. Messages without a usable date all share one
    # fallback (the time of this run), so they sort after older mail and keep
    # their input order instead of depending on when Time.now happened to run.
    fallback_time = Time.now
    dated_emails = emails.each_with_index.map do |email, position|
        timestamp =
            begin
                date = email.date
                date ? date.to_time : fallback_time
            rescue StandardError
                fallback_time
            end
        [timestamp, position, email]
    end

    # The original position breaks ties, which makes the ordering stable
    sorted_emails = dated_emails.sort_by { |timestamp, position, _email| [timestamp, position] }

    progress_bar = ProgressBar.create(title: "Merging", total: sorted_emails.size, format: '%t: |%B| %p%%')

    File.open(output_mbox, 'w') do |out|
        sorted_emails.each do |timestamp, _position, email|
            # A missing envelope sender would leave a malformed "From  <date>"
            # line; fall back to the first From: address, then to the
            # conventional mbox placeholder.
            sender = email.envelope_from || Array(email.from).first || 'MAILER-DAEMON'

            # mbox separator lines carry a ctime-style UTC timestamp (RFC 4155).
            # getutc returns a copy, leaving the sort key untouched.
            out.puts "From #{sender} #{timestamp.getutc.strftime('%a %b %e %H:%M:%S %Y')}"

            message_text = email.to_s
            out.puts message_text
            # Each message must end with an empty line before the next
            # separator; add one only when the message does not already have it.
            out.puts unless message_text.end_with?("\n\n", "\r\n\r\n")

            progress_bar.increment
        end
    end

    Statistics.new(mbox_files.size, total_emails)
end