Class: MboxMerge::Merger
- Inherits:
-
Object
- Object
- MboxMerge::Merger
- Defined in:
- lib/mbox_merge.rb
Class Method Summary collapse
-
.merge_mboxes(output_mbox, *mbox_files) ⇒ Statistics
Merges multiple mbox files into a single mbox file, sorting emails by date and returning statistics about the merged emails.
Class Method Details
.merge_mboxes(output_mbox, *mbox_files) ⇒ Statistics
Merges multiple mbox files into a single mbox file, sorting emails by date and returning statistics about the merged emails.
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/mbox_merge.rb', line 12

# Merges several mbox files into a single mbox file ordered by message date.
#
# Each input file is split on the mbox "From " separator line, every non-empty
# chunk is parsed with Mail.read_from_string, and the parsed messages are
# written to +output_mbox+ sorted by their date. Chunks that fail to parse are
# reported on stdout and skipped. Messages without a usable date are treated
# as dated at the moment the merge started and keep their input order.
#
# @param output_mbox [String] path of the mbox file to write (truncated if it exists)
# @param mbox_files [Array<String>] paths of the mbox files to merge
# @return [Statistics] built from the number of input files and the number of parsed emails
def self.merge_mboxes(output_mbox, *mbox_files)
  emails = mbox_files.flat_map { |mbox_file| load_emails(mbox_file) }

  # One fallback instant for the whole run, so undated emails compare equal to
  # each other instead of depending on how fast the sort happens to execute.
  fallback_time = Time.now

  # The input index breaks ties, which keeps the ordering deterministic
  # (sort_by alone is not stable).
  sorted_emails = emails.each_with_index
                        .sort_by { |email, index| [email_time(email, fallback_time), index] }
                        .map(&:first)

  progress_bar = ProgressBar.create(title: "Merging", total: sorted_emails.size, format: '%t: |%B| %p%%')

  File.open(output_mbox, 'w') do |out|
    sorted_emails.each do |email|
      stamp = email_time(email, fallback_time).strftime("%a, %d %b %Y %H:%M:%S %z")
      out.puts "From #{email.envelope_from} #{stamp}"
      out.puts email.to_s
      progress_bar.increment
    end
  end

  Statistics.new(mbox_files.size, emails.size)
end

# Parses every message found in one mbox file.
#
# @api private
# @param mbox_file [String] path of the mbox file to read
# @return [Array] the messages that parsed successfully, in file order
def self.load_emails(mbox_file)
  # Read as raw bytes: mailboxes routinely contain non-UTF-8 content, and
  # matching a regexp against an invalidly encoded text string raises before
  # any per-message error handling gets a chance to run.
  chunks = File.binread(mbox_file).split(/^From /)

  parsed = chunks.map do |chunk|
    next if chunk.strip.empty?

    begin
      # Prepend the 'From ' separator that split removed.
      Mail.read_from_string("From #{chunk}")
    rescue StandardError => e
      puts "Failed to parse email: #{e.message}"
      nil
    end
  end

  parsed.compact
end

# Returns the email's date as a Time, or +fallback+ when the date is missing
# or cannot be converted.
#
# @api private
# @param email [#date] a parsed message
# @param fallback [Time] value to use for messages without a usable date
# @return [Time]
def self.email_time(email, fallback)
  date = email.date
  date ? date.to_time : fallback
rescue StandardError
  fallback
end