Class: Redwood::MBox

Inherits:
Source show all
Includes:
SerializeLabelsNicely
Defined in:
lib/sup/mbox.rb

Direct Known Subclasses

Loader, SentLoader

Defined Under Namespace

Classes: Loader

Constant Summary collapse

# Matches an mbox "From " separator line. Capture group 1 is the text after
# the sender token, which is_break_line? then parses as a timestamp.
BREAK_RE =
/^From \S+ (.+)$/

Instance Attribute Summary collapse

Attributes inherited from Source

#id, #uri, #usual

Class Method Summary collapse

Instance Method Summary collapse

Methods included from SerializeLabelsNicely

#after_unmarshal!, #before_marshal

Methods inherited from Source

#==, #labels?, parse_raw_email_header, #read?, #supported_labels?, #synchronize, #to_s, #try_lock, #unlock, #valid?

Constructor Details

#initialize(uri_or_fp, usual = true, archived = false, id = nil, labels = nil) ⇒ MBox

The uri_or_fp parameter is horrific (it may be either a URI string or an already-open file object); it needs to be refactored.



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/sup/mbox.rb', line 16

## Build an mbox source from either an "mbox:" URI string or an
## already-open file object (anything that responds to #path).
##
## uri_or_fp:: URI string, or an open file object
## usual, archived, id:: passed through unchanged to the Source constructor
## labels:: optional list of labels; reserved labels are removed
##
## Raises ArgumentError when a string argument is not a host-less mbox
## URI with a path component.
def initialize uri_or_fp, usual=true, archived=false, id=nil, labels=nil
  @mutex = Mutex.new
  requested_labels = labels || []
  @labels = Set.new(requested_labels - LabelManager::RESERVED_LABELS)

  if String === uri_or_fp
    @expanded_uri = Source.expand_filesystem_uri(uri_or_fp)
    ## peel "scheme:" / "scheme://" off the front so that only the path
    ## portion is run through the URI encoder.
    scheme_match = /^([a-zA-Z0-9]*:(\/\/)?)(.*)/.match @expanded_uri
    uri =
      if scheme_match
        @path = scheme_match[3]
        URI(scheme_match[1] + Source.encode_path_for_uri(@path))
      else
        URI(Source.encode_path_for_uri @expanded_uri)
      end
    @path = uri.path unless scheme_match

    raise ArgumentError, "not an mbox uri" if uri.scheme != "mbox"
    host = uri.host
    unless host.nil? || host.empty?
      raise ArgumentError, "mbox URI ('#{uri}') cannot have a host: #{host}"
    end
    raise ArgumentError, "mbox URI must have a path component" unless uri.path

    ## no file handle yet for the URI form
    @f = nil
  else
    ## we were handed an open file: keep it and derive the URI from its path
    @path = uri_or_fp.path
    @f = uri_or_fp
    @expanded_uri = "mbox://#{Source.encode_path_for_uri @path}"
  end

  super uri_or_fp, usual, archived, id
end

Instance Attribute Details

#labels ⇒ Object (readonly)

Returns the value of attribute labels.



13
14
15
# File 'lib/sup/mbox.rb', line 13

## Reader for the label collection stored in @labels.
def labels; @labels end

Class Method Details

.is_break_line?(l) ⇒ Boolean

Returns:

  • (Boolean)


197
198
199
200
201
202
203
204
205
206
207
# File 'lib/sup/mbox.rb', line 197

## Decide whether +l+ is an mbox message separator: it has to match
## BREAK_RE *and* the captured text must parse as an asctime-style
## timestamp. A line that looks like a separator but carries an
## unparseable date is reported via warn and treated as ordinary content.
##
## Returns true or false.
def self.is_break_line? l
  return false unless l =~ BREAK_RE
  stamp = $1

  begin
    Time.strptime stamp, "%a %b %d %H:%M:%S %Y"
  rescue NoMethodError, ArgumentError
    warn "found invalid date in potential mbox split line, not splitting: #{l.inspect}"
    return false
  end

  true
end

.suggest_labels_for(path) ⇒ Object



49
50
51
52
53
54
55
56
57
# File 'lib/sup/mbox.rb', line 49

## Propose labels for the mbox file at +path+.
##
## Returns an array holding the downcased file name as a symbol, or an
## empty array when the containing directory looks like a system spool.
def self.suggest_labels_for path
  ## heuristic: a directory mentioning var/usr/spool probably holds an
  ## inbox, so the filename would not make a meaningful label.
  looks_like_inbox = File.dirname(path) =~ /\b(var|usr|spool)\b/
  return [] if looks_like_inbox

  label = File.basename(path).downcase.intern
  [label]
end

Instance Method Details

#default_labels ⇒ Object



159
160
161
# File 'lib/sup/mbox.rb', line 159

## Labels that poll adds to every message on top of the source's own labels.
def default_labels
  %i[inbox unread]
end

#each_raw_message_line(offset) ⇒ Object

Apparently it is vastly faster to call this directly when we are just moving messages around on disk than to read the whole message into memory with raw_message.



129
130
131
132
133
134
135
136
137
# File 'lib/sup/mbox.rb', line 129

## Yield each raw line of the message that starts at +offset+, stopping
## at end of file or at the next mbox separator line (which is not
## yielded). The file handle is used under the source mutex.
def each_raw_message_line offset
  @mutex.synchronize do
    ensure_open
    @f.seek offset
    until @f.eof?
      line = @f.gets
      ## the next "From " separator belongs to the following message
      break if MBox::is_break_line? line
      yield line
    end
  end
end

#fallback_date_for_message(offset) ⇒ Time?



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/sup/mbox.rb', line 139

## Recover a date for the message at +offset+ from the mbox "From "
## separator line that precedes it.
##
## This is a bit awkward... We treat the From line as a delimiter,
## not part of the message. So the offset is pointing *after* the
## From line for the desired message. With a bit of effort we can
## scan backwards to find its From line and extract a date from it.
##
## Returns the Time parsed from the separator line, or nil when the
## line before +offset+ does not match BREAK_RE.
def fallback_date_for_message offset
  buf = @mutex.synchronize do
    ensure_open
    start = offset
    loop do
      ## widen the window backwards 200 bytes at a time, never past byte 0
      start = [start - 200, 0].max
      @f.seek start
      chunk = @f.read(offset - start)
      ## The chunk normally *ends* with the From line's own newline, so
      ## that newline proves nothing. Only a newline before it shows that
      ## the entire From line is inside the window; otherwise a separator
      ## longer than the window would be returned truncated.
      break chunk if start == 0 || chunk.chomp.include?("\n")
    end
  end
  BREAK_RE.match buf.lines.last do |m|
    Time.strptime m[1], "%a %b %d %H:%M:%S %Y"
  end
end

#file_path ⇒ Object



46
# File 'lib/sup/mbox.rb', line 46

## Filesystem path of the mbox file backing this source.
def file_path
  @path
end

#first_new_message ⇒ Object

Returns the offset of the first new message, or nil if there is none.



193
194
195
# File 'lib/sup/mbox.rb', line 193

## Offset of the first message after the last indexed one (scanning from
## the start of the file when nothing has been indexed), or nil when
## next_offset finds none.
def first_new_message
  resume_from = last_indexed_message || 0
  next_offset resume_from
end

#go_idle ⇒ Object



64
65
66
67
68
69
70
# File 'lib/sup/mbox.rb', line 64

## Close and forget the open file handle, under the source mutex.
## Nothing happens when no handle is open or no path is known.
def go_idle
  @mutex.synchronize do
    unless @f.nil? || @path.nil?
      @f.close
      @f = nil
    end
  end
end

#is_source_for?(uri) ⇒ Boolean

Returns:

  • (Boolean)


47
# File 'lib/sup/mbox.rb', line 47

## True when the parent implementation accepts +uri+, or when +uri+
## equals this source's expanded URI.
def is_source_for? uri
  inherited = super
  inherited || uri == @expanded_uri
end

#last_indexed_message ⇒ Object

TODO: optimize this by iterating over the allterms list backwards, or by storing source_info negated.



188
189
190
# File 'lib/sup/mbox.rb', line 188

## Largest source-info value (as an integer) that the index reports for
## this source's id, or nil when the index reports none. The lookup is
## wrapped in the :mbox_read_index benchmark.
def last_indexed_message
  benchmark(:mbox_read_index) do
    infos = Index.instance.enum_for(:each_source_info, self.id)
    infos.map(&:to_i).max
  end
end

#load_header(offset) ⇒ Object



72
73
74
75
76
77
78
79
80
# File 'lib/sup/mbox.rb', line 72

## Seek to +offset+ and return whatever parse_raw_email_header produces
## when reading from the file there. Runs under the source mutex.
def load_header offset
  @mutex.synchronize do
    ensure_open
    @f.seek offset
    parse_raw_email_header @f
  end
end

#load_message(offset) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/sup/mbox.rb', line 82

## Read the message that starts at +offset+ (up to the next separator
## line or end of file) and hand its text to RMail::Parser.read.
##
## Raises FatalSourceError when the parser raises RMail::Parser::Error.
def load_message offset
  @mutex.synchronize do
    ensure_open
    @f.seek offset
    begin
      ## don't use RMail::Mailbox::MBoxReader because it doesn't properly ignore
      ## "From" at the start of a message body line.
      text = +""
      until @f.eof?
        line = @f.gets
        break if MBox::is_break_line? line
        text << line
      end
      RMail::Parser.read text
    rescue RMail::Parser::Error => e
      raise FatalSourceError, "error parsing mbox file: #{e.message}"
    end
  end
end

#next_offset(offset) ⇒ Object



176
177
178
179
180
181
182
183
184
# File 'lib/sup/mbox.rb', line 176

## Scan forward from +offset+ to just past the next separator line.
##
## Returns the file position reached, or nil when that position is the
## end of the file.
def next_offset offset
  @mutex.synchronize do
    ensure_open
    @f.seek offset
    ## consume lines until a separator has been read or input runs out
    while (line = @f.gets)
      break if MBox::is_break_line? line
    end
    position = @f.tell
    position == File.size(@f) ? nil : position
  end
end

#poll ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/sup/mbox.rb', line 163

## Walk every message from first_new_message onwards, yielding
## :add together with a hash of :info (the message offset), :labels
## (source labels plus default_labels) and :progress (distance covered
## since the first offset, as a fraction of the file size).
def poll
  start = first_new_message
  total = File.size @f
  cursor = start
  while cursor and cursor < total
    yield :add,
      :info => cursor,
      :labels => (labels + default_labels),
      :progress => (cursor - start).to_f/total
    cursor = next_offset cursor
  end
end

#raw_header(offset) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
# File 'lib/sup/mbox.rb', line 100

## Return the raw header text of the message at +offset+: every line up
## to, but not including, the first blank line (or up to end of file).
def raw_header offset
  header = +""
  @mutex.synchronize do
    ensure_open
    @f.seek offset
    until @f.eof?
      line = @f.gets
      ## an empty line (optionally with carriage returns) ends the header
      break if line =~ /^\r*$/
      header << line
    end
  end
  header
end

#raw_message(offset) ⇒ Object



112
113
114
# File 'lib/sup/mbox.rb', line 112

## Concatenate every line produced by each_raw_message_line for +offset+
## into one string. Returns nil when there are no lines.
def raw_message offset
  lines = enum_for(:each_raw_message_line, offset)
  lines.inject { |text, line| text + line }
end

#store_message(date, from_email, &block) ⇒ Object



116
117
118
119
120
121
122
123
# File 'lib/sup/mbox.rb', line 116

## Append a message to the mbox file at @path: write a blank separator
## line when the file already has content, then the "From " line built
## from +from_email+ and +date+, then yield the open file so the caller
## can write the message itself.
def store_message date, from_email, &block
  ## File.size? is nil for a missing or empty file, i.e. exactly the
  ## cases where no blank line is wanted before the From line.
  has_content = !File.size?(@path).nil?
  File.open(@path, "ab") do |mbox|
    mbox.puts if has_content
    mbox.puts "From #{from_email} #{date.asctime}"
    yield mbox
  end
end