Class: Origami::PDF

Inherits:
Object
  • Object
show all
Defined in:
lib/origami/pdf.rb,
lib/origami/xfa.rb,
lib/origami/page.rb,
lib/origami/header.rb,
lib/origami/actions.rb,
lib/origami/catalog.rb,
lib/origami/trailer.rb,
lib/origami/acroform.rb,
lib/origami/filespec.rb,
lib/origami/metadata.rb,
lib/origami/signature.rb,
lib/origami/xreftable.rb,
lib/origami/encryption.rb,
lib/origami/collections.rb,
lib/origami/parsers/pdf.rb,
lib/origami/destinations.rb,
lib/origami/linearization.rb,
lib/origami/outputintents.rb,
lib/origami/parsers/pdf/lazy.rb,
lib/origami/parsers/pdf/linear.rb

Overview

Main class representing a PDF file and its inner contents. A PDF file contains a set of Revision.

Defined Under Namespace

Classes: Header, Instruction, InvalidHeaderError, LazyParser, LinearParser, LinearizationError, Parser, Revision

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(parser = nil) ⇒ PDF

Creates a new PDF instance.

parser

The Parser object creating the document. If none is specified, some default structures are automatically created to get a minimal working document.


155
156
157
158
159
160
161
162
163
164
165
# File 'lib/origami/pdf.rb', line 155

# Sets up the document state and its first revision.
#
# parser:: stored in @parser; when it is nil, +init+ is invoked once the
#          first revision and its trailer are in place.
def initialize(parser = nil)
  @header = PDF::Header.new
  @revisions = []
  @parser = parser
  @loaded = false

  # Open the first revision, then give it a fresh Trailer.
  add_new_revision
  @revisions.first.trailer = Trailer.new

  # Only documents built without a parser go through +init+.
  init if parser.nil?
end

Instance Attribute Details

#headerObject

Document header and revisions.


119
120
121
# File 'lib/origami/pdf.rb', line 119

# Reader for the @header instance variable.
def header
  @header
end

#revisionsObject

Document header and revisions.


119
120
121
# File 'lib/origami/pdf.rb', line 119

# Reader for the @revisions instance variable.
def revisions
  @revisions
end

Class Method Details

.create(output, options = {}) {|pdf| ... } ⇒ Object Also known as: write

Creates a new PDF and saves it. If a block is passed, the PDF instance can be processed before saving.

Yields:

  • (pdf)

142
143
144
145
146
# File 'lib/origami/pdf.rb', line 142

# Builds a blank PDF, hands it to the caller's block when one is given,
# then saves it.
#
# output::  forwarded as the first argument of PDF#save.
# options:: forwarded as the second argument of PDF#save.
#
# Returns whatever PDF#save returns.
def create(output, options = {})
  document = PDF.new.tap { |fresh| yield(fresh) if block_given? }
  document.save(output, options)
end

.read(path, options = {}) ⇒ Object

Reads and parses a PDF file from disk.


125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/origami/pdf.rb', line 125

# Parses a PDF and returns the result of the parser's +parse+ call.
#
# path::    a String is expanded with File.expand_path; any other value is
#           handed to the parser untouched.
# options:: passed to the parser constructor; a truthy :lazy entry selects
#           PDF::LazyParser, otherwise PDF::LinearParser is used.
def read(path, options = {})
  path = File.expand_path(path) if path.is_a?(::String)

  parser_class = options[:lazy] ? PDF::LazyParser : PDF::LinearParser
  parser_class.new(options).parse(path)
end

Instance Method Details

#<<(object) ⇒ Object Also known as: insert

Adds a new object to the PDF file. If this object has no object number, a new one is automatically computed and assigned to it.

It returns a Reference to this Object.

object

The object to add.


317
318
319
320
321
322
323
324
325
326
327
328
# File 'lib/origami/pdf.rb', line 317

# Adds +object+ to the document and returns the result of the underlying
# call.
#
# An object whose +document+ is set to something other than this PDF is
# routed through +import+; every other object is added to the last
# revision with +add_to_revision+.
def <<(object)
  owner = object.document
  foreign = owner && !owner.equal?(self)

  foreign ? import(object) : add_to_revision(object, @revisions.last)
end

#add_fields(*fields) ⇒ Object

Adds one or more fields to the Acrobat form.

field

The Field to add.

Raises:

  • (TypeError)

44
45
46
47
48
49
50
51
52
53
54
# File 'lib/origami/acroform.rb', line 44

# Appends the given fields to the catalog's AcroForm, creating the form
# and its Fields array when they are missing, and marks every field as
# indirect.
#
# fields:: any number of Field instances.
#
# Raises TypeError when at least one argument is not a Field.
# Returns self.
def add_fields(*fields)
  rejected = fields.reject { |candidate| candidate.is_a?(Field) }
  raise TypeError, "Expected Field arguments" unless rejected.empty?

  self.Catalog.AcroForm ||= InteractiveForm.new.set_indirect(true)

  form = self.Catalog.AcroForm
  form.Fields ||= []
  form.Fields.concat(fields)

  fields.each { |field| field.set_indirect(true) }

  self
end

#add_new_revisionObject

Ends the current Revision, and starts a new one.


364
365
366
367
368
369
370
371
372
# File 'lib/origami/pdf.rb', line 364

# Appends a new Revision to the document. The new revision gets a fresh
# Trailer whose Root is copied from the previous last revision's trailer
# (nil when there was no revision yet).
#
# Returns self.
def add_new_revision
  inherited_root = @revisions.empty? ? nil : @revisions.last.trailer[:Root]

  @revisions.push(Revision.new(self))

  current = @revisions.last
  current.trailer = Trailer.new
  current.trailer.Root = inherited_root

  self
end

#add_to_revision(object, revision) ⇒ Object

Adds a new object to a specific revision. If this object has no object number, a new one is automatically computed and assigned to it.

It returns a Reference to this Object.

object

The object to add.

revision

The revision to add the object to.


350
351
352
353
354
355
356
357
358
359
# File 'lib/origami/pdf.rb', line 350

# Stores +object+ in the body of +revision+, keyed by its reference.
#
# The object is flagged as indirect and attached to this document. When
# its number is 0, a number/generation pair is taken from
# +allocate_new_object_number+ first.
#
# Returns object.reference.
def add_to_revision(object, revision)
  object.set_indirect(true)
  object.set_document(self)

  if object.no == 0
    object.no, object.generation = allocate_new_object_number
  end

  revision.body.store(object.reference, object)

  object.reference
end

#allocate_new_object_numberObject

Returns a new number/generation for future object.


519
520
521
522
523
524
525
526
527
528
# File 'lib/origami/pdf.rb', line 519

# Computes a [number, generation] pair for a new object.
#
# The number is one more than the highest +no+ among the objects
# enumerated by each_object(compressed: true), or 1 when there are none.
# The generation is always 0.
def allocate_new_object_number
  highest = each_object(compressed: true).map { |object| object.no }.max
  next_number = highest.nil? ? 1 : highest + 1

  [next_number, 0]
end

#append_page(page = Page.new) {|page| ... } ⇒ Object

Appends a page or list of pages to the end of the page tree.

page

The page to append to the document. Creates a new Page if not specified.

Pass the Page object if a block is present.

Yields:

  • (page)

29
30
31
32
33
34
35
36
# File 'lib/origami/page.rb', line 29

# Calls init_page_tree, then appends +page+ through the catalog's Pages
# node.
#
# page:: the page to append; a new Page is built when omitted.
#
# When a block is given, +page+ is yielded to it after the append.
# Returns self.
def append_page(page = Page.new)
  init_page_tree

  self.Catalog.Pages.append_page(page)
  yield(page) if block_given?

  self
end

#attach_file(path, register: true, name: nil, filter: :FlateDecode) ⇒ Object

Attaches an embedded file to the PDF.

path

The path to the file to attach.

register

Whether the file shall be registered in the name directory.

name

The embedded file name of the attachment.

filter

The stream filter used to store the file contents.


30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/origami/filespec.rb', line 30

# Wraps a file into a FileSpec and optionally registers it under the
# embedded-files name root.
#
# path::     a FileSpec (used as is), an object responding to +read+
#            (its content is read), or a filesystem path (read with
#            File.binread).
# register:: when truthy, the result is registered under
#            Names::EMBEDDED_FILES with +name+.
# name::     attachment name; defaults to the basename of +path+ for
#            filesystem paths and to an empty string otherwise.
# filter::   value assigned to the Filter entry of the embedded stream.
#
# Returns the outer FileSpec.
def attach_file(path, register: true, name: nil, filter: :FlateDecode)
  filespec =
    if path.is_a?(FileSpec)
      name ||= ''
      path
    else
      contents =
        if path.respond_to?(:read)
          raw = path.read.force_encoding('binary')
          name ||= ''
          raw
        else
          raw = File.binread(File.expand_path(path))
          name ||= File.basename(path)
          raw
        end

      embedded = EmbeddedFileStream.new
      embedded.data = contents
      embedded.Filter = filter

      FileSpec.new(F: embedded)
    end

  fspec = FileSpec.new.setType(:Filespec).setF(name.dup).setEF(filespec)

  self.register(Names::EMBEDDED_FILES, name.dup, fspec) if register

  fspec
end

#authorObject


43
44
45
# File 'lib/origami/metadata.rb', line 43

# Returns the :Author entry as looked up by get_document_info_field.
def author
  get_document_info_field(:Author)
end

#cast_object(reference, type) ⇒ Object

Casts a PDF object into another object type. The target type must be a subtype of the original type.


499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
# File 'lib/origami/pdf.rb', line 499

# Looks +reference+ up in each revision body, in order, and tries to turn
# the stored object into +type+.
#
# * An object that already is a +type+ is returned unchanged.
# * When +type+ is a strict subclass of the object's class, the object is
#   replaced in the revision body by object.cast_to(type, @parser) and the
#   replacement is returned.
# * Otherwise the search continues with the next revision.
#
# Returns nil when no revision produced a result.
def cast_object(reference, type) # :nodoc:
  @revisions.each do |rev|
    next unless rev.body.include?(reference)

    current = rev.body[reference]
    return current if current.is_a?(type)

    next unless type < current.class

    rev.body[reference] = current.cast_to(type, @parser)
    return rev.body[reference]
  end

  nil
end

#CatalogObject

Returns the current Catalog Dictionary.

Raises:


39
40
41
42
43
44
# File 'lib/origami/catalog.rb', line 39

# Returns the object found under the trailer's :Root key.
#
# Raises InvalidPDFError when that object is not a Catalog.
def Catalog
  trailer_key(:Root).tap do |root|
    raise InvalidPDFError, "Broken catalog" unless root.is_a?(Catalog)
  end
end

#Catalog=(cat) ⇒ Object

Sets the current Catalog Dictionary.

Raises:

  • (TypeError)

49
50
51
52
53
54
55
# File 'lib/origami/catalog.rb', line 49

# Installs +cat+ as the Root of the last revision's trailer.
#
# Any object currently referenced by that trailer's :Root entry is removed
# with delete_object first; +cat+ is then inserted into the document and
# the result of that insertion is stored as the new Root.
#
# Raises TypeError when +cat+ is not a Catalog.
def Catalog=(cat)
  raise TypeError, "Must be a Catalog object" unless cat.is_a?(Catalog)

  trailer = @revisions.last.trailer
  previous_root = trailer[:Root]
  delete_object(previous_root) if previous_root

  trailer.Root = self << cat
end

#create_form(*fields) ⇒ Object

Creates a new AcroForm with specified fields.


33
34
35
36
37
38
# File 'lib/origami/acroform.rb', line 33

# Makes sure the catalog has an AcroForm (a new indirect InteractiveForm is
# assigned when the entry is unset), adds +fields+ through add_fields and
# returns the form.
def create_form(*fields)
  acroform = self.Catalog.AcroForm ||= InteractiveForm.new.set_indirect(true)
  add_fields(*fields)

  acroform
end

#create_metadata(info = {}) ⇒ Object

Modifies or creates a metadata stream.


109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/origami/metadata.rb', line 109

# Modifies the catalog's metadata stream, or creates one from a minimal XMP
# skeleton when the catalog has none.
#
# info:: Hash of name => value pairs; each pair is appended to the first
#        rdf:Description element as a <pdf:NAME>VALUE</pdf:NAME> child.
#
# Returns the catalog's Metadata entry after the update.
def create_metadata(info = {})
  # The XMP packet wrapper uses the "xpacket" processing instruction for
  # both the header and the trailer.
  skeleton = <<-XMP
        <?xpacket begin="\xef\xbb\xbf" id="W5M0MpCehiHzreSzNTczkc9d"?>
          <x:xmpmeta xmlns:x="adobe:ns:meta/">
            <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
              <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
              </rdf:Description>
            </rdf:RDF>
          </x:xmpmeta>
        <?xpacket end="w"?>
  XMP

  # Start from the existing stream content when there is one.
  xml =
    if self.Catalog.Metadata.is_a?(Stream)
      self.Catalog.Metadata.data
    else
      skeleton
    end

  doc = REXML::Document.new(xml)
  desc = doc.elements['*/*/rdf:Description']

  info.each do |name, value|
    elt = REXML::Element.new "pdf:#{name}"
    elt.text = value

    desc.elements << elt
  end

  # Serialize into a mutable buffer (safe even under frozen string literals).
  xml = +""
  doc.write(xml, 4)

  if self.Catalog.Metadata.is_a?(Stream)
    self.Catalog.Metadata.data = xml
  else
    self.Catalog.Metadata = Stream.new(xml)
  end

  self.Catalog.Metadata
end

#create_xfa_form(xdp, *fields) ⇒ Object


48
49
50
51
52
53
# File 'lib/origami/xfa.rb', line 48

# Builds the form with create_form(*fields), then stores an XFAStream made
# from +xdp+ (with Filter: :FlateDecode) in the form's XFA entry.
#
# Returns the form.
def create_xfa_form(xdp, *fields)
  acroform = create_form(*fields)
  acroform.XFA = XFAStream.new(xdp, Filter: :FlateDecode)

  acroform
end

#creation_dateObject


63
64
65
# File 'lib/origami/metadata.rb', line 63

# Returns the :CreationDate entry as looked up by get_document_info_field.
def creation_date
  get_document_info_field(:CreationDate)
end

#creatorObject


55
56
57
# File 'lib/origami/metadata.rb', line 55

# Returns the :Creator entry as looked up by get_document_info_field.
def creator
  get_document_info_field(:Creator)
end

#decrypt(passwd = "") ⇒ Object

Decrypts the current document.

passwd

The password to decrypt the document.

Raises:


48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/origami/encryption.rb', line 48

# Decrypts the document in place and returns self.
#
# passwd:: password handed to the handler's derive_encryption_key.
#
# Raises EncryptionError when the document is not encrypted, or when the
# trailer :ID entry is not an Array and the handler's V entry is not 5.
# Raises EncryptionNotSupportedError when the handler's Filter is not
# :Standard.
def decrypt(passwd = "")
  raise EncryptionError, "PDF is not encrypted" unless encrypted?

  # Turn the encryption dictionary into a standard encryption dictionary.
  handler = trailer_key(:Encrypt)
  handler = cast_object(handler.reference, Encryption::Standard::Dictionary)

  unless handler.Filter == :Standard
    raise EncryptionNotSupportedError, "Unknown security handler : '#{handler.Filter}'"
  end

  # Only the first element of the :ID array is used for key derivation.
  # A non-Array :ID is tolerated only when the handler's V entry is 5.
  doc_id = trailer_key(:ID)
  if doc_id.is_a?(Array)
    doc_id = doc_id.first
  else
    raise EncryptionError, "Document ID was not found or is invalid" unless handler.V.to_i == 5
  end

  encryption_key = handler.derive_encryption_key(passwd, doc_id)

  # Mix the encrypted-document behaviour into this instance before
  # installing the handler and key.
  extend(Encryption::EncryptedDocument)
  self.encryption_handler = handler
  self.encryption_key = encryption_key

  decrypt_objects

  self
end

#delete_object(no, generation = 0) ⇒ Object

Remove an object.


401
402
403
404
405
406
407
408
409
410
411
412
413
414
# File 'lib/origami/pdf.rb', line 401

# Removes an object from the body of every revision.
#
# no::         a Reference, or an Integer object number.
# generation:: generation used to build the Reference when +no+ is an
#              Integer.
#
# Raises TypeError for any other kind of +no+.
# Returns the revisions array.
def delete_object(no, generation = 0)
  target =
    case no
    when Reference then no
    when ::Integer then Reference.new(no, generation)
    else raise TypeError, "Invalid parameter type : #{no.class}"
    end

  @revisions.each { |rev| rev.body.delete(target) }
end

#delinearize!Object

Tries to delinearize the document if it has been linearized. This operation destroys the existing xref tables; a future version should merge them instead.

Raises:


43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/origami/linearization.rb', line 43

# Undoes linearization: the first revision is folded into the last one and
# then removed. All xref information is dropped in the process.
#
# Raises LinearizationError when the document is not linearized, or when no
# trailer information can be found (no trailer dictionary in the first
# revision and no XRefStream in the last one).
# Returns self.
def delinearize!
  raise LinearizationError, 'Not a linearized document' unless linearized?

  #
  # Saves the first trailer.
  #
  prev_trailer = @revisions.first.trailer

  # The object with the lowest file offset in the first revision is taken
  # as the linearization dictionary.
  linear_dict = @revisions.first.objects.min_by { |obj| obj.file_offset }

  #
  # Removes hint streams used by linearization.
  #
  delete_hint_streams(linear_dict)

  #
  # Update the trailer.
  #
  last_trailer = (@revisions.last.trailer ||= Trailer.new)
  last_trailer.dictionary ||= Dictionary.new

  if prev_trailer.dictionary?
    last_trailer.dictionary =
      last_trailer.dictionary.merge(prev_trailer.dictionary)
  else
    # No dictionary in the first trailer: fall back to the entries of the
    # last revision's xref stream.
    xrefstm = @revisions.last.xrefstm
    unless xrefstm.is_a?(XRefStream)
      raise LinearizationError,
        'Cannot find trailer info while delinearizing document'
    end

    last_trailer.dictionary[:Root] = xrefstm[:Root]
    last_trailer.dictionary[:Encrypt] = xrefstm[:Encrypt]
    last_trailer.dictionary[:Info] = xrefstm[:Info]
    last_trailer.dictionary[:ID] = xrefstm[:ID]
  end

  #
  # Remove all xrefs.
  # Fix: Should be merged instead.
  #
  remove_xrefs

  #
  # Remove the linearization revision.
  #
  @revisions.first.body.delete(linear_dict.reference)
  @revisions.last.body.merge! @revisions.first.body

  remove_revision(0)

  self
end

#document_infoObject

Returns the document information dictionary if present.


35
36
37
# File 'lib/origami/metadata.rb', line 35

# Returns the value obtained from trailer_key for the :Info key.
def document_info
  trailer_key :Info
end

#document_info?Boolean

Returns true if the document has a document information dictionary.

Returns:


28
29
30
# File 'lib/origami/metadata.rb', line 28

# Returns the result of trailer_key? for the :Info key.
def document_info?
  trailer_key? :Info
end

#each_fieldObject

Iterates over each Acroform Field.


66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/origami/acroform.rb', line 66

# Yields every entry of the AcroForm Fields array after calling +solve+ on
# it.
#
# Without a block, returns an Enumerator whose size is the length of the
# Fields array (0 when there is no form or Fields is not an Array).
# With a block, returns the Fields array, or nil when there is nothing to
# iterate over.
def each_field
  fields_present = -> { form? && self.Catalog.AcroForm.Fields.is_a?(Array) }

  unless block_given?
    return enum_for(__method__) { fields_present.call ? self.Catalog.AcroForm.Fields.length : 0 }
  end

  return unless fields_present.call

  self.Catalog.AcroForm.Fields.each { |field| yield(field.solve) }
end

#each_name(root, &block) ⇒ Object

Returns an Enumerator of all names under the specified root name directory.


132
133
134
135
136
137
138
139
140
# File 'lib/origami/catalog.rb', line 132

# Walks the names stored under the +root+ name directory, passing the block
# on to names_from_node.
#
# Without a block, returns an Enumerator over the same iteration.
# With a block, returns self, or nil when get_names_root finds nothing for
# +root+.
def each_name(root, &block)
  if block
    node = get_names_root(root)
    unless node.nil?
      names_from_node(node, &block)
      self
    end
  else
    enum_for(__method__, root)
  end
end

#each_named_dest(&b) ⇒ Object

Calls block for each named destination.


33
34
35
# File 'lib/origami/destinations.rb', line 33

# Runs each_name on the Names::DESTINATIONS root, forwarding the block, and
# returns each_name's result.
def each_named_dest(&b)
  each_name(Names::DESTINATIONS, &b)
end

#each_named_embedded_file(&b) ⇒ Object Also known as: each_attachment

Calls block for each named embedded file.


73
74
75
# File 'lib/origami/filespec.rb', line 73

# Runs each_name on the Names::EMBEDDED_FILES root, forwarding the block,
# and returns each_name's result.
def each_named_embedded_file(&b)
  each_name(Names::EMBEDDED_FILES, &b)
end

#each_named_page(&b) ⇒ Object

Calls block for each named page.


95
96
97
# File 'lib/origami/page.rb', line 95

# Runs each_name on the Names::PAGES root, forwarding the block, and
# returns each_name's result.
def each_named_page(&b)
  each_name(Names::PAGES, &b)
end

#each_named_script(&b) ⇒ Object

Calls block for each named JavaScript script.


33
34
35
# File 'lib/origami/actions.rb', line 33

# Runs each_name on the Names::JAVASCRIPT root, forwarding the block, and
# returns each_name's result.
def each_named_script(&b)
  each_name(Names::JAVASCRIPT, &b)
end

#each_object(compressed: false, recursive: false, &block) ⇒ Object

Iterates over the objects of the document. compressed: iterates over the objects inside object streams. recursive: iterates recursively inside objects like arrays and dictionaries.


278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# File 'lib/origami/pdf.rb', line 278

# Iterates over the objects of every revision, in revision order.
#
# compressed:: when true, objects stored inside ObjectStream instances are
#              yielded as well, right after their containing stream.
# recursive::  when true, each yielded object is additionally walked with
#              walk_object, which receives the same block.
#
# Without a block, returns an Enumerator configured with the same options.
# With a block, returns the revisions array.
def each_object(compressed: false, recursive: false, &block)
  unless block_given?
    return enum_for(__method__, compressed: compressed,
      recursive: recursive)
  end

  @revisions.each do |revision|
    revision.each_object do |object|
      block.call(object)

      walk_object(object, &block) if recursive

      next unless compressed && object.is_a?(ObjectStream)

      object.each do |child_obj|
        block.call(child_obj)

        # The block must be forwarded here too; without it the walk never
        # reaches the caller for objects stored inside object streams.
        walk_object(child_obj, &block) if recursive
      end
    end
  end
end

#each_page(&b) ⇒ Object

Iterate through each page, returns self.


70
71
72
73
74
# File 'lib/origami/page.rb', line 70

# Calls init_page_tree, then delegates to the each_page method of the
# catalog's Pages node with the given block. Returns that call's result.
def each_page(&b)
  init_page_tree

  self.Catalog.Pages.each_page(&b)
end

#enable_usage_rights(cert, pkey, *rights) ⇒ Object

Enable the document Usage Rights.

rights

list of rights defined in UsageRights::Rights


203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# File 'lib/origami/signature.rb', line 203

# Signs the document with a UR3 usage-rights signature stored in the
# catalog's Perms dictionary, then freezes the document.
#
# cert::   an OpenSSL::X509::Certificate, or data accepted by
#          OpenSSL::X509::Certificate.new.
# pkey::   an OpenSSL::PKey::RSA, or data accepted by
#          OpenSSL::PKey::RSA.new.
# rights:: each entry is read as [key, *values]: the first element selects
#          a TransformParams entry and the remaining elements are appended
#          to it.
def enable_usage_rights(cert, pkey, *rights)
  # Always uses a detached PKCS7 signature for UR.
  method = Signature::PKCS7_DETACHED

  #
  # Load key pair
  #
  key = pkey.is_a?(OpenSSL::PKey::RSA) ? pkey : OpenSSL::PKey::RSA.new(pkey)
  certificate = cert.is_a?(OpenSSL::X509::Certificate) ? cert : OpenSSL::X509::Certificate.new(cert)

  #
  # Forge digital signature dictionary
  #
  digsig = Signature::DigitalSignature.new.set_indirect(true)

  self.Catalog.AcroForm ||= InteractiveForm.new
  # self.Catalog.AcroForm.SigFlags = InteractiveForm::SigFlags::APPEND_ONLY

  # Contents is pre-filled with NUL bytes sized by Signature.required_size;
  # the real signature is written over it at the end of the method.
  digsig.Type = :Sig
  digsig.Contents = HexaString.new("\x00" * Signature.required_size(method, certificate, key, []))
  digsig.Filter = :'Adobe.PPKLite'
  digsig.Name = "ARE Acrobat Product v8.0 P23 0002337"
  digsig.SubFilter = Name.new(method)
  digsig.ByteRange = [0, 0, 0, 0]

  sigref = Signature::Reference.new
  sigref.Type = :SigRef
  sigref.TransformMethod = :UR3
  sigref.Data = self.Catalog

  sigref.TransformParams = UsageRights::TransformParams.new
  sigref.TransformParams.P = true
  sigref.TransformParams.Type = :TransformParams
  sigref.TransformParams.V = UsageRights::TransformParams::VERSION

  rights.each do |right|
    sigref.TransformParams[right.first] ||= []
    sigref.TransformParams[right.first].concat(right[1..])
  end

  digsig.Reference = [sigref]

  self.Catalog.Perms ||= Perms.new
  self.Catalog.Perms.UR3 = digsig

  #
  #  Flattening the PDF to get file view.
  #
  compile

  #
  # Creating an empty Xref table to compute signature byte range.
  #
  rebuild_dummy_xrefs

  sig_offset = get_object_offset(digsig.no, digsig.generation) + digsig.signature_offset

  digsig.ByteRange[0] = 0
  digsig.ByteRange[1] = sig_offset
  digsig.ByteRange[2] = sig_offset + digsig.Contents.size

  # Re-evaluate until the stored value agrees with filesize; filesize is
  # read again on every pass.
  until digsig.ByteRange[3] == filesize - digsig.ByteRange[2]
    digsig.ByteRange[3] = filesize - digsig.ByteRange[2]
  end

  # From that point on, the file size remains constant

  #
  # Correct Xrefs variations caused by ByteRange modifications.
  #
  rebuild_xrefs

  # The signed data is the output minus the span between the two byte
  # ranges.
  file_data = output
  signable_data = file_data[digsig.ByteRange[0], digsig.ByteRange[1]] +
    file_data[digsig.ByteRange[2], digsig.ByteRange[3]]

  signature = Signature.compute(method, signable_data, certificate, key, [])
  digsig.Contents[0, signature.size] = signature

  #
  # No more modification are allowed after signing.
  #
  freeze
end

#encrypt(options = {}) ⇒ Object

Encrypts the current document with the provided passwords. The document will be encrypted at writing-on-disk time.

userpasswd

The user password.

ownerpasswd

The owner password.

options

A set of options to configure encryption.

Raises:


84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/origami/encryption.rb', line 84

# Prepares the document for encryption and returns self.
#
# options:: Hash overriding any of the defaults below
#           (:user_passwd, :owner_passwd, :cipher, :key_size, :hardened,
#           :encrypt_metadata, :permissions).
#
# Raises EncryptionError when the document is already encrypted.
def encrypt(options = {})
  raise EncryptionError, "PDF is already encrypted" if encrypted?

  # Defaults, overridden key by key by the caller's options.
  defaults = {
    user_passwd: '',
    owner_passwd: '',
    cipher: 'aes',            # :RC4 or :AES
    key_size: 128,            # Key size in bits
    hardened: false,          # Use newer password validation (since Reader X)
    encrypt_metadata: true,   # Metadata shall be encrypted?
    permissions: Encryption::Standard::Permissions::ALL    # Document permissions
  }
  params = defaults.merge(options)

  # Cryptographic parameters, then the security handler built from them.
  version, revision = crypto_revision_from_options(params)
  handler, key = create_security_handler(version, revision, params)

  # From here on this instance behaves as an EncryptedDocument.
  extend(Encryption::EncryptedDocument)
  self.encryption_handler = handler
  self.encryption_key = key

  self
end

#encrypted?Boolean

Returns whether the PDF file is encrypted.

Returns:


40
41
42
# File 'lib/origami/encryption.rb', line 40

# Returns the result of trailer_key? for the :Encrypt key.
def encrypted?
  trailer_key? :Encrypt
end

#fieldsObject

Returns an Array of Acroform fields.


59
60
61
# File 'lib/origami/acroform.rb', line 59

# Collects everything enumerated by each_field into an Array.
def fields
  each_field.to_a
end

#form?Boolean

Returns true if the document contains an acrobat form.

Returns:


26
27
28
# File 'lib/origami/acroform.rb', line 26

# Tells whether the catalog has an :AcroForm key.
def form?
  self.Catalog.key? :AcroForm
end

#get_destination_by_name(name) ⇒ Object

Lookup destination in the destination name directory.


26
27
28
# File 'lib/origami/destinations.rb', line 26

# Resolves +name+ under the Names::DESTINATIONS root through resolve_name.
def get_destination_by_name(name)
  resolve_name Names::DESTINATIONS, name
end

#get_embedded_file_by_name(name) ⇒ Object

Lookup embedded file in the embedded files name directory.


66
67
68
# File 'lib/origami/filespec.rb', line 66

# Resolves +name+ under the Names::EMBEDDED_FILES root through
# resolve_name.
def get_embedded_file_by_name(name)
  resolve_name Names::EMBEDDED_FILES, name
end

#get_field(name) ⇒ Object

Returns the corresponding named Field.


87
88
89
90
91
92
93
# File 'lib/origami/acroform.rb', line 87

# Returns the first field enumerated by each_field whose :T entry solves to
# +name+, or nil when no field matches.
#
# Fields without a :T entry are skipped instead of aborting the search.
def get_field(name)
  each_field do |field|
    partial_name = field[:T]
    next if partial_name.nil?

    return field if partial_name.solve == name
  end

  nil
end

#get_object(no, generation = 0, use_xrefstm: true) ⇒ Object Also known as: []

Search for an indirect object in the document.

no

Reference or number of the object.

generation

Object generation.


421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
# File 'lib/origami/pdf.rb', line 421

# Looks up an indirect object, trying progressively more expensive sources.
# Revisions are always scanned from the newest to the oldest.
#
# no::          a Reference, an Integer object number, or an
#               Origami::Object (returned as is).
# generation::  generation used when +no+ is an Integer.
# use_xrefstm:: when false, the search stops after the xref tables and
#               returns nil without consulting xref streams or object
#               streams.
#
# Raises TypeError for any other kind of +no+.
# Returns the object, or nil when it cannot be found.
def get_object(no, generation = 0, use_xrefstm: true) # :nodoc:
  case no
  when Reference
    target = no
  when ::Integer
    target = Reference.new(no, generation)
  when Origami::Object
    return no
  else
    raise TypeError, "Invalid parameter type : #{no.class}"
  end

  #
  # Search through accessible indirect objects.
  #
  @revisions.reverse_each do |rev|
    return rev.body[target] if rev.body.include?(target)
  end

  #
  # Search through xref sections.
  #
  @revisions.reverse_each do |rev|
    next unless rev.xreftable?

    xref = rev.xreftable.find(target.refno)
    next if xref.nil? || xref.free?

    # Try loading the object if it is not present.
    object = load_object_at_offset(rev, xref.offset)
    return object unless object.nil?
  end

  return nil unless use_xrefstm

  # Search through xref streams.
  @revisions.reverse_each do |rev|
    next unless rev.xrefstm?

    xrefstm = rev.xrefstm

    xref = xrefstm.find(target.refno)
    next if xref.nil?

    #
    # We found a matching XRef.
    #
    if xref.is_a?(XRefToCompressedObject)
      # NOTE(review): objstm is used without a nil check; confirm that the
      # referenced object stream can always be resolved here.
      objstm = get_object(xref.objstmno, 0, use_xrefstm: use_xrefstm)

      # Prefer the object at the recorded index; fall back to a lookup by
      # number when that slot holds something else.
      object = objstm.extract_by_index(xref.index)
      if object.is_a?(Origami::Object) && (object.no == target.refno)
        return object
      else
        return objstm.extract(target.refno)
      end
    elsif xref.is_a?(XRef)
      object = load_object_at_offset(rev, xref.offset)
      return object unless object.nil?
    end
  end

  #
  # Lastly search directly into Object streams (might be very slow).
  #
  @revisions.reverse_each do |rev|
    stream = rev.objects.find { |obj| obj.is_a?(ObjectStream) and obj.include?(target.refno) }
    return stream.extract(target.refno) unless stream.nil?
  end

  nil
end

#get_object_by_offset(offset) ⇒ Object

Looking for an object present at a specified file offset.


394
395
396
# File 'lib/origami/pdf.rb', line 394

# Returns the first object enumerated by each_object whose file_offset
# equals +offset+, or nil when there is none.
def get_object_by_offset(offset) # :nodoc:
  each_object.find { |obj| obj.file_offset == offset }
end

#get_page(n) ⇒ Object

Get the n-th Page object.


79
80
81
82
83
# File 'lib/origami/page.rb', line 79

# Calls init_page_tree, then asks the catalog's Pages node for page +n+
# through its get_page method.
def get_page(n)
  init_page_tree

  self.Catalog.Pages.get_page(n)
end

#get_page_by_name(name) ⇒ Object

Lookup page in the page name directory.


88
89
90
# File 'lib/origami/page.rb', line 88

# Resolves +name+ under the Names::PAGES root through resolve_name.
def get_page_by_name(name)
  resolve_name Names::PAGES, name
end

#get_script_by_name(name) ⇒ Object

Lookup script in the scripts name directory.


26
27
28
# File 'lib/origami/actions.rb', line 26

# Resolves +name+ under the Names::JAVASCRIPT root through resolve_name.
def get_script_by_name(name)
  resolve_name Names::JAVASCRIPT, name
end

#grep(pattern, streams: true, object_streams: true) ⇒ Object

Returns an array of strings, names and streams matching the given pattern. streams: Search into decoded stream data. object_streams: Search into objects inside object streams.

Raises:

  • (TypeError)

244
245
246
247
248
249
250
251
252
253
254
255
256
# File 'lib/origami/pdf.rb', line 244

# Runs search_object over every indirect object and returns all matches in
# a single Array.
#
# pattern::        a Regexp, or a String that is escaped and matched
#                  case-insensitively.
# streams::        forwarded to search_object.
# object_streams:: forwarded to search_object.
#
# Raises TypeError when +pattern+ is neither a String nor a Regexp.
def grep(pattern, streams: true, object_streams: true) # :nodoc:
  pattern = /#{Regexp.escape(pattern)}/i if pattern.is_a?(::String)
  raise TypeError, "Expected a String or Regexp" unless pattern.is_a?(Regexp)

  indirect_objects.each_with_object([]) do |object, matches|
    matches.concat(
      search_object(object, pattern, streams: streams, object_streams: object_streams)
    )
  end
end

#import(object) ⇒ Object

Similar to PDF#insert or PDF#<<, but for an object belonging to another document. Object will be recursively copied and new version numbers will be assigned. Returns the new reference to the imported object.

object

The object to import.


337
338
339
# File 'lib/origami/pdf.rb', line 337

# Passes the result of object.export to insert and returns what insert
# returns.
def import(object)
  insert(object.export)
end

#indirect_objectsObject Also known as: root_objects

Return an array of indirect objects.


304
305
306
# File 'lib/origami/pdf.rb', line 304

# Returns a new Array holding the +objects+ of every revision, in revision
# order.
def indirect_objects
  @revisions.each_with_object([]) { |rev, all| all.concat(rev.objects) }
end

#insert_page(index, page = Page.new) {|page| ... } ⇒ Object

Inserts a page at position index into the document.

index

Page index (starting from one).

page

The page to insert into the document. Creates a new one if none given.

Pass the Page object if a block is present.

Yields:

  • (page)

45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/origami/page.rb', line 45

# Calls init_page_tree, then inserts +page+ at +index+ through the
# catalog's Pages node.
#
# index:: position forwarded to the Pages node's insert_page.
# page::  the page to insert; a new Page is built when omitted.
#
# When a block is given, the inserted page is yielded to it.
# Returns self.
def insert_page(index, page = Page.new)
  init_page_tree

  # Page from another document must be exported.
  # (From here on +page+ refers to the exported copy.)
  page = page.export if page.document && (page.document != self)

  self.Catalog.Pages.insert_page(index, page)

  yield(page) if block_given?

  self
end

#keywordsObject


51
52
53
# File 'lib/origami/metadata.rb', line 51

# Returns the :Keywords entry as looked up by get_document_info_field.
def keywords
  get_document_info_field(:Keywords)
end

#linearized?Boolean

Returns whether the current document is linearized.

Returns:


29
30
31
32
33
34
35
36
37
# File 'lib/origami/linearization.rb', line 29

# Tells whether the document looks linearized: it has more than one
# revision and the object with the lowest file offset in the first revision
# is a Dictionary with a :Linearized key.
#
# Any StandardError raised while locating that first object makes the
# method return false.
def linearized?
  first_obj =
    begin
      @revisions.first.objects.min_by(&:file_offset)
    rescue
      return false
    end

  @revisions.size > 1 && first_obj.is_a?(Dictionary) && first_obj.has_key?(:Linearized)
end

#loaded!Object

Marks the document as complete. No more objects need to be fetched by the parser.


534
535
536
# File 'lib/origami/pdf.rb', line 534

# Sets the @loaded flag to true.
def loaded!
  @loaded = true
end

#loaded?Boolean

Returns whether the document has been fully loaded by the parser.

Returns:


541
542
543
# File 'lib/origami/pdf.rb', line 541

# Returns the current value of the @loaded flag.
def loaded?
  @loaded
end

#ls(pattern, follow_references: true) ⇒ Object

Returns an array of Objects whose name (in a Dictionary) is matching pattern.

Raises:

  • (TypeError)

261
262
263
264
265
266
267
268
269
270
271
# File 'lib/origami/pdf.rb', line 261

# Returns the values stored under dictionary keys whose name matches
# +pattern+.
#
# pattern::           a Regexp, or a String that is escaped and matched
#                     case-insensitively.
# follow_references:: when truthy, +solve+ is called on each value before
#                     it is returned.
#
# Raises TypeError when +pattern+ is neither a String nor a Regexp.
def ls(pattern, follow_references: true)
  pattern = /#{Regexp.escape(pattern)}/i if pattern.is_a?(::String)
  raise TypeError, "Expected a String or Regexp" unless pattern.is_a?(Regexp)

  # Keep only Name hits that really are keys of their parent dictionary.
  matching_keys = grep(pattern, streams: false, object_streams: true).select do |hit|
    hit.is_a?(Name) && hit.parent.is_a?(Dictionary) && hit.parent.key?(hit)
  end

  matching_keys.map do |key|
    value = key.parent[key]
    follow_references ? value.solve : value
  end
end

#metadataObject

Returns a Hash of the information found in the metadata stream


81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/origami/metadata.rb', line 81

# Returns a Hash of the information found in the catalog's metadata stream,
# or nil when the catalog's Metadata entry is not a Stream.
#
# For every rdf:Description element, attributes carrying the "pdf" or "xap"
# prefix are recorded under their local name, and every child element is
# recorded under its local name with the text of its first nested rdf:li
# (or its own text when it has none).
def metadata
  stream = self.Catalog.Metadata

  if stream.is_a?(Stream)
    doc = REXML::Document.new(stream.data)
    info = {}

    doc.elements.each('*/*/rdf:Description') do |description|
      description.attributes.each_attribute do |attr|
        case attr.prefix
        when 'pdf', 'xap'
          info[attr.name] = attr.value
        end
      end

      description.elements.each('*') do |element|
        value = (element.elements['.//rdf:li'] || element).text
        info[element.name] = value.to_s
      end
    end

    info
  end
end

#metadata?Boolean

Returns true if the document has a catalog metadata stream.

Returns:


74
75
76
# File 'lib/origami/metadata.rb', line 74

# Tells whether the catalog's Metadata entry is a Stream.
def metadata?
  self.Catalog.Metadata.is_a?(Stream)
end

#mod_dateObject


67
68
69
# File 'lib/origami/metadata.rb', line 67

# Returns the :ModDate entry as looked up by get_document_info_field.
def mod_date
  get_document_info_field(:ModDate)
end

#names(root) ⇒ Object

Returns a Hash of all names under the specified root name directory.


125
126
127
# File 'lib/origami/catalog.rb', line 125

# Converts the pairs enumerated by each_name(root) into a Hash.
def names(root)
  each_name(root).to_h
end

#onDocumentClose(action) ⇒ Object

Sets an action to run on document closing.

action

A JavaScript Action Object.


71
72
73
74
75
76
# File 'lib/origami/catalog.rb', line 71

# Stores +action+ in the WC entry of the catalog's AA dictionary, assigning
# a new CatalogAdditionalActions to AA first when it is unset.
#
# Returns self.
def onDocumentClose(action)
  self.Catalog.AA ||= CatalogAdditionalActions.new
  self.Catalog.AA.WC = action

  self
end

#onDocumentOpen(action) ⇒ Object

Sets an action to run on document opening.

action

An Action Object.


61
62
63
64
65
# File 'lib/origami/catalog.rb', line 61

# Assigns +action+ to the catalog's OpenAction entry.
#
# Returns self.
def onDocumentOpen(action)
  self.Catalog.OpenAction = action

  self
end

#onDocumentPrint(action) ⇒ Object

Sets an action to run on document printing.

action

A JavaScript Action Object.


82
83
84
85
86
87
# File 'lib/origami/catalog.rb', line 82

# Stores +action+ in the WP entry of the catalog's AA dictionary, assigning
# a new CatalogAdditionalActions to AA first when it is unset.
#
# Returns self.
def onDocumentPrint(action)
  self.Catalog.AA ||= CatalogAdditionalActions.new
  self.Catalog.AA.WP = action

  self
end

#original_dataObject

Original data parsed to create this document, nil if created from scratch.


184
185
186
# File 'lib/origami/pdf.rb', line 184

# Returns the parser's target_data, or nil when @parser is nil.
def original_data
  @parser&.target_data
end

#original_filenameObject

Original file name if parsed from disk, nil otherwise.


170
171
172
# File 'lib/origami/pdf.rb', line 170

# Returns the parser's target_filename, or nil when @parser is nil.
def original_filename
  @parser&.target_filename
end

#original_filesizeObject

Original file size if parsed from a data stream, nil otherwise.


177
178
179
# File 'lib/origami/pdf.rb', line 177

# Returns the parser's target_filesize, or nil when @parser is nil.
def original_filesize
  @parser&.target_filesize
end

#pagesObject

Returns an Enumerator of Page


61
62
63
64
65
# File 'lib/origami/page.rb', line 61

# Calls init_page_tree, then returns the result of the +pages+ method of
# the catalog's Pages node.
def pages
  init_page_tree

  self.Catalog.Pages.pages
end

#pdfa1?Boolean

Returns:


41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/origami/outputintents.rb', line 41

# Tells whether the document declares itself as PDF/A-1a. All of the
# following must hold:
#
# * the catalog's OutputIntents entry is an Array containing at least one
#   intent whose S entry equals OutputIntent::Intent::PDFA1;
# * the document has a metadata stream (metadata?);
# * that stream contains an rdf:Description declaring the pdfaid namespace
#   whose pdfaid:conformance element reads "A" and pdfaid:part element
#   reads "1".
#
# NOTE(review): the two pdfaid elements are dereferenced without a nil
# check, so a description that declares the namespace but lacks either
# child element looks like it would raise NoMethodError — confirm.
def pdfa1?
  self.Catalog.OutputIntents.is_a?(Array) and
    self.Catalog.OutputIntents.any? { |intent|
      intent.solve.S == OutputIntent::Intent::PDFA1
    } and
    metadata? and (
      doc = REXML::Document.new self.Catalog.Metadata.data
      REXML::XPath.match(doc, "*/*/rdf:Description[@xmlns:pdfaid]").any? { |desc|
        desc.elements["pdfaid:conformance"].text == "A" and
        desc.elements["pdfaid:part"].text == "1"
      }
    )
end

#portfolio?Boolean

Returns true if the document behaves as a portfolio for embedded files.

Returns:


26
27
28
# File 'lib/origami/collections.rb', line 26

# Returns true if the document behaves as a portfolio for embedded files,
# i.e. when the Catalog's Collection entry is a Dictionary.
def portfolio?
  collection = self.Catalog.Collection
  collection.kind_of?(Dictionary)
end

#producerObject


59
60
61
# File 'lib/origami/metadata.rb', line 59

# Returns the value of the :Producer field, as looked up by
# get_document_info_field.
def producer
  info_key = :Producer
  get_document_info_field(info_key)
end

#register(root, name, value) ⇒ Object

Registers an object into a specific Names root dictionary.

root

The root dictionary (see Names::Root)

name

The value name.

value

The value to associate with this name.


95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/origami/catalog.rb', line 95

# Registers an object into a specific Names root dictionary.
#
# root::  The root dictionary key (see Names::Root).
# name::  The value name.
# value:: The value to associate with this name. Unless it is a Reference,
#         it is marked as an indirect object first.
#
# The Catalog's Names dictionary is created when missing. When +root+ has no
# entry yet, a new indirect NameTreeNode with an empty Names array is stored
# under it. In both cases +name+ and +value+ are appended, in that order, to
# the node's Names array, which is the return value.
def register(root, name, value)
  self.Catalog.Names ||= Names.new
  value.set_indirect(true) unless value.is_a?(Reference)

  existing_root = self.Catalog.Names[root]
  unless existing_root.nil?
    # The root already exists: append to the Names array of the solved node.
    return existing_root.solve[:Names] << name << value
  end

  fresh_node = NameTreeNode.new(Names: []).set_indirect(true)
  self.Catalog.Names[root] = fresh_node
  fresh_node.Names << name << value
end

#remove_revision(index) ⇒ Object

Removes a whole document revision.

index

Revision index, first is 0.


378
379
380
381
382
383
384
385
386
387
388
389
# File 'lib/origami/pdf.rb', line 378

# Removes a whole document revision.
#
# index:: Revision index, first is 0.
#
# Raises IndexError when +index+ is negative or not below the number of
# revisions, and InvalidPDFError when the document has a single revision.
#
# Returns self.
def remove_revision(index)
  # Valid indices are 0..size-1. An index equal to the size must be rejected
  # as well: Array#delete_at would otherwise silently remove nothing.
  if (index < 0) || (index >= @revisions.size)
    raise IndexError, "Not a valid revision index"
  end

  if @revisions.size == 1
    raise InvalidPDFError, "Cannot remove last revision"
  end

  @revisions.delete_at(index)
  self
end

#remove_xrefsObject

Tries to strip any xrefs information off the document.


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/origami/xreftable.rb', line 26

# Tries to strip any xrefs information off the document.
#
# Revisions are visited from last to first. For each one:
# * its xref stream object, if any, is deleted from the document;
# * if the trailer's XRefStm entry is an Integer, the object found at that
#   offset is deleted too, provided it is an XRefStream;
# * the revision's xreftable and xrefstm are reset to nil.
#
# Returns the result of reverse_each on the revisions list.
def remove_xrefs
  @revisions.reverse_each do |revision|
    delete_object(revision.xrefstm.reference) if revision.xrefstm?

    stream_offset = revision.trailer.XRefStm
    if stream_offset.is_a?(Integer)
      candidate = get_object_by_offset(stream_offset)
      delete_object(candidate.reference) if candidate.is_a?(XRefStream)
    end

    revision.xreftable = nil
    revision.xrefstm = nil
  end
end

#resolve_name(root, name) ⇒ Object

Retrieves the value associated with name in the specified root name dictionary, or nil if the value does not exist.


115
116
117
118
119
120
# File 'lib/origami/catalog.rb', line 115

# Looks up +name+ under the names root obtained from get_names_root(root).
#
# Returns nil when there is no such names root; otherwise returns whatever
# resolve_name_from_node yields for that root and +name+.
def resolve_name(root, name)
  names_root = get_names_root(root)

  resolve_name_from_node(names_root, name) unless names_root.nil?
end

#save(path, params = {}) ⇒ Object Also known as: write

Saves the current document.

path

The path where to save this PDF, or an IO-like object responding to write.


192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# File 'lib/origami/pdf.rb', line 192

# Saves the current document.
#
# path::   The path where to save this PDF, or an object responding to
#          +write+, which is then written to directly and left open.
# params:: Options merged over the defaults (delinearize: true,
#          recompile: true, decrypt: false). The merged options are passed
#          on to compile and output.
#
# On a frozen document the :recompile, :rebuild_xrefs, :noindent and
# :obfuscate options are forced to false.
#
# Returns self.
def save(path, params = {})
  options =
    {
      delinearize: true,
      recompile: true,
      decrypt: false
    }
  options.update(params)

  if frozen? # incompatible flags with frozen doc (signed)
    options[:recompile] =
      options[:rebuild_xrefs] =
        options[:noindent] =
          options[:obfuscate] = false
  end

  if path.respond_to?(:write)
    fd = path
    close = false
  else
    path = File.expand_path(path)
    fd = File.open(path, 'w').binmode
    close = true
  end

  begin
    load_all_objects unless loaded?

    intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/i
    delinearize! if options[:delinearize] && linearized?
    compile(options) if options[:recompile]

    fd.write output(options)
  ensure
    # Only close handles opened here; the ensure guarantees the file is
    # released even when loading, compiling or serializing raises.
    fd.close if close
  end

  self
end

#save_upto(revision, filename) ⇒ Object

Saves the file up to given revision number. This can be useful to visualize the modifications over different incremental updates.

revision

The revision number to save.

filename

The path where to save this PDF.


235
236
237
# File 'lib/origami/pdf.rb', line 235

# Saves the file up to given revision number by delegating to save with the
# :up_to_revision option.
#
# revision:: The revision number to save.
# filename:: The path where to save this PDF.
#
# Returns the result of save.
def save_upto(revision, filename)
  save_options = { up_to_revision: revision }
  save(filename, save_options)
end

#set_extension_level(version, level) ⇒ Object

Sets PDF extension level and version. Only supported values are “1.7” and 3.


26
27
28
29
30
31
32
33
34
# File 'lib/origami/catalog.rb', line 26

# Sets PDF extension level and version.
#
# version:: Base version, wrapped into a Name and stored as BaseVersion.
# level::   Extension level, stored as ExtensionLevel.
#
# The Catalog's Extensions dictionary is created when missing, and its
# :ADBE entry is replaced by a fresh DeveloperExtension.
#
# Returns self.
def set_extension_level(version, level)
  extensions = (self.Catalog.Extensions ||= Extensions.new)
  extensions[:ADBE] = DeveloperExtension.new

  # Fill in the entry as stored in the dictionary, not a detached copy.
  extensions[:ADBE].tap do |adobe|
    adobe.BaseVersion = Name.new(version)
    adobe.ExtensionLevel = level
  end

  self
end

#sign(certificate, key, method: Signature::PKCS7_DETACHED, ca: [], annotation: nil, issuer: nil, location: nil, contact: nil, reason: nil) ⇒ Object

Sign the document with the given key and x509 certificate.

certificate

The X509 certificate containing the public key.

key

The private key associated with the certificate.

method

The PDF signature identifier.

ca

Optional CA certificates used to sign the user certificate.

annotation

Annotation associated with the signature.

issuer

Issuer name.

location

Signature location.

contact

Signer contact.

reason

Signing reason.


80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/origami/signature.rb', line 80

# Sign the document with the given key and x509 certificate.
#
# certificate:: The X509 certificate containing the public key
#               (must be an OpenSSL::X509::Certificate).
# key::         The private key associated with the certificate
#               (must be an OpenSSL::PKey::RSA).
# method::      The PDF signature identifier, stored as the SubFilter.
# ca::          Optional CA certificates used to sign the user certificate
#               (must be a Ruby Array).
# annotation::  Annotation associated with the signature; an empty-rectangle
#               signature widget is created when nil.
# issuer::      Issuer name, stored as the signature Name.
# location::    Signature location.
# contact::     Signer contact.
# reason::      Signing reason.
#
# Raises TypeError when certificate, key, ca or annotation have the wrong type.
#
# The document is frozen once the signature has been inserted.
def sign(certificate, key,
  method: Signature::PKCS7_DETACHED,
  ca: [],
  annotation: nil,
  issuer: nil,
  location: nil,
  contact: nil,
  reason: nil)
  unless certificate.is_a?(OpenSSL::X509::Certificate)
    raise TypeError, "A OpenSSL::X509::Certificate object must be passed."
  end

  unless key.is_a?(OpenSSL::PKey::RSA)
    raise TypeError, "A OpenSSL::PKey::RSA object must be passed."
  end

  # ::Array is spelled out so that the plain Ruby Array class is checked.
  unless ca.is_a?(::Array)
    raise TypeError, "Expected an Array of CA certificates."
  end

  unless annotation.nil? || annotation.is_a?(Annotation::Widget::Signature)
    raise TypeError, "Expected a Annotation::Widget::Signature object."
  end

  #
  # XXX: Currently signing a linearized document will result in a broken document.
  # Delinearize the document first until we find a proper way to handle this case.
  #
  if linearized?
    delinearize!
  end

  digsig = Signature::DigitalSignature.new.set_indirect(true)

  # Default to a signature widget with an all-zero rectangle.
  if annotation.nil?
    annotation = Annotation::Widget::Signature.new
    annotation.Rect = Rectangle[llx: 0.0, lly: 0.0, urx: 0.0, ury: 0.0]
  end

  annotation.V = digsig
  add_fields(annotation)
  self.Catalog.AcroForm.SigFlags =
    InteractiveForm::SigFlags::SIGNATURES_EXIST | InteractiveForm::SigFlags::APPEND_ONLY

  digsig.Type = :Sig
  # Placeholder of NUL bytes sized for the final signature; it is overwritten
  # in place once the signature has been computed.
  digsig.Contents = HexaString.new("\x00" * Signature.required_size(method, certificate, key, ca))
  digsig.Filter = :'Adobe.PPKLite'
  digsig.SubFilter = Name.new(method)
  digsig.ByteRange = [0, 0, 0, 0]
  digsig.Name = issuer

  digsig.Location = HexaString.new(location) if location
  digsig.ContactInfo = HexaString.new(contact) if contact
  digsig.Reason = HexaString.new(reason) if reason

  # PKCS1 signatures require a Cert entry.
  if method == Signature::PKCS1_RSA_SHA1
    digsig.Cert =
      if ca.empty?
        HexaString.new(certificate.to_der)
      else
        [HexaString.new(certificate.to_der)] + ca.map { |crt| HexaString.new(crt.to_der) }
      end
  end

  #
  #  Flattening the PDF to get file view.
  #
  compile

  #
  # Creating an empty Xref table to compute signature byte range.
  #
  rebuild_dummy_xrefs

  sig_offset = get_object_offset(digsig.no, digsig.generation) + digsig.signature_offset

  # The signed data is everything before the Contents value and everything after it.
  digsig.ByteRange[0] = 0
  digsig.ByteRange[1] = sig_offset
  digsig.ByteRange[2] = sig_offset + digsig.Contents.to_s.bytesize

  # Repeat until the stored length matches the one recomputed from filesize
  # (presumably because writing a new value can itself change the file size).
  until digsig.ByteRange[3] == filesize - digsig.ByteRange[2]
    digsig.ByteRange[3] = filesize - digsig.ByteRange[2]
  end

  # From that point on, the file size remains constant

  #
  # Correct Xrefs variations caused by ByteRange modifications.
  #
  rebuild_xrefs

  # Concatenate the two byte ranges of the serialized file (offset, length pairs).
  file_data = output
  signable_data = file_data[digsig.ByteRange[0], digsig.ByteRange[1]] +
    file_data[digsig.ByteRange[2], digsig.ByteRange[3]]

  #
  # Computes and inserts the signature.
  #
  signature = Signature.compute(method, signable_data, certificate, key, ca)
  digsig.Contents[0, signature.size] = signature

  #
  # No more modifications are allowed after signing.
  #
  freeze
end

#signatureObject

Raises:


293
294
295
296
297
298
299
300
301
# File 'lib/origami/signature.rb', line 293

# Returns the V entry of the first field whose FT is :Sig and whose V is a
# Dictionary.
#
# Raises SignatureError when the document is not signed, or when no such
# field is found.
def signature
  raise SignatureError, "Not a signed document" unless signed?

  each_field do |field|
    next unless field.FT == :Sig

    candidate = field.V
    return candidate if candidate.is_a?(Dictionary)
  end

  raise SignatureError, "Cannot find digital signature"
end

#signed?Boolean

Returns whether the document contains a digital signature.

Returns:


191
192
193
194
195
196
197
# File 'lib/origami/signature.rb', line 191

# Returns whether the document contains a digital signature.
#
# True only when the Catalog's AcroForm is a Dictionary whose SigFlags entry
# is an Integer with the SIGNATURES_EXIST bit set. An InvalidReferenceError
# raised along the way is treated as "not signed".
def signed?
  form = self.Catalog.AcroForm
  return false unless form.is_a?(Dictionary)

  flags = form.SigFlags
  return false unless flags.is_a?(Integer)

  (flags & InteractiveForm::SigFlags::SIGNATURES_EXIST) != 0
rescue InvalidReferenceError
  false
end

#subjectObject


47
48
49
# File 'lib/origami/metadata.rb', line 47

# Returns the value of the :Subject field, as looked up by
# get_document_info_field.
def subject
  info_key = :Subject
  get_document_info_field(info_key)
end

#titleObject


39
40
41
# File 'lib/origami/metadata.rb', line 39

# Returns the value of the :Title field, as looked up by
# get_document_info_field.
def title
  info_key = :Title
  get_document_info_field(info_key)
end

#trailerObject

Returns the current trailer. This might be either a Trailer or XRefStream.

Raises:


27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/origami/trailer.rb', line 27

# Returns the current trailer. This might be either a Trailer or XRefStream.
#
# Only the last revision is inspected: its trailer is returned when it
# reports dictionary?, otherwise its xref stream is used instead.
#
# Raises InvalidPDFError when the selected object is nil.
def trailer
  latest = @revisions.last

  # Prefer a standard trailer dictionary, fall back to the xref stream.
  found = latest.trailer.dictionary? ? latest.trailer : latest.xrefstm

  raise InvalidPDFError, "No trailer found" if found.nil?

  found
end

#usage_rights?Boolean

Returns:


288
289
290
291
# File 'lib/origami/signature.rb', line 288

# Returns whether the document carries a usage rights entry, i.e. whether
# the Catalog's Perms dictionary exists and holds a :UR3 or :UR key.
#
# The previous condition negated both key checks, so it answered true
# whenever at least one of the two keys was *absent* (including for an
# empty Perms dictionary).
def usage_rights?
  perms = self.Catalog.Perms
  return false if perms.nil?

  perms.key?(:UR3) || perms.key?(:UR)
end

#verify(trusted_certs: [], use_system_store: false, allow_self_signed: false, &verify_cb) ⇒ Object

Verify a document signature.

_:trusted_certs_: an array of trusted X509 certificates.
_:use_system_store_: use the system store for certificate authorities.
_:allow_self_signed_: allow self-signed certificates in the verification chain.
_verify_cb_: block called when encountering a certificate that cannot be verified.
             The argument passed is the OpenSSL::X509::StoreContext.

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/origami/signature.rb', line 37

# Verify a document signature.
#
# trusted_certs::     An array of trusted X509 certificates, added to the store.
# use_system_store::  Use the system store for certificate authorities.
# allow_self_signed:: Allow self-signed certificates in the verification
#                     chain. Only honored by the callback below when no
#                     verify_cb block is given.
# verify_cb::         Block called with the OpenSSL::X509::StoreContext when
#                     a certificate cannot be verified; its return value
#                     becomes the result of the store callback.
#
# Returns the result of Signature.verify.
def verify(trusted_certs: [],
  use_system_store: false,
  allow_self_signed: false,
  &verify_cb)
  # `signature` is still a method call here: the local variable of the same
  # name is only introduced by the assignment further down.
  digsig = signature
  digsig = digsig.cast_to(Signature::DigitalSignature) unless digsig.is_a?(Signature::DigitalSignature)

  signature = digsig.signature_data
  chain = digsig.certificate_chain
  subfilter = digsig.SubFilter.value

  store = OpenSSL::X509::Store.new
  store.set_default_paths if use_system_store
  trusted_certs.each { |ca| store.add_cert(ca) }

  # This is a lambda, so each `return` only leaves the callback itself.
  store.verify_callback = ->(success, ctx) {
    return true if success

    error = ctx.error
    is_self_signed = error == OpenSSL::X509::V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT ||
      error == OpenSSL::X509::V_ERR_SELF_SIGNED_CERT_IN_CHAIN

    return true if is_self_signed && allow_self_signed && verify_cb.nil?

    # Without a user block the callback evaluates to nil at this point.
    verify_cb.call(ctx) unless verify_cb.nil?
  }

  data = extract_signed_data(digsig)
  Signature.verify(subfilter.to_s, data, signature, store, chain)
end

#xfa_form?Boolean

Returns:


55
56
57
# File 'lib/origami/xfa.rb', line 55

# Returns a truthy value when form? holds and the Catalog's AcroForm has an
# :XFA key. When form? is falsy, that falsy value is returned as is.
def xfa_form?
  form? && self.Catalog.AcroForm.key?(:XFA)
end