Class: Google::Cloud::DocumentAI::V1beta3::ProcessOptions

Inherits:
Object
  • Object
show all
Extended by:
Protobuf::MessageExts::ClassMethods
Includes:
Protobuf::MessageExts
Defined in:
proto_docs/google/cloud/documentai/v1beta3/document_processor_service.rb

Overview

Options for Process API

Defined Under Namespace

Classes: IndividualPageSelector, LayoutConfig

Instance Attribute Summary collapse

Instance Attribute Details

#from_end::Integer

Returns Only process certain pages from the end. Process all if the document has fewer pages (same behavior as from_start).

Note: The following fields are mutually exclusive: from_end, individual_page_selector, from_start. If a field in that set is populated, all other fields in the set will automatically be cleared.

Returns:

  • (::Integer)

    Only process certain pages from the end. Process all if the document has fewer pages (same behavior as from_start).

    Note: The following fields are mutually exclusive: from_end, individual_page_selector, from_start. If a field in that set is populated, all other fields in the set will automatically be cleared.



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'proto_docs/google/cloud/documentai/v1beta3/document_processor_service.rb', line 56

# Options for Process API.
#
# The page-selection fields from_start, from_end and individual_page_selector
# are mutually exclusive: populating one of them clears the others.
class ProcessOptions
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Serving config for layout parser processor.
  # @!attribute [rw] chunking_config
  #   @return [::Google::Cloud::DocumentAI::V1beta3::ProcessOptions::LayoutConfig::ChunkingConfig]
  #     Optional. Config for chunking in layout parser processor.
  # @!attribute [rw] return_images
  #   @return [::Boolean]
  #     Optional. Whether to include images in layout parser processor response.
  # @!attribute [rw] return_bounding_boxes
  #   @return [::Boolean]
  #     Optional. Whether to include bounding boxes in layout parser processor
  #     response.
  # @!attribute [rw] enable_image_annotation
  #   @return [::Boolean]
  #     Optional. Whether to include image annotations in layout parser response.
  # @!attribute [rw] enable_image_extraction
  #   @return [::Boolean]
  #     Optional. Whether to extract images in layout parser response.
  # @!attribute [rw] enable_llm_layout_parsing
  #   @return [::Boolean]
  #     Optional. Whether to refine PDF layout using LLM.
  # @!attribute [rw] enable_table_annotation
  #   @return [::Boolean]
  #     Optional. Whether to include table annotations in layout parser response.
  class LayoutConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Serving config for chunking.
    # @!attribute [rw] chunk_size
    #   @return [::Integer]
    #     Optional. The chunk sizes to use when splitting documents, in order of
    #     level.
    # @!attribute [rw] include_ancestor_headings
    #   @return [::Boolean]
    #     Optional. Whether or not to include ancestor headings when splitting.
    # @!attribute [rw] semantic_chunking_group_size
    #   @return [::Boolean]
    #     Optional. The number of tokens to group together when evaluating
    #     semantic similarity. THIS FIELD IS NOT YET USED.
    #     NOTE(review): the declared type is ::Boolean although the text
    #     describes a token count; confirm against the proto definition.
    # @!attribute [rw] breakpoint_percentile_threshold
    #   @return [::Integer]
    #     Optional. The percentile of cosine dissimilarity that must be exceeded
    #     between a group of tokens and the next. The smaller this number is, the
    #     more chunks will be generated. THIS FIELD IS NOT YET USED.
    class ChunkingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # A list of individual page numbers.
  # @!attribute [rw] pages
  #   @return [::Array<::Integer>]
  #     Optional. Indices of the pages (starting from 1).
  class IndividualPageSelector
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#from_start::Integer

Returns Only process certain pages from the start. Process all if the document has fewer pages.

Note: The following fields are mutually exclusive: from_start, individual_page_selector, from_end. If a field in that set is populated, all other fields in the set will automatically be cleared.

Returns:

  • (::Integer)

    Only process certain pages from the start. Process all if the document has fewer pages.

    Note: The following fields are mutually exclusive: from_start, individual_page_selector, from_end. If a field in that set is populated, all other fields in the set will automatically be cleared.



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'proto_docs/google/cloud/documentai/v1beta3/document_processor_service.rb', line 56

# Options for Process API.
#
# The page-selection fields from_start, from_end and individual_page_selector
# are mutually exclusive: populating one of them clears the others.
class ProcessOptions
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Serving config for layout parser processor.
  # @!attribute [rw] chunking_config
  #   @return [::Google::Cloud::DocumentAI::V1beta3::ProcessOptions::LayoutConfig::ChunkingConfig]
  #     Optional. Config for chunking in layout parser processor.
  # @!attribute [rw] return_images
  #   @return [::Boolean]
  #     Optional. Whether to include images in layout parser processor response.
  # @!attribute [rw] return_bounding_boxes
  #   @return [::Boolean]
  #     Optional. Whether to include bounding boxes in layout parser processor
  #     response.
  # @!attribute [rw] enable_image_annotation
  #   @return [::Boolean]
  #     Optional. Whether to include image annotations in layout parser response.
  # @!attribute [rw] enable_image_extraction
  #   @return [::Boolean]
  #     Optional. Whether to extract images in layout parser response.
  # @!attribute [rw] enable_llm_layout_parsing
  #   @return [::Boolean]
  #     Optional. Whether to refine PDF layout using LLM.
  # @!attribute [rw] enable_table_annotation
  #   @return [::Boolean]
  #     Optional. Whether to include table annotations in layout parser response.
  class LayoutConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Serving config for chunking.
    # @!attribute [rw] chunk_size
    #   @return [::Integer]
    #     Optional. The chunk sizes to use when splitting documents, in order of
    #     level.
    # @!attribute [rw] include_ancestor_headings
    #   @return [::Boolean]
    #     Optional. Whether or not to include ancestor headings when splitting.
    # @!attribute [rw] semantic_chunking_group_size
    #   @return [::Boolean]
    #     Optional. The number of tokens to group together when evaluating
    #     semantic similarity. THIS FIELD IS NOT YET USED.
    #     NOTE(review): the declared type is ::Boolean although the text
    #     describes a token count; confirm against the proto definition.
    # @!attribute [rw] breakpoint_percentile_threshold
    #   @return [::Integer]
    #     Optional. The percentile of cosine dissimilarity that must be exceeded
    #     between a group of tokens and the next. The smaller this number is, the
    #     more chunks will be generated. THIS FIELD IS NOT YET USED.
    class ChunkingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # A list of individual page numbers.
  # @!attribute [rw] pages
  #   @return [::Array<::Integer>]
  #     Optional. Indices of the pages (starting from 1).
  class IndividualPageSelector
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#individual_page_selector::Google::Cloud::DocumentAI::V1beta3::ProcessOptions::IndividualPageSelector

Returns Which pages to process (1-indexed).

Note: The following fields are mutually exclusive: individual_page_selector, from_start, from_end. If a field in that set is populated, all other fields in the set will automatically be cleared.

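Returns:

  • (::Google::Cloud::DocumentAI::V1beta3::ProcessOptions::IndividualPageSelector)

    Which pages to process (1-indexed).

    Note: The following fields are mutually exclusive: individual_page_selector, from_start, from_end. If a field in that set is populated, all other fields in the set will automatically be cleared.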



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'proto_docs/google/cloud/documentai/v1beta3/document_processor_service.rb', line 56

# Options for Process API.
#
# The page-selection fields from_start, from_end and individual_page_selector
# are mutually exclusive: populating one of them clears the others.
class ProcessOptions
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Serving config for layout parser processor.
  # @!attribute [rw] chunking_config
  #   @return [::Google::Cloud::DocumentAI::V1beta3::ProcessOptions::LayoutConfig::ChunkingConfig]
  #     Optional. Config for chunking in layout parser processor.
  # @!attribute [rw] return_images
  #   @return [::Boolean]
  #     Optional. Whether to include images in layout parser processor response.
  # @!attribute [rw] return_bounding_boxes
  #   @return [::Boolean]
  #     Optional. Whether to include bounding boxes in layout parser processor
  #     response.
  # @!attribute [rw] enable_image_annotation
  #   @return [::Boolean]
  #     Optional. Whether to include image annotations in layout parser response.
  # @!attribute [rw] enable_image_extraction
  #   @return [::Boolean]
  #     Optional. Whether to extract images in layout parser response.
  # @!attribute [rw] enable_llm_layout_parsing
  #   @return [::Boolean]
  #     Optional. Whether to refine PDF layout using LLM.
  # @!attribute [rw] enable_table_annotation
  #   @return [::Boolean]
  #     Optional. Whether to include table annotations in layout parser response.
  class LayoutConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Serving config for chunking.
    # @!attribute [rw] chunk_size
    #   @return [::Integer]
    #     Optional. The chunk sizes to use when splitting documents, in order of
    #     level.
    # @!attribute [rw] include_ancestor_headings
    #   @return [::Boolean]
    #     Optional. Whether or not to include ancestor headings when splitting.
    # @!attribute [rw] semantic_chunking_group_size
    #   @return [::Boolean]
    #     Optional. The number of tokens to group together when evaluating
    #     semantic similarity. THIS FIELD IS NOT YET USED.
    #     NOTE(review): the declared type is ::Boolean although the text
    #     describes a token count; confirm against the proto definition.
    # @!attribute [rw] breakpoint_percentile_threshold
    #   @return [::Integer]
    #     Optional. The percentile of cosine dissimilarity that must be exceeded
    #     between a group of tokens and the next. The smaller this number is, the
    #     more chunks will be generated. THIS FIELD IS NOT YET USED.
    class ChunkingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # A list of individual page numbers.
  # @!attribute [rw] pages
  #   @return [::Array<::Integer>]
  #     Optional. Indices of the pages (starting from 1).
  class IndividualPageSelector
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#layout_config::Google::Cloud::DocumentAI::V1beta3::ProcessOptions::LayoutConfig

Optional. Only applicable to LAYOUT_PARSER_PROCESSOR. Returns error if set on other processor types.

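Returns:

  • (::Google::Cloud::DocumentAI::V1beta3::ProcessOptions::LayoutConfig)

    Optional. Only applicable to LAYOUT_PARSER_PROCESSOR. Returns error if set on other processor types.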



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'proto_docs/google/cloud/documentai/v1beta3/document_processor_service.rb', line 56

# Options for Process API.
#
# The page-selection fields from_start, from_end and individual_page_selector
# are mutually exclusive: populating one of them clears the others.
class ProcessOptions
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Serving config for layout parser processor.
  # @!attribute [rw] chunking_config
  #   @return [::Google::Cloud::DocumentAI::V1beta3::ProcessOptions::LayoutConfig::ChunkingConfig]
  #     Optional. Config for chunking in layout parser processor.
  # @!attribute [rw] return_images
  #   @return [::Boolean]
  #     Optional. Whether to include images in layout parser processor response.
  # @!attribute [rw] return_bounding_boxes
  #   @return [::Boolean]
  #     Optional. Whether to include bounding boxes in layout parser processor
  #     response.
  # @!attribute [rw] enable_image_annotation
  #   @return [::Boolean]
  #     Optional. Whether to include image annotations in layout parser response.
  # @!attribute [rw] enable_image_extraction
  #   @return [::Boolean]
  #     Optional. Whether to extract images in layout parser response.
  # @!attribute [rw] enable_llm_layout_parsing
  #   @return [::Boolean]
  #     Optional. Whether to refine PDF layout using LLM.
  # @!attribute [rw] enable_table_annotation
  #   @return [::Boolean]
  #     Optional. Whether to include table annotations in layout parser response.
  class LayoutConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Serving config for chunking.
    # @!attribute [rw] chunk_size
    #   @return [::Integer]
    #     Optional. The chunk sizes to use when splitting documents, in order of
    #     level.
    # @!attribute [rw] include_ancestor_headings
    #   @return [::Boolean]
    #     Optional. Whether or not to include ancestor headings when splitting.
    # @!attribute [rw] semantic_chunking_group_size
    #   @return [::Boolean]
    #     Optional. The number of tokens to group together when evaluating
    #     semantic similarity. THIS FIELD IS NOT YET USED.
    #     NOTE(review): the declared type is ::Boolean although the text
    #     describes a token count; confirm against the proto definition.
    # @!attribute [rw] breakpoint_percentile_threshold
    #   @return [::Integer]
    #     Optional. The percentile of cosine dissimilarity that must be exceeded
    #     between a group of tokens and the next. The smaller this number is, the
    #     more chunks will be generated. THIS FIELD IS NOT YET USED.
    class ChunkingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # A list of individual page numbers.
  # @!attribute [rw] pages
  #   @return [::Array<::Integer>]
  #     Optional. Indices of the pages (starting from 1).
  class IndividualPageSelector
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#ocr_config::Google::Cloud::DocumentAI::V1beta3::OcrConfig

Only applicable to OCR_PROCESSOR and FORM_PARSER_PROCESSOR. Returns error if set on other processor types.

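Returns:

  • (::Google::Cloud::DocumentAI::V1beta3::OcrConfig)

    Only applicable to OCR_PROCESSOR and FORM_PARSER_PROCESSOR. Returns error if set on other processor types.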



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'proto_docs/google/cloud/documentai/v1beta3/document_processor_service.rb', line 56

# Options for Process API.
#
# The page-selection fields from_start, from_end and individual_page_selector
# are mutually exclusive: populating one of them clears the others.
class ProcessOptions
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Serving config for layout parser processor.
  # @!attribute [rw] chunking_config
  #   @return [::Google::Cloud::DocumentAI::V1beta3::ProcessOptions::LayoutConfig::ChunkingConfig]
  #     Optional. Config for chunking in layout parser processor.
  # @!attribute [rw] return_images
  #   @return [::Boolean]
  #     Optional. Whether to include images in layout parser processor response.
  # @!attribute [rw] return_bounding_boxes
  #   @return [::Boolean]
  #     Optional. Whether to include bounding boxes in layout parser processor
  #     response.
  # @!attribute [rw] enable_image_annotation
  #   @return [::Boolean]
  #     Optional. Whether to include image annotations in layout parser response.
  # @!attribute [rw] enable_image_extraction
  #   @return [::Boolean]
  #     Optional. Whether to extract images in layout parser response.
  # @!attribute [rw] enable_llm_layout_parsing
  #   @return [::Boolean]
  #     Optional. Whether to refine PDF layout using LLM.
  # @!attribute [rw] enable_table_annotation
  #   @return [::Boolean]
  #     Optional. Whether to include table annotations in layout parser response.
  class LayoutConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Serving config for chunking.
    # @!attribute [rw] chunk_size
    #   @return [::Integer]
    #     Optional. The chunk sizes to use when splitting documents, in order of
    #     level.
    # @!attribute [rw] include_ancestor_headings
    #   @return [::Boolean]
    #     Optional. Whether or not to include ancestor headings when splitting.
    # @!attribute [rw] semantic_chunking_group_size
    #   @return [::Boolean]
    #     Optional. The number of tokens to group together when evaluating
    #     semantic similarity. THIS FIELD IS NOT YET USED.
    #     NOTE(review): the declared type is ::Boolean although the text
    #     describes a token count; confirm against the proto definition.
    # @!attribute [rw] breakpoint_percentile_threshold
    #   @return [::Integer]
    #     Optional. The percentile of cosine dissimilarity that must be exceeded
    #     between a group of tokens and the next. The smaller this number is, the
    #     more chunks will be generated. THIS FIELD IS NOT YET USED.
    class ChunkingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # A list of individual page numbers.
  # @!attribute [rw] pages
  #   @return [::Array<::Integer>]
  #     Optional. Indices of the pages (starting from 1).
  class IndividualPageSelector
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#schema_override::Google::Cloud::DocumentAI::V1beta3::DocumentSchema

Optional. Override the schema of the ProcessorVersion. Will return an Invalid Argument error if this field is set when the underlying ProcessorVersion doesn't support schema override.

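Returns:

  • (::Google::Cloud::DocumentAI::V1beta3::DocumentSchema)

    Optional. Override the schema of the ProcessorVersion. Will return an Invalid Argument error if this field is set when the underlying ProcessorVersion doesn't support schema override.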



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'proto_docs/google/cloud/documentai/v1beta3/document_processor_service.rb', line 56

# Options for Process API.
#
# The page-selection fields from_start, from_end and individual_page_selector
# are mutually exclusive: populating one of them clears the others.
class ProcessOptions
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Serving config for layout parser processor.
  # @!attribute [rw] chunking_config
  #   @return [::Google::Cloud::DocumentAI::V1beta3::ProcessOptions::LayoutConfig::ChunkingConfig]
  #     Optional. Config for chunking in layout parser processor.
  # @!attribute [rw] return_images
  #   @return [::Boolean]
  #     Optional. Whether to include images in layout parser processor response.
  # @!attribute [rw] return_bounding_boxes
  #   @return [::Boolean]
  #     Optional. Whether to include bounding boxes in layout parser processor
  #     response.
  # @!attribute [rw] enable_image_annotation
  #   @return [::Boolean]
  #     Optional. Whether to include image annotations in layout parser response.
  # @!attribute [rw] enable_image_extraction
  #   @return [::Boolean]
  #     Optional. Whether to extract images in layout parser response.
  # @!attribute [rw] enable_llm_layout_parsing
  #   @return [::Boolean]
  #     Optional. Whether to refine PDF layout using LLM.
  # @!attribute [rw] enable_table_annotation
  #   @return [::Boolean]
  #     Optional. Whether to include table annotations in layout parser response.
  class LayoutConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Serving config for chunking.
    # @!attribute [rw] chunk_size
    #   @return [::Integer]
    #     Optional. The chunk sizes to use when splitting documents, in order of
    #     level.
    # @!attribute [rw] include_ancestor_headings
    #   @return [::Boolean]
    #     Optional. Whether or not to include ancestor headings when splitting.
    # @!attribute [rw] semantic_chunking_group_size
    #   @return [::Boolean]
    #     Optional. The number of tokens to group together when evaluating
    #     semantic similarity. THIS FIELD IS NOT YET USED.
    #     NOTE(review): the declared type is ::Boolean although the text
    #     describes a token count; confirm against the proto definition.
    # @!attribute [rw] breakpoint_percentile_threshold
    #   @return [::Integer]
    #     Optional. The percentile of cosine dissimilarity that must be exceeded
    #     between a group of tokens and the next. The smaller this number is, the
    #     more chunks will be generated. THIS FIELD IS NOT YET USED.
    class ChunkingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # A list of individual page numbers.
  # @!attribute [rw] pages
  #   @return [::Array<::Integer>]
  #     Optional. Indices of the pages (starting from 1).
  class IndividualPageSelector
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end