Class: Google::Cloud::Dataplex::V1::DataDiscoverySpec

Inherits:
Object
  • Object
show all
Extended by:
Protobuf::MessageExts::ClassMethods
Includes:
Protobuf::MessageExts
Defined in:
proto_docs/google/cloud/dataplex/v1/data_discovery.rb

Overview

Spec for a data discovery scan.

Defined Under Namespace

Classes: BigQueryPublishingConfig, StorageConfig

Instance Attribute Summary collapse

Instance Attribute Details

#bigquery_publishing_config ⇒ ::Google::Cloud::Dataplex::V1::DataDiscoverySpec::BigQueryPublishingConfig

Returns Optional. Configuration for metadata publishing.

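Returns:
  • (::Google::Cloud::Dataplex::V1::DataDiscoverySpec::BigQueryPublishingConfig) — Optional. Configuration for metadata publishing.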



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'proto_docs/google/cloud/dataplex/v1/data_discovery.rb', line 31

# Spec for a data discovery scan.
#
# Two nested configuration messages are declared below:
# BigQueryPublishingConfig (BigQuery publishing configurations) and
# StorageConfig (Cloud Storage as the data source).
class DataDiscoverySpec
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Describes BigQuery publishing configurations.
  # @!attribute [rw] table_type
  #   @return [::Google::Cloud::Dataplex::V1::DataDiscoverySpec::BigQueryPublishingConfig::TableType]
  #     Optional. Determines whether to publish discovered tables as BigLake
  #     external tables or non-BigLake external tables.
  # @!attribute [rw] connection
  #   @return [::String]
  #     Optional. The BigQuery connection used to create BigLake tables.
  #     Must be in the form
  #     `projects/{project_id}/locations/{location_id}/connections/{connection_id}`.
  # @!attribute [rw] location
  #   @return [::String]
  #     Optional. The location of the BigQuery dataset to publish BigLake
  #     external or non-BigLake external tables to.
  #     1. If the Cloud Storage bucket is located in a multi-region bucket, then
  #     the BigQuery dataset can be in the same multi-region bucket or any single
  #     region that is included in the same multi-region bucket. The datascan can
  #     be created in any single region that is included in the same multi-region
  #     bucket.
  #     2. If the Cloud Storage bucket is located in a dual-region bucket, then
  #     the BigQuery dataset can be located in regions that are included in the
  #     dual-region bucket, or in a multi-region that includes the dual-region.
  #     The datascan can be created in any single region that is included in the
  #     same dual-region bucket.
  #     3. If the Cloud Storage bucket is located in a single region, then
  #     the BigQuery dataset can be in the same single region or any multi-region
  #     bucket that includes the same single region. The datascan will be created
  #     in the same single region as the bucket.
  #     4. If the BigQuery dataset is in a single region, it must be in the same
  #     single region as the datascan.
  #
  #     For supported values, refer to
  #     https://cloud.google.com/bigquery/docs/locations#supported_locations.
  class BigQueryPublishingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Determines how discovered tables are published.
    # These constants are the values accepted by the `table_type` attribute
    # of BigQueryPublishingConfig.
    module TableType
      # Table type unspecified.
      TABLE_TYPE_UNSPECIFIED = 0

      # Default. Discovered tables are published as BigQuery external tables
      # whose data is accessed using the credentials of the user querying the
      # table.
      EXTERNAL = 1

      # Discovered tables are published as BigLake external tables whose data
      # is accessed using the credentials of the associated BigQuery
      # connection.
      BIGLAKE = 2
    end
  end

  # Configurations related to Cloud Storage as the data source.
  # @!attribute [rw] include_patterns
  #   @return [::Array<::String>]
  #     Optional. Defines the data to include during discovery when only a subset
  #     of the data should be considered. Provide a list of patterns that
  #     identify the data to include. For Cloud Storage bucket assets, these
  #     patterns are interpreted as glob patterns used to match object names. For
  #     BigQuery dataset assets, these patterns are interpreted as patterns to
  #     match table names.
  # @!attribute [rw] exclude_patterns
  #   @return [::Array<::String>]
  #     Optional. Defines the data to exclude during discovery. Provide a list of
  #     patterns that identify the data to exclude. For Cloud Storage bucket
  #     assets, these patterns are interpreted as glob patterns used to match
  #     object names. For BigQuery dataset assets, these patterns are interpreted
  #     as patterns to match table names.
  # @!attribute [rw] csv_options
  #   @return [::Google::Cloud::Dataplex::V1::DataDiscoverySpec::StorageConfig::CsvOptions]
  #     Optional. Configuration for CSV data.
  # @!attribute [rw] json_options
  #   @return [::Google::Cloud::Dataplex::V1::DataDiscoverySpec::StorageConfig::JsonOptions]
  #     Optional. Configuration for JSON data.
  class StorageConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Describes CSV and similar semi-structured data formats.
    # @!attribute [rw] header_rows
    #   @return [::Integer]
    #     Optional. The number of rows to interpret as header rows that should be
    #     skipped when reading data rows.
    # @!attribute [rw] delimiter
    #   @return [::String]
    #     Optional. The delimiter that is used to separate values. The default is
    #     `,` (comma).
    # @!attribute [rw] encoding
    #   @return [::String]
    #     Optional. The character encoding of the data. The default is UTF-8.
    # @!attribute [rw] type_inference_disabled
    #   @return [::Boolean]
    #     Optional. Whether to disable the inference of data types for CSV data.
    #     If true, all columns are registered as strings.
    # @!attribute [rw] quote
    #   @return [::String]
    #     Optional. The character used to quote column values. Accepts `"`
    #     (double quotation mark) or `'` (single quotation mark). If unspecified,
    #     defaults to `"` (double quotation mark).
    class CsvOptions
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # Describes JSON data format.
    # @!attribute [rw] encoding
    #   @return [::String]
    #     Optional. The character encoding of the data. The default is UTF-8.
    # @!attribute [rw] type_inference_disabled
    #   @return [::Boolean]
    #     Optional. Whether to disable the inference of data types for JSON data.
    #     If true, all columns are registered as their primitive types
    #     (string, number, or boolean).
    class JsonOptions
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end
end

#storage_config ⇒ ::Google::Cloud::Dataplex::V1::DataDiscoverySpec::StorageConfig

Returns Cloud Storage related configurations.

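Returns:
  • (::Google::Cloud::Dataplex::V1::DataDiscoverySpec::StorageConfig) — Cloud Storage related configurations.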



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'proto_docs/google/cloud/dataplex/v1/data_discovery.rb', line 31

# Spec for a data discovery scan.
#
# Two nested configuration messages are declared below:
# BigQueryPublishingConfig (BigQuery publishing configurations) and
# StorageConfig (Cloud Storage as the data source).
class DataDiscoverySpec
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Describes BigQuery publishing configurations.
  # @!attribute [rw] table_type
  #   @return [::Google::Cloud::Dataplex::V1::DataDiscoverySpec::BigQueryPublishingConfig::TableType]
  #     Optional. Determines whether to publish discovered tables as BigLake
  #     external tables or non-BigLake external tables.
  # @!attribute [rw] connection
  #   @return [::String]
  #     Optional. The BigQuery connection used to create BigLake tables.
  #     Must be in the form
  #     `projects/{project_id}/locations/{location_id}/connections/{connection_id}`.
  # @!attribute [rw] location
  #   @return [::String]
  #     Optional. The location of the BigQuery dataset to publish BigLake
  #     external or non-BigLake external tables to.
  #     1. If the Cloud Storage bucket is located in a multi-region bucket, then
  #     the BigQuery dataset can be in the same multi-region bucket or any single
  #     region that is included in the same multi-region bucket. The datascan can
  #     be created in any single region that is included in the same multi-region
  #     bucket.
  #     2. If the Cloud Storage bucket is located in a dual-region bucket, then
  #     the BigQuery dataset can be located in regions that are included in the
  #     dual-region bucket, or in a multi-region that includes the dual-region.
  #     The datascan can be created in any single region that is included in the
  #     same dual-region bucket.
  #     3. If the Cloud Storage bucket is located in a single region, then
  #     the BigQuery dataset can be in the same single region or any multi-region
  #     bucket that includes the same single region. The datascan will be created
  #     in the same single region as the bucket.
  #     4. If the BigQuery dataset is in a single region, it must be in the same
  #     single region as the datascan.
  #
  #     For supported values, refer to
  #     https://cloud.google.com/bigquery/docs/locations#supported_locations.
  class BigQueryPublishingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Determines how discovered tables are published.
    # These constants are the values accepted by the `table_type` attribute
    # of BigQueryPublishingConfig.
    module TableType
      # Table type unspecified.
      TABLE_TYPE_UNSPECIFIED = 0

      # Default. Discovered tables are published as BigQuery external tables
      # whose data is accessed using the credentials of the user querying the
      # table.
      EXTERNAL = 1

      # Discovered tables are published as BigLake external tables whose data
      # is accessed using the credentials of the associated BigQuery
      # connection.
      BIGLAKE = 2
    end
  end

  # Configurations related to Cloud Storage as the data source.
  # @!attribute [rw] include_patterns
  #   @return [::Array<::String>]
  #     Optional. Defines the data to include during discovery when only a subset
  #     of the data should be considered. Provide a list of patterns that
  #     identify the data to include. For Cloud Storage bucket assets, these
  #     patterns are interpreted as glob patterns used to match object names. For
  #     BigQuery dataset assets, these patterns are interpreted as patterns to
  #     match table names.
  # @!attribute [rw] exclude_patterns
  #   @return [::Array<::String>]
  #     Optional. Defines the data to exclude during discovery. Provide a list of
  #     patterns that identify the data to exclude. For Cloud Storage bucket
  #     assets, these patterns are interpreted as glob patterns used to match
  #     object names. For BigQuery dataset assets, these patterns are interpreted
  #     as patterns to match table names.
  # @!attribute [rw] csv_options
  #   @return [::Google::Cloud::Dataplex::V1::DataDiscoverySpec::StorageConfig::CsvOptions]
  #     Optional. Configuration for CSV data.
  # @!attribute [rw] json_options
  #   @return [::Google::Cloud::Dataplex::V1::DataDiscoverySpec::StorageConfig::JsonOptions]
  #     Optional. Configuration for JSON data.
  class StorageConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Describes CSV and similar semi-structured data formats.
    # @!attribute [rw] header_rows
    #   @return [::Integer]
    #     Optional. The number of rows to interpret as header rows that should be
    #     skipped when reading data rows.
    # @!attribute [rw] delimiter
    #   @return [::String]
    #     Optional. The delimiter that is used to separate values. The default is
    #     `,` (comma).
    # @!attribute [rw] encoding
    #   @return [::String]
    #     Optional. The character encoding of the data. The default is UTF-8.
    # @!attribute [rw] type_inference_disabled
    #   @return [::Boolean]
    #     Optional. Whether to disable the inference of data types for CSV data.
    #     If true, all columns are registered as strings.
    # @!attribute [rw] quote
    #   @return [::String]
    #     Optional. The character used to quote column values. Accepts `"`
    #     (double quotation mark) or `'` (single quotation mark). If unspecified,
    #     defaults to `"` (double quotation mark).
    class CsvOptions
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # Describes JSON data format.
    # @!attribute [rw] encoding
    #   @return [::String]
    #     Optional. The character encoding of the data. The default is UTF-8.
    # @!attribute [rw] type_inference_disabled
    #   @return [::Boolean]
    #     Optional. Whether to disable the inference of data types for JSON data.
    #     If true, all columns are registered as their primitive types
    #     (string, number, or boolean).
    class JsonOptions
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end
end