Module: Gitlab::Database::Migrations::BatchedBackgroundMigrationHelpers

Includes:
DynamicModelHelpers
Included in:
Gitlab::Database::MigrationHelpers
Defined in:
lib/gitlab/database/migrations/batched_background_migration_helpers.rb

Overview

BatchedBackgroundMigrations are a new approach to scheduling and executing background migrations, which uses persistent state in the database to track each migration. This avoids having to batch over an entire table and schedule a large number of sidekiq jobs upfront. It also provides for more flexibility as the migration runs, as it can be paused and restarted, and have configuration values like the batch size updated dynamically as the migration runs.

For now, these migrations are not considered ready for general use. For more information, see the tracking epic: gitlab.com/groups/gitlab-org/-/epics/6751

Constant Summary collapse

NonExistentMigrationError =
Class.new(StandardError)
BATCH_SIZE =

Number of rows to process per job

1_000
SUB_BATCH_SIZE =

Number of rows to process per sub-batch

100
BATCH_CLASS_NAME =

Default batch class for batched migrations

'PrimaryKeyBatchingStrategy'
BATCH_MIN_VALUE =

Default minimum value for batched migrations

1
BATCH_MIN_DELAY =

Minimum delay between batched migrations

2.minutes.freeze
MIGRATION_NOT_FOUND_MESSAGE =
"Could not find batched background migration for the given configuration: %<configuration>s"
MINIMUM_PAUSE_MS =
100
ENFORCE_EARLY_FINALIZATION_FROM_VERSION =
'20240905124117'
EARLY_FINALIZATION_ERROR =
<<-MESSAGE.squeeze(' ').strip
  Batched migration should be finalized only after at-least one required stop from queuing it.
  This is to ensure that we are not breaking the upgrades for self-managed instances.

  For more info visit: https://docs.gitlab.com/ee/development/database/batched_background_migrations.html#finalize-a-batched-background-migration
MESSAGE

Instance Method Summary collapse

Methods included from DynamicModelHelpers

#define_batchable_model, #each_batch, #each_batch_range

Instance Method Details

#delete_batched_background_migration(job_class_name, table_name, column_name, job_arguments) ⇒ Object

Deletes batched background migration for the given configuration.

job_class_name - The background migration job class as a string
table_name - The name of the table the migration iterates over
column_name - The name of the column the migration will batch over
job_arguments - Migration arguments

Example:

delete_batched_background_migration(
  'CopyColumnUsingBackgroundMigrationJob',
  :events,
  :id,
  ['column1', 'column2'])


176
177
178
179
180
181
182
183
184
185
186
# File 'lib/gitlab/database/migrations/batched_background_migration_helpers.rb', line 176

# Deletes the batched background migration records that match the given
# configuration. The schema used for the lookup comes from
# `gitlab_schema_from_context`.
#
# job_class_name - The background migration job class as a string
# table_name - The name of the table the migration iterates over
# column_name - The name of the column the migration batches over
# job_arguments - Migration arguments
#
# Returns whatever `delete_all` returns for the matching scope.
def delete_batched_background_migration(job_class_name, table_name, column_name, job_arguments)
  Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_dml_mode!

  batched_migrations = Gitlab::Database::BackgroundMigration::BatchedMigration
  batched_migrations.reset_column_information

  matching = batched_migrations.for_configuration(
    gitlab_schema_from_context, job_class_name, table_name, column_name, job_arguments,
    include_compatible: true
  )
  matching.delete_all
end

#ensure_batched_background_migration_is_finished(job_class_name:, table_name:, column_name:, job_arguments:, finalize: true, skip_early_finalization_validation: false) ⇒ Object



196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/gitlab/database/migrations/batched_background_migration_helpers.rb', line 196

# Checks that the batched background migration for the given configuration is
# finished (optionally finalizing it first) and confirms it as finalized.
#
# job_class_name - The background migration job class as a string
# table_name - The name of the table the migration iterates over
# column_name - The name of the column the migration batches over
# job_arguments - Migration arguments
# finalize - When true (default), `finalize_batched_background_migration` is
#            called if the migration is not finished yet
# skip_early_finalization_validation - When true, the
#            `prevent_early_finalization!` check is not performed
#
# Returns nil when the migration is already finalized or has just been
# confirmed as finalized. When no migration is found, returns the result of
# `Gitlab::AppLogger.warn`.
#
# Raises RuntimeError when called inside a transaction, or when the migration
# is still not finished after the optional finalize step.
# Raises NonExistentMigrationError when no migration is found and
# ENV['DBLAB_ENVIRONMENT'] is set.
def ensure_batched_background_migration_is_finished(
  job_class_name:,
  table_name:,
  column_name:,
  job_arguments:,
  finalize: true,
  skip_early_finalization_validation: false
)
  Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_dml_mode!

  if transaction_open?
    in_transaction_error =
      'The `ensure_batched_background_migration_is_finished` cannot be run inside a transaction. ' \
      'You can disable transactions by calling `disable_ddl_transaction!` in the body of ' \
      'your migration class.'
    raise in_transaction_error
  end

  batched_migrations = Gitlab::Database::BackgroundMigration::BatchedMigration
  batched_migrations.reset_column_information

  migration = batched_migrations.find_for_configuration(
    gitlab_schema_from_context, job_class_name, table_name, column_name, job_arguments,
    include_compatible: true
  )

  configuration = {
    job_class_name: job_class_name,
    table_name: table_name,
    column_name: column_name,
    job_arguments: job_arguments
  }
  not_found_message = format(MIGRATION_NOT_FOUND_MESSAGE, configuration: configuration)

  if migration.nil?
    # A missing migration is fatal only when DBLAB_ENVIRONMENT is set;
    # otherwise it is logged and the helper returns.
    raise NonExistentMigrationError, not_found_message if ENV['DBLAB_ENVIRONMENT']

    return Gitlab::AppLogger.warn(not_found_message)
  end

  # The version check only applies to records exposing `queued_migration_version`.
  tracks_queued_version = migration.respond_to?(:queued_migration_version)
  if tracks_queued_version && !skip_early_finalization_validation
    prevent_early_finalization!(migration.queued_migration_version, version)
  end

  return if migration.finalized?

  finished = migration.finished?
  unless finished
    finalize_batched_background_migration(**configuration) if finalize
    finished = migration.reload.finished? # rubocop:disable Cop/ActiveRecordAssociationReload -- TODO: ensure that we have latest version of the batched migration
  end

  if finished
    migration.confirm_finalize!
    return
  end

  failure_message = [
    "Expected batched background migration for the given configuration to be marked as 'finished', " \
      "but it is '#{migration.status_name}':\t#{configuration}",
    "Finalize it manually by running the following command in a `bash` or `sh` shell:",
    "\t#{migration.finalize_command}",
    "For more information, check the documentation",
    "\thttps://docs.gitlab.com/ee/update/background_migrations.html#database-migrations-failing-because-of-batched-background-migration-not-finished"
  ].join("\n\n")

  raise failure_message
end

#finalize_batched_background_migration(job_class_name:, table_name:, column_name:, job_arguments:) ⇒ Object



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/gitlab/database/migrations/batched_background_migration_helpers.rb', line 134

# Runs BatchedMigrationRunner.finalize for the batched background migration
# matching the given configuration.
#
# job_class_name - The background migration job class as a string
# table_name - The name of the table the migration iterates over
# column_name - The name of the column the migration batches over
# job_arguments - Migration arguments
#
# Returns whatever `with_restored_connection_stack` returns.
# Raises RuntimeError when called inside a transaction or when no matching
# migration record exists.
def finalize_batched_background_migration(job_class_name:, table_name:, column_name:, job_arguments:)
  Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_dml_mode!

  if transaction_open?
    in_transaction_error =
      'The `finalize_batched_background_migration` cannot be run inside a transaction. ' \
      'You can disable transactions by calling `disable_ddl_transaction!` in the body of ' \
      'your migration class.'
    raise in_transaction_error
  end

  batched_migrations = Gitlab::Database::BackgroundMigration::BatchedMigration
  batched_migrations.reset_column_information

  existing = batched_migrations.find_for_configuration(
    gitlab_schema_from_context, job_class_name, table_name, column_name, job_arguments,
    include_compatible: true
  )
  raise 'Could not find batched background migration' if existing.nil?

  # The runner is handed the connection yielded by with_restored_connection_stack,
  # and runs with the RestrictAllowedSchemas analyzer suppressed.
  with_restored_connection_stack do |connection|
    Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.with_suppressed do
      Gitlab::Database::BackgroundMigration::BatchedMigrationRunner.finalize(
        job_class_name, table_name, column_name, job_arguments,
        connection: connection
      )
    end
  end
end

#gitlab_schema_from_context ⇒ Object



188
189
190
191
192
193
194
# File 'lib/gitlab/database/migrations/batched_background_migration_helpers.rb', line 188

# Resolves the GitLab schema used to look up batched background migrations.
#
# Returns the first entry of `allowed_gitlab_schemas` when the receiver responds
# to it (Gitlab::Database::Migration::V2_0), and :gitlab_main otherwise
# (Gitlab::Database::Migration::V1_0).
def gitlab_schema_from_context
  return :gitlab_main unless respond_to?(:allowed_gitlab_schemas)

  Array(allowed_gitlab_schemas).first
end

#queue_batched_background_migration(job_class_name, batch_table_name, batch_column_name, *job_arguments, job_interval: BATCH_MIN_DELAY, batch_min_value: BATCH_MIN_VALUE, batch_max_value: nil, batch_class_name: BATCH_CLASS_NAME, batch_size: BATCH_SIZE, pause_ms: MINIMUM_PAUSE_MS, max_batch_size: nil, sub_batch_size: SUB_BATCH_SIZE, gitlab_schema: nil, min_cursor: nil, max_cursor: nil) ⇒ Object

Creates a batched background migration for the given table. A batched migration runs one job at a time, computing the bounds of the next batch based on the current migration settings and the previous batch bounds. Each job’s execution status is tracked in the database as the migration runs. The given job class must be present in the Gitlab::BackgroundMigration module, and the batch class (if specified) must be present in the Gitlab::BackgroundMigration::BatchingStrategies module.

If a migration with the same job_class_name, table_name, column_name, and job_arguments already exists, this helper will log a warning and not create a new one.

job_class_name - The background migration job class as a string
batch_table_name - The name of the table the migration will batch over
batch_column_name - The name of the column the migration will batch over
job_arguments - Extra arguments to pass to the job instance when the migration runs
job_interval - The pause interval between each job’s execution, minimum of 2 minutes, defaults to BATCH_MIN_DELAY
batch_min_value - The value in the column the batching will begin at
batch_max_value - The value in the column the batching will end at, defaults to `SELECT MAX(batch_column)`
batch_class_name - The name of the class that will be called to find the range of each next batch
batch_size - The maximum number of rows per job
sub_batch_size - The maximum number of rows processed per “iteration” within the job

*Returns the created BatchedMigration record*

Example:

queue_batched_background_migration(
  'CopyColumnUsingBackgroundMigrationJob',
  :events,
  :id,
  'column1', 'column2')

Where the background migration exists:

class Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob
  def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, *other_args)
    # do something
  end
end


71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/gitlab/database/migrations/batched_background_migration_helpers.rb', line 71

# Builds and saves a BatchedMigration record for the given job class, table and
# column, unless `migration_already_exists?` reports a matching one.
#
# job_class_name - The background migration job class as a string
# batch_table_name - The name of the table the migration will batch over
# batch_column_name - The name of the column the migration will batch over
# job_arguments - Extra positional arguments, collected into an array
# job_interval - Passed through `normalize_job_interval` and stored as `interval`
# batch_min_value, batch_max_value - Forwarded to `setup_legacy_migration!`
#                                    (non-cursor migrations only)
# batch_class_name, batch_size, sub_batch_size, pause_ms - Stored on the record
# max_batch_size - Forwarded to `assign_attributes_safely`
# gitlab_schema - Defaults to `gitlab_schema_from_context` when not given
# min_cursor, max_cursor - Forwarded to `setup_cursor_based_migration!`
#                          (cursor-based migrations only)
#
# Returns the saved BatchedMigration, or nil when a matching migration exists.
def queue_batched_background_migration( # rubocop:disable Metrics/ParameterLists
  job_class_name,
  batch_table_name,
  batch_column_name,
  *job_arguments,
  job_interval: BATCH_MIN_DELAY,
  batch_min_value: BATCH_MIN_VALUE,
  batch_max_value: nil,
  batch_class_name: BATCH_CLASS_NAME,
  batch_size: BATCH_SIZE,
  pause_ms: MINIMUM_PAUSE_MS,
  max_batch_size: nil,
  sub_batch_size: SUB_BATCH_SIZE,
  gitlab_schema: nil,
  min_cursor: nil,
  max_cursor: nil
)
  Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_dml_mode!

  gitlab_schema = gitlab_schema_from_context unless gitlab_schema
  # `version` is read here and later forwarded to assign_attributes_safely.
  migration_version = version

  batched_migrations = Gitlab::Database::BackgroundMigration::BatchedMigration
  batched_migrations.reset_column_information

  already_queued = migration_already_exists?(
    gitlab_schema, job_class_name, batch_table_name, batch_column_name, job_arguments
  )
  return if already_queued

  migration = batched_migrations.new(
    job_class_name: job_class_name,
    table_name: batch_table_name,
    column_name: batch_column_name,
    interval: normalize_job_interval(job_interval),
    pause_ms: pause_ms,
    batch_class_name: batch_class_name,
    batch_size: batch_size,
    sub_batch_size: sub_batch_size
  )

  # Cursor-based and legacy (min/max value) migrations are configured by
  # different helpers.
  if cursor_based_migration?(migration)
    setup_cursor_based_migration!(migration, batch_table_name, job_arguments, min_cursor, max_cursor)
  else
    setup_legacy_migration!(migration, batch_table_name, batch_min_value, batch_max_value, job_arguments)
  end

  validate_job_arguments!(migration, job_arguments)
  assign_attributes_safely(migration, max_batch_size, batch_table_name, gitlab_schema, migration_version)

  migration.tap(&:save!)
end