Upgrade from 14.1.7 to 14.2.7 makes gitlab unreachable [DB Migration Issues]

Hi folks!

May be some one already faced an issue with migration from 14.1.7 to 14.2.7.

I’m upgrading it as documented here Upgrading GitLab | GitLab

I checkthat all migrations are finished. (there is no migrations in background )

but I still can’t access gitlab interface, and the following logs syas that it’s timeout.

==> /var/log/gitlab/nginx/gitlab_access.log <==
10.47.0.0 - - [28/Nov/2021:15:59:02 +0000] "POST /api/v4/jobs/request HTTP/1.1" 204 0 "" "gitlab-runner 14.1.0 (14-1-stable; go1.13.8; linux/amd64)" -

==> /var/log/gitlab/gitlab-rails/production.log <==
  
Rack::Timeout::RequestTimeoutException (Request ran for longer than 60000ms):
  
app/models/concerns/cacheable_attributes.rb:19:in `current_without_cache'
app/models/concerns/cacheable_attributes.rb:55:in `current'
lib/gitlab/current_settings.rb:60:in `uncached_application_settings'
lib/gitlab/current_settings.rb:42:in `ensure_application_settings!'
lib/gitlab/current_settings.rb:15:in `block in current_application_settings'
lib/gitlab/safe_request_store.rb:12:in `fetch'
lib/gitlab/current_settings.rb:15:in `current_application_settings'
lib/gitlab/throttle.rb:8:in `settings'
lib/gitlab/rack_attack/request.rb:64:in `throttle_unauthenticated?'
lib/gitlab/rack_attack.rb:86:in `block in configure_throttles'
lib/gitlab/middleware/read_only/controller.rb:50:in `call'
lib/gitlab/middleware/read_only.rb:18:in `call'
lib/gitlab/middleware/same_site_cookies.rb:27:in `call'
lib/gitlab/middleware/handle_malformed_strings.rb:21:in `call'
lib/gitlab/middleware/basic_health_check.rb:25:in `call'
lib/gitlab/middleware/handle_ip_spoof_attack_error.rb:25:in `call'
lib/gitlab/middleware/request_context.rb:21:in `call'
config/initializers/fix_local_cache_middleware.rb:11:in `call'
lib/gitlab/middleware/rack_multipart_tempfile_factory.rb:19:in `call'
lib/gitlab/metrics/requests_rack_middleware.rb:74:in `call'
lib/gitlab/middleware/release_env.rb:12:in `call'

==> /var/log/gitlab/puma/puma_stderr.log <==
source=rack-timeout id=01FNKM918AT01VKE0WC1GH9D1C timeout=60000ms service=60000ms state=timed_out at=error

==> /var/log/gitlab/puma/puma_stdout.log <==
{"timestamp":"2021-11-28T16:08:06.050Z","pid":398,"message":"PumaWorkerKiller: Consuming 6448.4453125 mb with master and 8 workers."}

==> /var/log/gitlab/gitlab-workhorse/current <==
{"correlation_id":"01FNKMXEBXEMWS0SCGWR5AGBR2","duration_ms":50000,"error":"badgateway: failed to receive response: context canceled","level":"error","method":"POST","msg":"","time":"2021-11-28T16:10:02Z","uri":"/api/v4/jobs/request"}
{"content_type":"text/plain","correlation_id":"01FNKMXEBXEMWS0SCGWR5AGBR2","duration_ms":50001,"host":"git.muffs.ru","level":"info","method":"POST","msg":"access","proto":"HTTP/1.1","referrer":"","remote_addr":"127.0.0.1:0","remote_ip":"127.0.0.1","route":"^/api/v4/jobs/request\\z","status":502,"system":"http","time":"2021-11-28T16:10:02Z","ttfb_ms":50001,"uri":"/api/v4/jobs/request","user_agent":"gitlab-runner 14.4.0 (14-4-stable; go1.13.8; linux/amd64)","written_bytes":24}

I can’t see something specific or errors in log except badgateway, that can help me to fix this issue.

==> /var/log/gitlab/gitlab-workhorse/current <==
{"correlation_id":"01FNKMNPYD7M7ZAXY1F8ER16T7","duration_ms":50002,"error":"badgateway: failed to receive response: context canceled","level":"error","method":"GET","msg":"","time":"2021-11-28T16:05:48Z","uri":"/"}
{"content_type":"text/html; charset=utf-8","correlation_id":"01FNKMNPYD7M7ZAXY1F8ER16T7","duration_ms":50002,"host":"git.muffs.ru","level":"info","method":"GET","msg":"access","proto":"HTTP/1.1","referrer":"","remote_addr":"127.0.0.1:0","remote_ip":"127.0.0.1","route":"","status":502,"system":"http","time":"2021-11-28T16:05:48Z","ttfb_ms":50002,"uri":"/","user_agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36","written_bytes":2940}


==> /var/log/gitlab/sidekiq/current <==
{"severity":"INFO","time":"2021-11-28T16:05:15.980Z","queue":"cronjob:user_status_cleanup_batch","args":[],"class":"UserStatusCleanup::BatchWorker","retry":0,"version":0,"queue_namespace":"cronjob","jid":"d32b3e039c98515f41308d9a","created_at":"2021-11-28T16:05:15.978Z","meta.caller_id":"Cronjob","meta.feature_category":"users","correlation_id":"fbab6c15b5d939c651178dae6dc53304","idempotency_key":"resque:gitlab:duplicate:cronjob:user_status_cleanup_batch:63596f84f2aa01599e5d284b58c5b497d0d152817e7aa09bae73984558ee32bf","enqueued_at":"2021-11-28T16:05:15.979Z","job_size_bytes":2,"pid":416,"message":"UserStatusCleanup::BatchWorker JID-d32b3e039c98515f41308d9a: start","job_status":"start","scheduling_latency_s":0.000978}
{"severity":"INFO","time":"2021-11-28T16:05:16.020Z","queue":"cronjob:user_status_cleanup_batch","args":[],"class":"UserStatusCleanup::BatchWorker","retry":0,"version":0,"queue_namespace":"cronjob","jid":"d32b3e039c98515f41308d9a","created_at":"2021-11-28T16:05:15.978Z","meta.caller_id":"Cronjob","meta.feature_category":"users","correlation_id":"fbab6c15b5d939c651178dae6dc53304","idempotency_key":"resque:gitlab:duplicate:cronjob:user_status_cleanup_batch:63596f84f2aa01599e5d284b58c5b497d0d152817e7aa09bae73984558ee32bf","enqueued_at":"2021-11-28T16:05:15.979Z","job_size_bytes":2,"pid":416,"message":"UserStatusCleanup::BatchWorker JID-d32b3e039c98515f41308d9a: done: 0.039638 sec","job_status":"done","scheduling_latency_s":0.000978,"redis_calls":2,"redis_duration_s":0.035593,"redis_read_bytes":2,"redis_write_bytes":202,"redis_queues_calls":2,"redis_queues_duration_s":0.035593,"redis_queues_read_bytes":2,"redis_queues_write_bytes":202,"db_count":1,"db_write_count":0,"db_cached_count":0,"cpu_s":0.002001,"mem_objects":902,"mem_bytes":104824,"mem_mallocs":148,"mem_total_bytes":140904,"duration_s":0.039638,"completed_at":"2021-11-28T16:05:16.020Z","db_duration_s":0.000709}
{"severity":"INFO","time":"2021-11-28T16:05:16.022Z","queue":"cronjob:database_batched_background_migration","args":[],"class":"Database::BatchedBackgroundMigrationWorker","retry":0,"version":0,"queue_namespace":"cronjob","jid":"556a7433de3714bf52123113","created_at":"2021-11-28T16:05:16.018Z","meta.caller_id":"Cronjob","meta.feature_category":"database","correlation_id":"5976ea7a214fb1d3982e2bf9314c8112","idempotency_key":"resque:gitlab:duplicate:cronjob:database_batched_background_migration:592d9619e1997b640b70ce6a22f6713bc7793bb7a4e342b7380d90b691fcd6ae","enqueued_at":"2021-11-28T16:05:16.020Z","job_size_bytes":2,"pid":416,"message":"Database::BatchedBackgroundMigrationWorker JID-556a7433de3714bf52123113: start","job_status":"start","scheduling_latency_s":0.001873}
{"severity":"INFO","time":"2021-11-28T16:05:16.029Z","queue":"cronjob:database_batched_background_migration","args":[],"class":"Database::BatchedBackgroundMigrationWorker","retry":0,"version":0,"queue_namespace":"cronjob","jid":"556a7433de3714bf52123113","created_at":"2021-11-28T16:05:16.018Z","meta.caller_id":"Cronjob","meta.feature_category":"database","correlation_id":"5976ea7a214fb1d3982e2bf9314c8112","idempotency_key":"resque:gitlab:duplicate:cronjob:database_batched_background_migration:592d9619e1997b640b70ce6a22f6713bc7793bb7a4e342b7380d90b691fcd6ae","enqueued_at":"2021-11-28T16:05:16.020Z","job_size_bytes":2,"pid":416,"message":"Database::BatchedBackgroundMigrationWorker JID-556a7433de3714bf52123113: done: 0.007573 sec","job_status":"done","scheduling_latency_s":0.001873,"redis_calls":3,"redis_duration_s":0.0014629999999999999,"redis_read_bytes":124,"redis_write_bytes":249,"redis_cache_calls":1,"redis_cache_duration_s":0.000382,"redis_cache_read_bytes":122,"redis_cache_write_bytes":35,"redis_queues_calls":2,"redis_queues_duration_s":0.001081,"redis_queues_read_bytes":2,"redis_queues_write_bytes":214,"db_count":1,"db_write_count":0,"db_cached_count":0,"cpu_s":0.002275,"mem_objects":1201,"mem_bytes":159880,"mem_mallocs":250,"mem_total_bytes":207920,"duration_s":0.007573,"completed_at":"2021-11-28T16:05:16.029Z","db_duration_s":0.000459}
{"severity":"INFO","time":"2021-11-28T16:05:16.030Z","queue":"cronjob:schedule_merge_request_cleanup_refs","args":[],"class":"ScheduleMergeRequestCleanupRefsWorker","retry":0,"version":0,"queue_namespace":"cronjob","jid":"ca97f9a3889feaa95bc84676","created_at":"2021-11-28T16:05:16.026Z","meta.caller_id":"Cronjob","meta.feature_category":"code_review","correlation_id":"fbde5dc3bc5bf848ef0081428d5f491a","idempotency_key":"resque:gitlab:duplicate:cronjob:schedule_merge_request_cleanup_refs:33e8a9dcd4c9780ad0ea123ad7ccbabde1aa1e90ffcbb928434ba4b5800a5811","enqueued_at":"2021-11-28T16:05:16.028Z","job_size_bytes":2,"pid":416,"message":"ScheduleMergeRequestCleanupRefsWorker JID-ca97f9a3889feaa95bc84676: start","job_status":"start","scheduling_latency_s":0.002402}
{"severity":"INFO","time":"2021-11-28T16:05:16.107Z","queue":"cronjob:schedule_merge_request_cleanup_refs","args":[],"class":"ScheduleMergeRequestCleanupRefsWorker","retry":0,"version":0,"queue_namespace":"cronjob","jid":"ca97f9a3889feaa95bc84676","created_at":"2021-11-28T16:05:16.026Z","meta.caller_id":"Cronjob","meta.feature_category":"code_review","correlation_id":"fbde5dc3bc5bf848ef0081428d5f491a","idempotency_key":"resque:gitlab:duplicate:cronjob:schedule_merge_request_cleanup_refs:33e8a9dcd4c9780ad0ea123ad7ccbabde1aa1e90ffcbb928434ba4b5800a5811","enqueued_at":"2021-11-28T16:05:16.028Z","job_size_bytes":2,"pid":416,"message":"ScheduleMergeRequestCleanupRefsWorker JID-ca97f9a3889feaa95bc84676: done: 0.076687 sec","job_status":"done","scheduling_latency_s":0.002402,"redis_calls":3,"redis_duration_s":0.073952,"redis_read_bytes":203,"redis_write_bytes":273,"redis_cache_calls":1,"redis_cache_duration_s":0.000393,"redis_cache_read_bytes":201,"redis_cache_write_bytes":61,"redis_queues_calls":2,"redis_queues_duration_s":0.073559,"redis_queues_read_bytes":2,"redis_queues_write_bytes":212,"db_count":0,"db_write_count":0,"db_cached_count":0,"cpu_s":0.001578,"mem_objects":883,"mem_bytes":153936,"mem_mallocs":192,"mem_total_bytes":189256,"duration_s":0.076687,"completed_at":"2021-11-28T16:05:16.107Z","db_duration_s":0.0}

Would appreciate any advice, thanks in advance!

I assume it’s google captcha

p.s. nope, not captcha =(

I ended up with tuning up configuration:

It’ really wired when version 14.1.7 worked like a charm, and after migration I needed up with such config ;(

puma['worker_processes'] = 0
sidekiq['max_concurrency'] = 10
gitlab_rails['env'] = {
  'MALLOC_CONF' => 'dirty_decay_ms:1000,muzzy_decay_ms:1000'
}
gitaly['env'] = {
  'LD_PRELOAD' => '/opt/gitlab/embedded/lib/libjemalloc.so',
  'MALLOC_CONF' => 'dirty_decay_ms:1000,muzzy_decay_ms:1000'
}
gitlab_rails['env'] = {
   'GITLAB_RAILS_RACK_TIMEOUT' => 600
 }

puma[‘worker_processes’] = 0 - this works

puma[‘worker_processes’] = default doesn’t

image: gitlab/gitlab-ce:14.2.7-ce.0

WTF, having lot’s of spare resources, ton’s of ram and spare io, gitlab can’t work with multiple puma[‘worker_processes’] :frowning: :confounded:

and fresh installation without previous data also works like a charm :sob: 14.4.2

with multiple - puma[‘worker_processes’]

Then that means you edited your gitlab.rb too much that broke your system. Since fresh install works, this means there is nothing wrong with gitlab, but rather the changes you made in gitlab.rb.

I use the defaults for all puma options, all of them have a # at the beginning of the line. Has worked fine, since 12.9.3 and I’ve upgraded many times since then as well. Some people had problems with the migration from unicorn to puma, and so had to comment the unicorn lines or replace them with puma ones. worker_processes by default is 2 for puma, so it is already multiple processes.

I tried in many ways.

  • Check that there no running background jobs, then migrated with default config, with no changes in puma workers[several_times].
  • Tried to tune up puma[‘worker_processes’] = 0 in many ways.
  • Added and modified resources.
  • Migrated to another node all components of gitlab. redis, postgresqsl cluster etc.

I ended up that nothing helps or I can’t find any specific output in logs, related to issues with DB.

Only one way I currently can see for migration, git clone --mirror in script.

So sad ;(

You can try copying /etc/gitlab/gitlab.rb and gitlab-secrets.json files to new server. Install Gitlab like you did with testing clean install. Before running gitlab-ctl reconfigure, copy the gitlab.rb and gitlab-secrets.json from your old server and put it in /etc/gitlab on the new server.

Then run:

gitlab-ctl reconfigure

if that works, then it would show that you configuration is OK. Then that would mean something not right with your old installation. If it doesn’t work, then it means your gitlab.rb is the problem with a wrong configuration issue or something.

If it does work, then test creating some repos on the new server to see if everything works with them.

thank you, I tried all that possible that I can check with rake tasks

the point is that fresh install work with no issues

Also found that when I go further with upgrade(14.2->14.3 with puma[‘worker_processes’] = 0 and no bg tasks), I get such errors:

rails_migration[gitlab-rails] (gitlab::database_migrations line 51) had an error: Mixlib::ShellOut::ShellCommandFailed: bash[migrate gitlab-rails database] (/opt/gitlab/embedded/cookbooks/cache/cookbooks/gitlab/resources/rails_migration.rb line 16) had an error: Mixlib::ShellOut::ShellCommandFailed: Expected process to exit with [0], but received '1'
---- Begin output of "bash"  "/tmp/chef-script20211129-28-4uc1ob" ----
STDOUT: rake aborted!
StandardError: An error has occurred, all later migrations canceled:

PG::ObjectNotInPrerequisiteState: ERROR:  index "taggings_pkey" is not valid
LINE 1: ...eb,db_config_name:main*/ ALTER TABLE taggings ADD CONSTRAINT...
                                                             ^
/opt/gitlab/embedded/service/gitlab-rails/db/post_migrate/20210806131706_finalize_taggins_bigint_conversion.rb:74:in `block in swap'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/with_lock_retries.rb:123:in `run_block'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/with_lock_retries.rb:134:in `block in run_block_with_lock_timeout'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/connection_proxy.rb:119:in `block in write_using_load_balancer'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/load_balancer.rb:110:in `block in read_write'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/load_balancer.rb:170:in `retry_with_backoff'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/load_balancer.rb:108:in `read_write'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/connection_proxy.rb:118:in `write_using_load_balancer'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/connection_proxy.rb:70:in `transaction'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/with_lock_retries.rb:129:in `run_block_with_lock_timeout'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/with_lock_retries.rb:97:in `run'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/migration_helpers.rb:422:in `with_lock_retries'
/opt/gitlab/embedded/service/gitlab-rails/db/post_migrate/20210806131706_finalize_taggins_bigint_conversion.rb:44:in `swap'
/opt/gitlab/embedded/service/gitlab-rails/db/post_migrate/20210806131706_finalize_taggins_bigint_conversion.rb:18:in `up'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/migrations/lock_retry_mixin.rb:31:in `ddl_transaction'
/opt/gitlab/embedded/service/gitlab-rails/lib/tasks/gitlab/db.rake:61:in `block (3 levels) in <top (required)>'
/opt/gitlab/embedded/bin/bundle:23:in `load'
/opt/gitlab/embedded/bin/bundle:23:in `<main>'

Caused by:
ActiveRecord::StatementInvalid: PG::ObjectNotInPrerequisiteState: ERROR:  index "taggings_pkey" is not valid
LINE 1: ...eb,db_config_name:main*/ ALTER TABLE taggings ADD CONSTRAINT...
                                                             ^
/opt/gitlab/embedded/service/gitlab-rails/db/post_migrate/20210806131706_finalize_taggins_bigint_conversion.rb:74:in `block in swap'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/with_lock_retries.rb:123:in `run_block'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/with_lock_retries.rb:134:in `block in run_block_with_lock_timeout'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/connection_proxy.rb:119:in `block in write_using_load_balancer'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/load_balancer.rb:110:in `block in read_write'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/load_balancer.rb:170:in `retry_with_backoff'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/load_balancer.rb:108:in `read_write'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/connection_proxy.rb:118:in `write_using_load_balancer'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/connection_proxy.rb:70:in `transaction'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/with_lock_retries.rb:129:in `run_block_with_lock_timeout'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/with_lock_retries.rb:97:in `run'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/migration_helpers.rb:422:in `with_lock_retries'
/opt/gitlab/embedded/service/gitlab-rails/db/post_migrate/20210806131706_finalize_taggins_bigint_conversion.rb:44:in `swap'
/opt/gitlab/embedded/service/gitlab-rails/db/post_migrate/20210806131706_finalize_taggins_bigint_conversion.rb:18:in `up'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/migrations/lock_retry_mixin.rb:31:in `ddl_transaction'
/opt/gitlab/embedded/service/gitlab-rails/lib/tasks/gitlab/db.rake:61:in `block (3 levels) in <top (required)>'
/opt/gitlab/embedded/bin/bundle:23:in `load'
/opt/gitlab/embedded/bin/bundle:23:in `<main>'

Caused by:
PG::ObjectNotInPrerequisiteState: ERROR:  index "taggings_pkey" is not valid
LINE 1: ...eb,db_config_name:main*/ ALTER TABLE taggings ADD CONSTRAINT...
                                                             ^
/opt/gitlab/embedded/service/gitlab-rails/db/post_migrate/20210806131706_finalize_taggins_bigint_conversion.rb:74:in `block in swap'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/with_lock_retries.rb:123:in `run_block'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/with_lock_retries.rb:134:in `block in run_block_with_lock_timeout'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/connection_proxy.rb:119:in `block in write_using_load_balancer'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/load_balancer.rb:110:in `block in read_write'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/load_balancer.rb:170:in `retry_with_backoff'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/load_balancer.rb:108:in `read_write'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/connection_proxy.rb:118:in `write_using_load_balancer'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/load_balancing/connection_proxy.rb:70:in `transaction'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/with_lock_retries.rb:129:in `run_block_with_lock_timeout'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/with_lock_retries.rb:97:in `run'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/migration_helpers.rb:422:in `with_lock_retries'
/opt/gitlab/embedded/service/gitlab-rails/db/post_migrate/20210806131706_finalize_taggins_bigint_conversion.rb:44:in `swap'
/opt/gitlab/embedded/service/gitlab-rails/db/post_migrate/20210806131706_finalize_taggins_bigint_conversion.rb:18:in `up'
/opt/gitlab/embedded/service/gitlab-rails/lib/gitlab/database/migrations/lock_retry_mixin.rb:31:in `ddl_transaction'
/opt/gitlab/embedded/service/gitlab-rails/lib/tasks/gitlab/db.rake:61:in `block (3 levels) in <top (required)>'
/opt/gitlab/embedded/bin/bundle:23:in `load'
/opt/gitlab/embedded/bin/bundle:23:in `<main>'
Tasks: TOP => db:migrate
(See full trace by running task with --trace)