I’m not DevOps for my organization but would like to make a case to them for allowing me to rewrite Git history. Our company has some people who’ve changed their identity and it would be a nice gesture to them for someone to go through and rewrite the history to properly reflect those identities. There are philosophical reasons not to do this but I’ve considered and am not even remotely concerned with them; this only modifies names and emails.
No, what I’m ultimately concerned with are the mechanical side-effects of this. I know that it means rewriting old commits and will require people to update their local repos to accommodate. That said, most of the people affected by this would be willing to deal with it for the sake of our friends.
What I don’t know enough about is what it might break. I know one possible breakage would be in images kept in our registry that have their commit hash as a tag, if any of those images came from a commit that has since been rebuilt… Maybe I could create a map during the rewrite that would let me then amend the hash references in places like that registry?
I know that there’s a lot of variability in this and I was hoping just as much to open up a discussion on the issue as I was to get an easy answer. I intend to be active with this and I look forward to your input!
Other details:
- The GitLab instance in question has repos with history across many previous Git hosts including GitHub, so references that are now unused aren’t so much of a concern for me.
- In most of the cases where history would be rewritten the commits start more than a year ago; I wouldn’t be making any changes in anything actively or even recently developed.
Now, the platform info:
- The instance in question is self-hosted and running
12.10-ee
(I know, we’re working on it) - We have (at least) 9 environments running the following
gitlab-runner
versions:
- Build, Testing, and Lint/Analytics:
13.4.1
- Primary Dev, Staging, and Enterprise Dev instances:
13.10.0
- Secondary and Tertiary Dev instances:
13.11.0
- Production:
13.11.0
The .gitlab-ci.yml for our most widely used repo (sorry it’s so large):
# Pipeline stage order; a job's stage determines when it runs relative to others.
stages:
  - build
  - test
  - merged_coverage
  - integration
  - release
  - production
  - jobs
# Global variables inherited by every job in the pipeline.
variables:
  SLACK_HOOK: https://hooks.slack.com/services/$DEPLOYBOT
  SIGNALFX_LOG_PATH: STDOUT
# Hidden template: docker-in-docker with registry login.
# FIX: the anchor was garbled to "®istry" by an encoding error
# ("&reg" collapsed into the "®" entity); it must be "&registry" so the
# `<<: *registry` merge keys used by the build/release jobs resolve.
.registry: &registry
  image: docker:latest
  services:
    - name: docker:dind
  before_script:
    # --password-stdin keeps the job token out of the process list and shell history,
    # unlike `-p $CI_JOB_TOKEN` which docker itself warns about.
    - echo "$CI_JOB_TOKEN" | docker login -u gitlab-ci-token --password-stdin registry.company.dev
# Builds the application image, warming the layer cache from the branch image
# (falling back to master), and pushes both SHA- and branch-slug-tagged images.
"Build":
  <<: *registry
  stage: build
  script:
    - docker pull $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG || (docker pull $CI_REGISTRY_IMAGE:master && docker tag $CI_REGISTRY_IMAGE:master $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG)
    - docker build --cache-from $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG -t "$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA" -t "$CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG" ./
    - docker push "$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA"
    - docker push "$CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG"
# Runs the repo's lint script inside the image built for this commit.
"Lint":
  image: "$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA"
  stage: test
  script:
    - bin/rubo_lint
# Hidden template for all test jobs: commit image, postgres service,
# shared test env vars, and per-node coverage artifacts.
.test: &test
  image: "$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA"
  stage: test
  variables: &test_variables
    RACK_ENV: test
    RAILS_ENV: test
    ENVIRON: test
    ALS_DB_HOST: postgres
    POSTGRES_DB: cobra_test
  services:
    - name: postgres:9.6.3-alpine
  artifacts:
    paths:
      - coverage/*.results.json
  coverage: '/\(\d+\.\d+\%\) covered/'
# Test suite split across two parallel nodes plus a separate engine-test job.
# CI_NODE_TOTAL / CI_NODE_INDEX are quoted: GitLab requires variable values
# to be strings, and unquoted 0/1/2 are YAML integers.
"Test node 0":
  <<: *test
  variables:
    <<: *test_variables
    CI_NODE_TOTAL: "2"
    CI_NODE_INDEX: "0"
  script:
    - bin/test
"Test node 1":
  <<: *test
  variables:
    <<: *test_variables
    CI_NODE_TOTAL: "2"
    CI_NODE_INDEX: "1"
  script:
    - bin/test
"Engine tests":
  <<: *test
  script:
    - bin/test_engines
# Manually-triggered job that merges the per-node coverage artifacts
# (declared as dependencies) into a single coverage report artifact.
"Generate Merged Coverage":
  image: "$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA"
  stage: merged_coverage
  when: manual
  variables:
    RACK_ENV: test
    RAILS_ENV: test
  artifacts:
    name: "Coverage-$CI_COMMIT_REF_NAME"
    paths:
      - coverage
  dependencies:
    - "Engine tests"
    - "Test node 0"
    - "Test node 1"
  coverage: '/\(\d+\.\d+\%\) covered/'
  script:
    - bin/merge_coverage_results
# Manually-triggered CodeClimate analysis; publishes the HTML report as an artifact.
"codeclimate":
  image: docker:stable
  stage: test
  when: manual
  variables:
    DOCKER_DRIVER: overlay
  services:
    - name: docker:dind
  script:
    - docker pull codeclimate/codeclimate:0.72.0
    - docker run --env CODECLIMATE_CODE="$PWD" --volume "$PWD":/code --volume /var/run/docker.sock:/var/run/docker.sock --volume /tmp/cc:/tmp/cc codeclimate/codeclimate:0.72.0 analyze -f html > codeclimate.html
  artifacts:
    paths: [codeclimate.html]
# On master only: retags the branch image with a timestamp tag and as :latest,
# then pushes both.
"Release":
  <<: *registry
  stage: release
  script:
    # $(...) instead of backticks: nestable and easier to read.
    - export TIME_TAG=$(date +"%Y.%m.%d.%H%M")
    - docker pull "$CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG"
    - docker tag "$CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG" "$CI_REGISTRY_IMAGE:$TIME_TAG"
    - docker tag "$CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG" "$CI_REGISTRY_IMAGE"
    - docker push "$CI_REGISTRY_IMAGE:$TIME_TAG"
    - docker push "$CI_REGISTRY_IMAGE"
  only:
    - master
# Hidden template: Capistrano-style deploy over SSH.
# NOTE(review): the *deploy anchor is not referenced by any job in this file —
# all deploys use *deployk8s. Possibly legacy; confirm before removing.
.deploy: &deploy
  image: "$CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG"
  stage: integration
  when: manual
  script:
    - eval $(ssh-agent)
    - ssh-add ~/.ssh/id_rsa
    - cap $CI_ENVIRONMENT_NAME deploy BRANCH=$CI_COMMIT_SHA
# Hidden template: Kubernetes deploy. Touches deploy_success as a marker file
# that the PROD after_script checks to pick the Slack notification.
.deployk8s: &deployk8s
  stage: integration
  when: manual
  script:
    - bin/kubernetes/migrate_database
    - bin/kubernetes/deploy
    - bin/kubernetes/deploy.sidekiq
    - bin/kubernetes/deploy.sidekiq.low
    - bin/kubernetes/deploy.helper
    - bin/kubernetes/deploy.grpc
    - touch $CI_PROJECT_DIR/deploy_success
# Per-environment k8s deploy jobs; each targets a runner via its k8-* tag.
# STAGING and SANDBOX additionally restrict to master.
"INT1 (k8s)":
  <<: *deployk8s
  environment:
    name: integration1
    url: https://app.int1.company.dev
  tags:
    - k8-int1
"INT2 (k8s)":
  <<: *deployk8s
  environment:
    name: integration2
    url: https://app.int2.company.dev
  tags:
    - k8-int2
"INT3 (k8s)":
  <<: *deployk8s
  environment:
    name: integration3
    url: https://app.int3.company.dev
  tags:
    - k8-int3
"STAGING (k8s)":
  <<: *deployk8s
  environment:
    name: staging
    url: https://app.staging.company.dev
  only:
    - master
  tags:
    - k8-staging
"SANDBOX (k8s)":
  <<: *deployk8s
  environment:
    name: sandbox
    url: https://app.sandbox.company.dev
  only:
    - master
  tags:
    - k8-sandbox
# Production deploy: overrides the template's stage, notifies Slack before and
# after, and uses the deploy_success marker file to report success/failure.
"PROD (k8s)":
  <<: *deployk8s
  stage: production
  environment:
    name: production
    url: https://app.company.com
  tags:
    - k8-prod
  only:
    - master
  before_script:
    - bin/notify_slack started
  after_script:
    - bin/kubernetes/deploy.cron
    # Literal block scalar so the multi-line shell conditional survives intact.
    - |
      if [ -e deploy_success ]; then
        bin/notify_slack successful
      else
        bin/notify_slack failure
      fi
# GitLab Pages: publishes generated API docs from master as the public/ artifact.
pages:
  image: "$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA"
  stage: test
  variables:
    RACK_ENV: test
    RAILS_ENV: test
    ALS_DB_HOST: postgres
    POSTGRES_DB: cobra_test
  services:
    - name: postgres:9.6.3-alpine
  script:
    - rm -rf public
    - mv doc/api public
  artifacts:
    paths:
      - public
  only:
    - master
# ============================================================
# one-off tasks that have already been performed in the past
#
# Hidden template: renders a k8s Job manifest from a template, substituting
# image/job/task names, then creates the Job and tails its logs.
.job: &job
  stage: jobs
  before_script:
    - export IMAGE_NAME="$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA"
    # $(...) instead of backticks for command substitution.
    - export TIMESTAMP=$(date +%s)
    # k8s names must be DNS-safe: replace _ and : with -, cap length at 51.
    - export JOB_NAME=cobra-$(echo "$TASK_NAME-$TIMESTAMP" | sed "s;_;-;g; s;:;-;g" | head -c 51)
    - sed "s;<IMAGE_NAME>;$IMAGE_NAME;g; s;<JOB_NAME>;$JOB_NAME;g; s;<TASK_NAME>;$TASK_NAME;g;" config/kubernetes/jobs/rake-task.template.yml > config/kubernetes/jobs/$JOB_NAME-pre-variables.yml
    - echo $K8S_EMAIL_FILTER
    - echo $K8S_MEMBER_IDS
    - echo $K8S_DRY_RUN
    - bin/replace_k8s_rake_variables config/kubernetes/jobs/$JOB_NAME-pre-variables.yml > config/kubernetes/jobs/$JOB_NAME.yml
    - yq m -ixd "*" config/kubernetes/jobs/$JOB_NAME.yml config/kubernetes/annotations.yml
  script:
    - cat bin/cobra.txt
    - cat config/kubernetes/jobs/$JOB_NAME.yml
    - kubectl create -f config/kubernetes/jobs/$JOB_NAME.yml
    # bin/await may time out on long jobs; logs are still tailed below.
    - bin/await $JOB_NAME || true
    - kubectl logs -lapp=cobra -lcomponent=$JOB_NAME -c $JOB_NAME --tail=-1
  when: manual
# One-off rake-task jobs: fill_default_businesses and alerts:business_refresh.
# NOTE(review): int1:member_default_business_fill has no except/only rule,
# unlike the other int1 jobs — confirm whether `except: [master]` was intended.
int1:member_default_business_fill:
  <<: *job
  environment:
    name: integration1
  variables:
    TASK_NAME: fill_default_businesses
  tags:
    - k8-int1
prod:member_default_business_fill:
  <<: *job
  only:
    - master
  environment:
    name: production
  variables:
    TASK_NAME: fill_default_businesses
  tags:
    - k8-prod
int1:alerts_biz_refresh:
  <<: *job
  except:
    - master
  environment:
    name: integration1
  variables:
    TASK_NAME: alerts:business_refresh
  tags:
    - k8-int1
prod:alerts_biz_refresh:
  <<: *job
  only:
    - master
  environment:
    name: production
  variables:
    TASK_NAME: alerts:business_refresh
  tags:
    - k8-prod
# One-off backfill_ownerships jobs, one per environment
# (non-prod on non-master branches; sandbox/prod on master only).
int1:backfill_ownerships:
  <<: *job
  except:
    - master
  environment:
    name: integration1
  variables:
    TASK_NAME: backfill_ownerships
  tags:
    - k8-int1
int2:backfill_ownerships:
  <<: *job
  except:
    - master
  environment:
    name: integration2
  variables:
    TASK_NAME: backfill_ownerships
  tags:
    - k8-int2
int3:backfill_ownerships:
  <<: *job
  except:
    - master
  environment:
    name: integration3
  variables:
    TASK_NAME: backfill_ownerships
  tags:
    - k8-int3
staging:backfill_ownerships:
  <<: *job
  except:
    - master
  environment:
    name: staging
  variables:
    TASK_NAME: backfill_ownerships
  tags:
    - k8-staging
sandbox:backfill_ownerships:
  <<: *job
  only:
    - master
  environment:
    name: sandbox
  variables:
    TASK_NAME: backfill_ownerships
  tags:
    - k8-sandbox
prod:backfill_ownerships:
  <<: *job
  only:
    - master
  environment:
    name: production
  variables:
    TASK_NAME: backfill_ownerships
  tags:
    - k8-prod
# One-off backfill_member_ids jobs, one per environment
# (non-prod on non-master branches; sandbox/prod on master only).
int1:member_ids:
  <<: *job
  except:
    - master
  environment:
    name: integration1
  variables:
    TASK_NAME: backfill_member_ids
  tags:
    - k8-int1
int2:member_ids:
  <<: *job
  except:
    - master
  environment:
    name: integration2
  variables:
    TASK_NAME: backfill_member_ids
  tags:
    - k8-int2
int3:member_ids:
  <<: *job
  except:
    - master
  environment:
    name: integration3
  variables:
    TASK_NAME: backfill_member_ids
  tags:
    - k8-int3
staging:member_ids:
  <<: *job
  except:
    - master
  environment:
    name: staging
  variables:
    TASK_NAME: backfill_member_ids
  tags:
    - k8-staging
sandbox:member_ids:
  <<: *job
  only:
    - master
  environment:
    name: sandbox
  variables:
    TASK_NAME: backfill_member_ids
  tags:
    - k8-sandbox
prod:member_ids:
  <<: *job
  only:
    - master
  environment:
    name: production
  variables:
    TASK_NAME: backfill_member_ids
  tags:
    - k8-prod
# One-off report regeneration jobs, production only, one per org.
prod:regenerate_report:org-a:
  <<: *job
  only:
    - master
  environment:
    name: production
  tags:
    - k8-prod
  variables:
    TASK_NAME: reporting:regenerate-org-a
prod:regenerate_report:org-b:
  <<: *job
  only:
    - master
  environment:
    name: production
  tags:
    - k8-prod
  variables:
    TASK_NAME: reporting:regenerate-org-b
prod:regenerate_report:org-c:
  <<: *job
  only:
    - master
  environment:
    name: production
  tags:
    - k8-prod
  variables:
    TASK_NAME: reporting:regenerate-org-c