Hello all.
I currently manage a stack that uses a GitLab setup created from the Helm chart. Recently, CI jobs that build Docker images started to fail: when around 15 services build simultaneously, there is an almost 100% chance that 4-6 of them will fail, yet re-running the failed jobs finishes successfully. Because I haven't seen any serious load on CPU or RAM, I suspect it is something related to disk load, like in this case.
But first I need to understand where exactly /var/lib/docker is stored. Am I right that it is stored on the cluster node's disk? In RAM? Or somewhere else?
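For context, this is how I would check where the dind-storage emptyDir actually ends up, assuming the default kubelet root dir /var/lib/kubelet (the pod name and UID below are placeholders):

$ kubectl get pod <runner-build-pod> -n gitlab -o jsonpath='{.metadata.uid}{"\n"}'
# then, on the node that runs that pod:
$ sudo ls /var/lib/kubelet/pods/<pod-uid>/volumes/kubernetes.io~empty-dir/
$ sudo du -sh /var/lib/kubelet/pods/<pod-uid>/volumes/kubernetes.io~empty-dir/dind-storage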
values.yml
global:
  hosts:
    domain: my-site.com
    externalIP: 1.2.3.4
    minio:
      name: minio-gl.my-site.com
  smtp:
    enabled: true
    address: "otc-de-out.mms.t-systems-service.com"
    port: 25
    user_name: "stmp-user"
    ## https://docs.gitlab.com/charts/installation/secrets#smtp-password
    password:
      secret: "smtp-creds"
      key: password
    domain: ""
    authentication: "login"
    starttls_auto: false
    # openssl_verify_mode: "peer"
    openssl_verify_mode: "none"
    pool: false
    tls: false
  ## https://docs.gitlab.com/charts/installation/deployment#outgoing-email
  ## Email persona used in email sent by GitLab
  email:
    from: "gitlab@my-site.com"
    display_name: "GitLab"
    reply_to: 'noreply@my-site.com'
    subject_suffix: '[Gitlab]'
    smime:
      enabled: false
      # secretName: ""
      # keyName: "tls.key"
      # certName: "tls.crt"
  appConfig:
    terraformState:
      enabled: true
      bucket: gitlab-terraform-state
      connection: {}
gitlab:
  sidekiq:
    resources:
      limits:
        memory: 5G
      requests:
        memory: 1G
        cpu: 500m
  webservice:
    resources:
      limits:
        memory: 3G
      requests:
        memory: 1G
        cpu: 300m
  gitaly:
    persistence:
      size: 100Gi
minio:
  persistence:
    size: 50Gi
gitlab-runner:
  # install: true
  # rbac:
  #   create: true
  runners:
    # locked: false
    config: |
      [[runners]]
        [runners.kubernetes]
          dns_policy = "none"
          [runners.kubernetes.dns_config]
            nameservers = [
              "1.1.1.1",
              "8.8.8.8"
            ]
          [[runners.kubernetes.volumes.empty_dir]]
            name = "docker-certs"
            mount_path = "/certs/client"
            medium = "Memory"
          [[runners.kubernetes.volumes.empty_dir]]
            name = "dind-storage"
            mount_path = "/var/lib/docker"
      {{- if .Values.global.minio.enabled }}
      [runners.cache]
        Type = "s3"
        Path = "gitlab-runner"
        Shared = true
        [runners.cache.s3]
          ServerAddress = {{ include "gitlab-runner.cache-tpl.s3ServerAddress" . }}
          BucketName = "runner-cache"
          BucketLocation = "us-east-1"
          Insecure = false
      {{ end }}
    # tags: "myproject-k8s"
    # name: "gitlab-runner-k8s"
    privileged: true
nginx-ingress:
  controller:
    service:
      type: NodePort
certmanager-issuer:
  email: alerts@my-site.com
installCRDs: true
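If the dind-storage emptyDir does turn out to live on the node's (small) ephemeral storage, one direction I'm considering is backing /var/lib/docker with a PersistentVolumeClaim via the Kubernetes executor's volumes config. This is only a sketch: the claim name dind-storage-pvc is hypothetical and would have to exist in the runner's namespace, and one claim shared by parallel build pods would need ReadWriteMany (sharing a single /var/lib/docker between concurrent dockerd instances is itself questionable), so I'd treat it as something to test rather than a drop-in fix.

gitlab-runner:
  runners:
    config: |
      [[runners]]
        [runners.kubernetes]
          # hypothetical: mount a pre-created PVC at /var/lib/docker instead of an
          # emptyDir, so dind layer storage does not land on the node's root disk
          [[runners.kubernetes.volumes.pvc]]
            name = "dind-storage-pvc"      # assumed claim name, must exist in the gitlab namespace
            mount_path = "/var/lib/docker"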
GitLab Runner deployment
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gitlab-gitlab-runner
  namespace: gitlab
  selfLink: /apis/apps/v1/namespaces/gitlab/deployments/gitlab-gitlab-runner
  uid: 248c23b7-9534-4eca-9dee-e1d1c9268c80
  resourceVersion: '49797576'
  generation: 9
  creationTimestamp: '2022-03-22T08:47:54Z'
  labels:
    app: gitlab-gitlab-runner
    app.kubernetes.io/managed-by: Helm
    chart: gitlab-runner-0.39.0
    heritage: Helm
    release: gitlab
  annotations:
    deployment.kubernetes.io/revision: '9'
    meta.helm.sh/release-name: gitlab
    meta.helm.sh/release-namespace: gitlab
status:
  observedGeneration: 9
  replicas: 1
  updatedReplicas: 1
  readyReplicas: 1
  availableReplicas: 1
  conditions:
    - type: Available
      status: 'True'
      lastUpdateTime: '2022-03-22T13:09:44Z'
      lastTransitionTime: '2022-03-22T13:09:44Z'
      reason: MinimumReplicasAvailable
      message: Deployment has minimum availability.
    - type: Progressing
      status: 'True'
      lastUpdateTime: '2022-06-09T17:16:13Z'
      lastTransitionTime: '2022-03-22T08:47:54Z'
      reason: NewReplicaSetAvailable
      message: >-
        ReplicaSet "gitlab-gitlab-runner-7bd4454f97" has successfully
        progressed.
spec:
  replicas: 1
  selector:
    matchLabels:
      app: gitlab-gitlab-runner
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: gitlab-gitlab-runner
        chart: gitlab-runner-0.39.0
        heritage: Helm
        release: gitlab
      annotations:
        checksum/configmap: 5856ea3b9118eed5892a012c9836b8a6d5fa47747e61f3120f76d2a0395903d4
        checksum/secrets: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
        gitlab.com/prometheus_port: '9252'
        gitlab.com/prometheus_scrape: 'true'
        kubectl.kubernetes.io/restartedAt: '2022-06-09T21:17:50+04:00'
    spec:
      volumes:
        - name: runner-secrets
          emptyDir:
            medium: Memory
        - name: etc-gitlab-runner
          emptyDir:
            medium: Memory
        - name: init-runner-secrets
          projected:
            sources:
              - secret:
                  name: gitlab-minio-secret
              - secret:
                  name: gitlab-gitlab-runner-secret
                  items:
                    - key: runner-registration-token
                      path: runner-registration-token
                    - key: runner-token
                      path: runner-token
            defaultMode: 420
        - name: configmaps
          configMap:
            name: gitlab-gitlab-runner
            defaultMode: 420
      initContainers:
        - name: configure
          image: gitlab/gitlab-runner:alpine-v14.9.0
          command:
            - sh
            - /configmaps/configure
          env:
            - name: CI_SERVER_URL
              value: https://gitlab.my-project.com
            - name: CLONE_URL
            - name: RUNNER_EXECUTOR
              value: kubernetes
            - name: REGISTER_LOCKED
              value: 'false'
            - name: RUNNER_TAG_LIST
            - name: KUBERNETES_PRIVILEGED
              value: 'true'
            - name: KUBERNETES_NAMESPACE
              value: gitlab
          resources: {}
          volumeMounts:
            - name: runner-secrets
              mountPath: /secrets
            - name: configmaps
              readOnly: true
              mountPath: /configmaps
            - name: init-runner-secrets
              readOnly: true
              mountPath: /init-secrets
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          imagePullPolicy: IfNotPresent
          securityContext:
            allowPrivilegeEscalation: false
      containers:
        - name: gitlab-gitlab-runner
          image: gitlab/gitlab-runner:alpine-v14.9.0
          command:
            - /usr/bin/dumb-init
            - '--'
            - /bin/bash
            - /configmaps/entrypoint
          ports:
            - name: metrics
              containerPort: 9252
              protocol: TCP
          env:
            - name: CI_SERVER_URL
              value: https://gitlab.my-project.com
            - name: CLONE_URL
            - name: RUNNER_EXECUTOR
              value: kubernetes
            - name: REGISTER_LOCKED
              value: 'false'
            - name: RUNNER_TAG_LIST
            - name: KUBERNETES_PRIVILEGED
              value: 'true'
            - name: KUBERNETES_NAMESPACE
              value: gitlab
          resources: {}
          volumeMounts:
            - name: runner-secrets
              mountPath: /secrets
            - name: etc-gitlab-runner
              mountPath: /home/gitlab-runner/.gitlab-runner
            - name: configmaps
              mountPath: /configmaps
          livenessProbe:
            exec:
              command:
                - /bin/bash
                - /configmaps/check-live
            initialDelaySeconds: 60
            timeoutSeconds: 1
            periodSeconds: 10
            successThreshold: 1
            failureThreshold: 3
          readinessProbe:
            exec:
              command:
                - /usr/bin/pgrep
                - gitlab.*runner
            initialDelaySeconds: 10
            timeoutSeconds: 1
            periodSeconds: 10
            successThreshold: 1
            failureThreshold: 3
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          imagePullPolicy: IfNotPresent
          securityContext:
            allowPrivilegeEscalation: false
      restartPolicy: Always
      terminationGracePeriodSeconds: 3600
      dnsPolicy: ClusterFirst
      serviceAccountName: gitlab-gitlab-runner
      serviceAccount: gitlab-gitlab-runner
      securityContext:
        runAsUser: 100
        fsGroup: 65533
      schedulerName: default-scheduler
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 25%
      maxSurge: 25%
  revisionHistoryLimit: 10
  progressDeadlineSeconds: 600
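For clarity, the Deployment above is only the runner manager pod; the pods that actually execute the builds (and mount the dind-storage emptyDir) are created on the fly by the Kubernetes executor. To inspect one of them while a pipeline is running I'd do something like this (the pod name is a placeholder):

$ kubectl get pods -n gitlab | grep '^runner-'
$ kubectl get pod <build-pod> -n gitlab -o yaml | grep -B2 -A3 emptyDir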
Node stats
$ kubectl get node 10.0.1.111 -o jsonpath='{.status.capacity}'
{"cce/eni":"1","cpu":"2","ephemeral-storage":"10251540Ki","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"8174020Ki","pods":"110"}