How can I provide a manual option when a job fails?

I have a pipeline as below.

If I set the allow_failure value to true for the teardown job, the next job runs immediately.

If I set the allow_failure value to false for the teardown job, the next job is put into a skipped state, and I can't run it manually later either.

The play button no longer works for the last job.

How can I configure this so that, when the teardown job fails, we can come back later and run the next job (destroy-admin-server) manually?

Please suggest.

@snim2, can you comment on this?

Can you post the YAML for those jobs?

It is a generic requirement, not specific to my pipeline.
Still, below is the relevant portion of my YAML.



# to tear down a cluster deployed using the admin server
.ifteardown:
  rules:
    - if: "$TEARDOWNANDDESTROY && $PREVIOUS_JOB_ID" #only teardown and destroy is triggered
    # Removed $PACKAGE_URL as it is not mandatory in the destroy part.
    # The original expression tested the bare existence of $ACTION twice in the
    # same rule; both tests were redundant because '$ACTION == "new"' already
    # requires ACTION to be defined, so they are dropped here (same behavior).
    - if: '$CI_PIPELINE_SOURCE == "schedule" && $TEARDOWNANDDESTROY && $REGION && $ROLE_ARN && $ACTION == "new" && $CLOUD_PLATFORM == "aws" && $ROLE_ARN != "" && $REGION != ""'
      #when: always
    - if: '$CI_PIPELINE_SOURCE == "trigger" && $TEARDOWNANDDESTROY && $REGION && $ROLE_ARN && $ACTION == "new" && $CLOUD_PLATFORM == "aws" && $ROLE_ARN != "" && $REGION != ""'
      #when: always
    # checking whether ACTION variable exists, not its value
    #$CI_PIPELINE_SOURCE == "web" is not kept below as want to keep it manual for any source point.
    # Removed $PACKAGE_URL as it is not mandatory in the destroy part
    - if: '$REGION && $ROLE_ARN && $ACTION && $CLOUD_PLATFORM == "aws" && $ROLE_ARN != "" && $REGION != ""'
      when: manual
    - when: never

# to destroy admin server, rule set same as teardown but with ROLE_ARN additional
# $PREVIOUS_JOB_ID is provided, this is required as connect_admin_server can't fetch state file to destroy.
.ifTEARDOWNANDDESTROY:
  rules:
    # NOTE(review): GitLab evaluates rules top-down and stops at the first match.
    # This rule can therefore still add the job even when ADMIN_SERVER_IP is set,
    # because the "when: never" guard below is only reached if this rule does
    # NOT match. If the guard is meant to be absolute, it should be the first
    # rule in the list — TODO confirm the intended precedence.
    - if: '$CLOUD_PLATFORM == "aws" && $ROLE_ARN && $TEARDOWNANDDESTROY && $PREVIOUS_JOB_ID' #only teardown and destroy is triggered
    - if: '$ADMIN_SERVER_IP && $ADMIN_SERVER_IP != ""' #when it is connect_admin_server, it won't trigger
      when: never
    # Removed $PACKAGE_URL as it is not mandatory in the destroy part
    - if: '$CI_PIPELINE_SOURCE == "schedule" && $TEARDOWNANDDESTROY && $ACTION && $REGION && $ROLE_ARN && $CLOUD_PLATFORM == "aws" && $ROLE_ARN != "" && $REGION != ""'
      #when: always
    - if: '$CI_PIPELINE_SOURCE == "trigger" && $TEARDOWNANDDESTROY && $ACTION && $REGION && $ROLE_ARN && $CLOUD_PLATFORM == "aws" && $ROLE_ARN != "" && $REGION != ""'
      #when: always
    #$CI_PIPELINE_SOURCE == "web" is not kept below as want to keep it manual for any source point.
    # Removed $PACKAGE_URL as it is not mandatory in the destroy part
    - if: '$ACTION && $REGION && $ROLE_ARN && $CLOUD_PLATFORM == "aws" && $ROLE_ARN != "" && $REGION != ""'
      when: manual
    # NOTE(review): leaving the final "when: never" commented out is equivalent
    # in effect — when no rule matches, the job is simply not added to the pipeline.
    #- when: never

# Fetches the artifacts (ssh key, tfstate, var file, ...) of the job identified
# by PREVIOUS_JOB_ID and re-publishes them as this pipeline's artifacts, so the
# teardown/destroy jobs can use them.
download-previous-job-artifacts:
  stage: build
  rules:
    - if: "$TEARDOWNANDDESTROY && $PREVIOUS_JOB_ID"
  image:
    name: alpine:3.16
    entrypoint: [""]
  script:
    - apk add --no-cache openssh curl zip unzip
    # --fail makes curl exit non-zero on an HTTP error (expired artifacts,
    # wrong PREVIOUS_JOB_ID, bad token), failing the job here with a clear
    # message instead of letting unzip choke on an error-response body.
    - 'curl --fail --location --output artifacts.zip --header "JOB-TOKEN: $CI_JOB_TOKEN" "https://gitlab.com/api/v4/projects/$CI_PROJECT_ID/jobs/$PREVIOUS_JOB_ID/artifacts"'
    - unzip artifacts.zip -d artifacts/
    # List the extracted files BEFORE moving them out; the original listed the
    # directory after "mv artifacts/*" had already emptied it.
    - ls -la artifacts
    - mv artifacts/* $CI_PROJECT_DIR/
  artifacts:
    untracked: true
    when: always
    paths:
      - "$CI_PROJECT_DIR/ssh_key.pem"
      - "$CI_PROJECT_DIR/ssh_command.txt"
      - "$CI_PROJECT_DIR/scp_command.txt"
      - "$CI_PROJECT_DIR/terraform.tfstate"
      - "$CI_PROJECT_DIR/admin_server.tfvars"

# Runs "projectn teardown" on the remote admin server over ssh, using the
# ssh_command.txt / *.pem artifacts fetched by download-previous-job-artifacts.
teardown:
  stage: teardown
  #allow_failure: true
  extends:
    - .ifteardown
  image:
    name: alpine:3.16 # May need to change this image
    # only really needs to copy files and ssh
    entrypoint: [""]
  script:
    # *functions is a YAML anchor defined elsewhere in this file; presumably it
    # provides copy_remote_file / check_remote_file used below — verify there.
    - *functions
    - apk add --no-cache openssh
    # NOTE(review): world-writable recursive chmod is a security smell; it looks
    # like this only needs the .pem/txt artifacts readable — TODO confirm.
    - chmod -R 777 $CI_PROJECT_DIR
    # Build the ssh command from the saved artifact, disabling host-key checks.
    - test_pre_command="$(cat "$CI_PROJECT_DIR/ssh_command.txt") -o StrictHostKeyChecking=no"
    # Insert "-tt" after "ssh " to force TTY allocation for the remote commands.
    - test_command="$(echo $test_pre_command | sed -r 's/(ssh )(.*)/\1-tt \2/')"
    # NOTE(review): login_command and redirect_command are assigned but never
    # used in this script — candidates for removal. TODO confirm nothing else
    # (e.g. *functions) relies on them.
    - login_command="$test_command</dev/tty"
    - redirect_command="${test_command}"
    - rm -rf ~/.ssh/known_hosts
    - chmod 400 *.pem
    # Smoke-test the connection; "exit 0" keeps the step from failing here.
    - echo "projectn ls; exit 0" | $test_command
    # Poll until the "projectn" binary is available on the remote host.
    - |+
      until eval "$test_command 'command -v projectn >/dev/null'" 2>log; do
        echo "$(cat log)"
        echo "Waiting for the Project N package to finish installing..."
        sleep 10
      done
    # Poll until the remote .bash_profile contains the PATH update, i.e. the
    # AWS CLI install has completed.
    - |+
      # Prepare to deploy
      match='export PATH=~/.local/bin:\$PATH'
      until eval "$test_command 'grep -q '\''$match'\'' /home/ec2-user/.bash_profile'" 2> log
      do
        echo "$(cat log)"
        echo "Waiting for the AWS CLI to finish updating..."
        sleep 10
      done
    # Run the teardown remotely, capturing output to "outfile"; the trailing
    # "exit 0" means failure is detected via check_remote_file, not exit status.
    - echo "projectn teardown --auto-approve 2>&1 | tee outfile ; exit 0" | $test_command
    - copy_remote_file "outfile"
    - echo "Now checking log file for returnCode"
    - check_remote_file "outfile"

# Destroys the admin server with terraform, using the tfstate/tfvars artifacts
# produced earlier, then removes the RPM S3 bucket in after_script.
destroy-admin-server:
  <<: *yum-cache
  stage: cleanup
  extends:
    - .ifTEARDOWNANDDESTROY
  allow_failure: false
  interruptible: false
  image:
    name: amazon/aws-cli:latest
    entrypoint: [""]
  script:
    # BUGFIX: "export" is required — without it TF_IN_AUTOMATION is only a
    # shell-local variable and the terraform child processes below never see it.
    - export TF_IN_AUTOMATION=true
    - *yum-update #yum update -y
    - ${YUM} install git unzip gettext -y
    - *configure_aws_cli
    - aws sts get-caller-identity
    - git clone "https://project-n-setup:$(echo $PERSONAL_GITLAB_TOKEN)@gitlab.com/projectn-oss/project-n-setup.git"
    # Install tfenv
    - git clone https://github.com/tfutils/tfenv.git ~/.tfenv
    - ln -s ~/.tfenv /root/.tfenv
    - ln -s ~/.tfenv/bin/* /usr/local/bin
    # Install the pinned terraform version ($tfenv_version) through tfenv
    - tfenv install $tfenv_version
    - tfenv use $tfenv_version
    # Copy state and var file from create-admin-server to terraform directory
    - cp $CI_PROJECT_DIR/terraform.tfstate $CI_PROJECT_DIR/project-n-setup/$CLOUD_PLATFORM
    - cp $CI_PROJECT_DIR/admin_server.tfvars $CI_PROJECT_DIR/project-n-setup/$CLOUD_PLATFORM
    - cd $CI_PROJECT_DIR/project-n-setup/$CLOUD_PLATFORM
    - terraform init -input=false
    - terraform destroy -var-file=admin_server.tfvars -auto-approve
  # after_script runs even if the script above fails, cleaning up the RPM bucket.
  after_script:
    - aws s3 ls --profile $ACCOUNT_NAME
    - echo ${CI_COMMIT_SHORT_SHA}
    - account_id=`aws sts get-caller-identity --profile $ACCOUNT_NAME --query "Account" --output text`
    - aws s3 rm s3://projectn-rpm-${account_id}-${CI_COMMIT_SHORT_SHA} --profile $ACCOUNT_NAME --recursive
    - aws s3 rb --force s3://projectn-rpm-${account_id}-${CI_COMMIT_SHORT_SHA} --profile $ACCOUNT_NAME