Configure OIDC, make idempotent, fix bugs. Claude.ai

This commit is contained in:
2026-02-25 13:20:12 -05:00
parent 995b7c4070
commit d981b69669
23 changed files with 2269 additions and 760 deletions

View File

@@ -0,0 +1,389 @@
---
# Generate Agent ISO and deploy SNO (agent-based installer).
#
# Uses `openshift-install agent create image` — no SaaS API, no SSO required.
# The pull secret is the only Red Hat credential needed.
# Credentials (kubeconfig, kubeadmin-password) are generated locally under
# sno_install_dir/auth/ by openshift-install itself.
#
# Idempotency: If the cluster API is already responding, all install steps
# are skipped. Credentials on Proxmox host are never overwritten once saved.
# ------------------------------------------------------------------
# Step 0: Ensure sno_vm_id and sno_mac are populated.
# These are set as cacheable facts by create_vm.yml, but in ephemeral
# EEs or when running --tags sno_deploy_install alone the cache is empty.
# ------------------------------------------------------------------
# Query Proxmox for the SNO VM only when the cached facts are missing
# (ephemeral EEs, or a --tags run that skipped create_vm.yml).
- name: Retrieve VM info from Proxmox (needed when fact cache is empty)
  community.proxmox.proxmox_vm_info:
    api_host: "{{ hostvars['proxmox_api']['ansible_host'] }}"
    api_user: "{{ proxmox_api_user }}"
    api_port: "{{ hostvars['proxmox_api']['ansible_port'] }}"
    api_token_id: "{{ proxmox_api_token_id }}"
    api_token_secret: "{{ proxmox_api_token_secret }}"
    validate_certs: "{{ proxmox_validate_certs }}"
    node: "{{ proxmox_node }}"
    name: "{{ sno_vm_name }}"
    type: qemu
    config: current  # include the live VM config so net0 (and thus the MAC) is returned
  register: __sno_deploy_vm_info
  when: (sno_vm_id | default('')) == '' or (sno_mac | default('')) == ''

- name: Set sno_vm_id and sno_mac from live Proxmox query
  ansible.builtin.set_fact:
    sno_vm_id: "{{ __sno_deploy_vm_info.proxmox_vms[0].vmid }}"
    # Extract the colon-separated MAC address from the net0 config string.
    # regex_search called with a capture-group argument ('\1') returns a
    # list of captures, hence the trailing `first`. Fails loudly if net0
    # carries no MAC — intentional, later steps depend on it.
    sno_mac: >-
      {{ __sno_deploy_vm_info.proxmox_vms[0].config.net0
         | regex_search('([0-9A-Fa-f]{2}(?::[0-9A-Fa-f]{2}){5})', '\1')
         | first }}
    cacheable: true  # persist to the fact cache for later plays / tag runs
  when: __sno_deploy_vm_info is not skipped
# ------------------------------------------------------------------
# Step 0b: Check if OpenShift is already deployed and responding.
# If the API is reachable, skip ISO generation, boot, and install.
# ------------------------------------------------------------------
- name: Check if OpenShift cluster is already responding
  ansible.builtin.uri:
    url: "https://api.{{ ocp_cluster_name }}.{{ ocp_base_domain }}:6443/readyz"
    method: GET
    validate_certs: false
    # 401/403 still prove the API server is up — this is a liveness probe,
    # not an authenticated call.
    status_code: [200, 401, 403]
    timeout: 10
  register: __sno_deploy_cluster_alive
  # ignore_errors (rather than failed_when: false) keeps the real failed
  # status on the register so `is success` below reflects the true outcome
  # while the play continues either way.
  ignore_errors: true

- name: Set cluster deployed flag
  ansible.builtin.set_fact:
    # Drives the idempotency skips for every install step below.
    __sno_deploy_cluster_deployed: "{{ __sno_deploy_cluster_alive is success }}"

- name: Display cluster status
  ansible.builtin.debug:
    msg: >-
      {{ 'OpenShift cluster is already deployed and responding — skipping install steps.'
         if __sno_deploy_cluster_deployed | bool
         else 'OpenShift cluster is not yet deployed — proceeding with installation.' }}
# Runs unconditionally: the install dir is also needed when recovering
# credentials for an already-deployed cluster.
- name: Ensure local install directories exist
  ansible.builtin.file:
    path: "{{ item }}"
    state: directory
    mode: "0750"  # auth/ will hold generated credentials — keep it restricted
  loop:
    - "{{ sno_install_dir }}"
    - "{{ sno_install_dir }}/auth"
# ------------------------------------------------------------------
# Step 0c: When cluster is already deployed, ensure a valid kubeconfig
# exists so post-install tasks can authenticate to the API.
# Try in order: local file → Proxmox host backup → SSH to SNO node.
# After obtaining a kubeconfig, validate it against the API and fall
# through to the next source if credentials are expired.
# ------------------------------------------------------------------
- name: Check if local kubeconfig already exists
  ansible.builtin.stat:
    path: "{{ __sno_deploy_kubeconfig }}"
  register: __sno_deploy_local_kubeconfig
  when: __sno_deploy_cluster_deployed | bool

- name: Validate local kubeconfig against API
  # `oc whoami` is a cheap authenticated round-trip: it fails when the
  # kubeconfig's credentials are expired/invalid, which makes the recovery
  # chain fall through to the next source.
  ansible.builtin.command:
    cmd: "oc whoami --kubeconfig={{ __sno_deploy_kubeconfig }} --insecure-skip-tls-verify"
  register: __sno_deploy_local_kubeconfig_valid
  ignore_errors: true
  changed_when: false  # read-only probe
  when:
    - __sno_deploy_cluster_deployed | bool
    # .stat is absent on a skipped register, hence the default(false).
    - __sno_deploy_local_kubeconfig.stat.exists | default(false)
- name: Check if kubeconfig exists on Proxmox host
  ansible.builtin.stat:
    path: "{{ sno_credentials_dir }}/kubeconfig"
  delegate_to: proxmox_host
  register: __sno_deploy_proxmox_kubeconfig
  # Only consult the Proxmox backup when the local copy is absent or failed
  # validation against the API.
  when:
    - __sno_deploy_cluster_deployed | bool
    - not (__sno_deploy_local_kubeconfig.stat.exists | default(false)) or
      (__sno_deploy_local_kubeconfig_valid is failed)

- name: Recover kubeconfig from Proxmox host
  # fetch copies FROM the delegated host TO the controller; flat=true writes
  # directly to dest instead of a hostname-prefixed directory tree.
  ansible.builtin.fetch:
    src: "{{ sno_credentials_dir }}/kubeconfig"
    dest: "{{ __sno_deploy_kubeconfig }}"
    flat: true
  delegate_to: proxmox_host
  when:
    - __sno_deploy_cluster_deployed | bool
    - not (__sno_deploy_local_kubeconfig.stat.exists | default(false)) or
      (__sno_deploy_local_kubeconfig_valid is failed)
    - __sno_deploy_proxmox_kubeconfig.stat.exists | default(false)

- name: Validate recovered Proxmox kubeconfig against API
  ansible.builtin.command:
    cmd: "oc whoami --kubeconfig={{ __sno_deploy_kubeconfig }} --insecure-skip-tls-verify"
  register: __sno_deploy_proxmox_kubeconfig_valid
  ignore_errors: true
  changed_when: false  # read-only probe
  # Same gating as the fetch above: only runs when the Proxmox copy was the
  # candidate source and it actually existed.
  when:
    - __sno_deploy_cluster_deployed | bool
    - not (__sno_deploy_local_kubeconfig.stat.exists | default(false)) or
      (__sno_deploy_local_kubeconfig_valid is failed)
    - __sno_deploy_proxmox_kubeconfig.stat.exists | default(false)
- name: Set flag - need SSH recovery
  ansible.builtin.set_fact:
    # SSH recovery is needed when the cluster is up but neither the local
    # kubeconfig nor the copy recovered from the Proxmox host validated
    # against the API.
    #
    # Why rc-based checks instead of `is failed` / stat.exists branches:
    #   * a skipped register has no `rc` key, so `rc | default(1)` treats
    #     "validation never ran" (file missing) the same as "validation
    #     failed" — which is exactly the semantics we want;
    #   * Ansible's `success` test is merely `not failed`, so it returns
    #     true for *skipped* tasks and cannot be used here directly;
    #   * the previous enumeration of exists/failed combinations missed the
    #     case "local kubeconfig exists but is invalid AND no Proxmox
    #     backup exists", leaving the cluster unrecoverable.
    __sno_deploy_need_ssh_recovery: >-
      {{
        (__sno_deploy_cluster_deployed | bool) and
        not (
          ((__sno_deploy_local_kubeconfig_valid.rc | default(1)) | int == 0)
          or
          ((__sno_deploy_proxmox_kubeconfig_valid.rc | default(1)) | int == 0)
        )
      }}
- name: Recover kubeconfig from SNO node via SSH
  # Last-resort source: read the node-local admin kubeconfig maintained for
  # the kube-apiserver static pod directly off the SNO host.
  ansible.builtin.command:
    cmd: >-
      ssh -o StrictHostKeyChecking=no core@{{ sno_ip }}
      sudo cat /etc/kubernetes/static-pod-resources/kube-apiserver-certs/secrets/node-kubeconfigs/lb-ext.kubeconfig
  register: __sno_deploy_recovered_kubeconfig
  # Read-only operation — never report "changed".
  changed_when: false
  # Without ignore_errors an SSH failure aborts the play here, making the
  # diagnostic "Fail if no valid kubeconfig" task (which checks rc != 0)
  # unreachable. Tolerate the failure so that task can report context.
  ignore_errors: true
  # stdout contains client certificates/keys — keep it out of logs.
  no_log: true
  when: __sno_deploy_need_ssh_recovery | bool
- name: Write recovered kubeconfig from SNO node
  ansible.builtin.copy:
    content: "{{ __sno_deploy_recovered_kubeconfig.stdout }}"
    dest: "{{ __sno_deploy_kubeconfig }}"
    mode: "0600"  # credential material — owner-only
  # The rendered content embeds client certificates/keys; suppress it from
  # task output and --diff mode.
  no_log: true
  when:
    - __sno_deploy_recovered_kubeconfig is not skipped
    - __sno_deploy_recovered_kubeconfig.rc == 0
- name: Update kubeconfig backup on Proxmox host
  ansible.builtin.copy:
    src: "{{ __sno_deploy_kubeconfig }}"
    dest: "{{ sno_credentials_dir }}/kubeconfig"
    mode: "0600"
    backup: true  # keep the previous (stale) copy rather than destroying it
  delegate_to: proxmox_host
  when:
    - __sno_deploy_recovered_kubeconfig is not skipped
    - __sno_deploy_recovered_kubeconfig.rc == 0

- name: Fail if no valid kubeconfig could be obtained
  # NOTE(review): the `rc != 0` branch is only reachable if the SSH recovery
  # task tolerates failure (ignore_errors); otherwise that task aborts the
  # play before this message can be shown — confirm it is set.
  ansible.builtin.fail:
    msg: >-
      Cluster is deployed but no valid kubeconfig could be obtained.
      Tried: local file, Proxmox host ({{ sno_credentials_dir }}/kubeconfig),
      and SSH to core@{{ sno_ip }}. Cannot proceed with post-install tasks.
  when:
    - __sno_deploy_need_ssh_recovery | bool
    - __sno_deploy_recovered_kubeconfig is skipped or __sno_deploy_recovered_kubeconfig.rc != 0
# ------------------------------------------------------------------
# Step 1: Check whether a fresh ISO already exists on Proxmox
# AND the local openshift-install state dir is intact.
# ------------------------------------------------------------------
- name: Check if ISO already exists on Proxmox and is less than 24 hours old
  ansible.builtin.stat:
    path: "{{ proxmox_iso_dir }}/{{ sno_iso_filename }}"
    get_checksum: false  # only existence/mtime are needed; skip the hash
  delegate_to: proxmox_host
  register: __sno_deploy_iso_stat
  when: not __sno_deploy_cluster_deployed | bool

- name: Check if local openshift-install state directory exists
  # Reusing the remote ISO is only considered safe when the matching local
  # openshift-install state is also intact (see header comment above).
  ansible.builtin.stat:
    path: "{{ sno_install_dir }}/.openshift_install_state"
    get_checksum: false
  register: __sno_deploy_state_stat
  when: not __sno_deploy_cluster_deployed | bool

- name: Set fact - skip ISO build if recent ISO exists on Proxmox and local state is intact
  ansible.builtin.set_fact:
    # Fresh = cluster not yet deployed AND the remote ISO exists AND it is
    # younger than 24 h (86400 s) AND the local state file survived.
    # Skipped stat registers carry no `.stat`, hence every default().
    # Jinja filters bind tighter than `-`, so the age expression is
    # (now|int) - (mtime|default(0)|int) as intended.
    __sno_deploy_iso_fresh: >-
      {{
        not (__sno_deploy_cluster_deployed | bool) and
        __sno_deploy_iso_stat.stat.exists | default(false) and
        (now(utc=true).timestamp() | int - __sno_deploy_iso_stat.stat.mtime | default(0) | int) < 86400 and
        __sno_deploy_state_stat.stat.exists | default(false)
      }}
# ------------------------------------------------------------------
# Step 2: Get openshift-install binary
# Always ensure the binary is present — needed for both ISO generation
# and wait-for-install-complete regardless of __sno_deploy_iso_fresh.
# ------------------------------------------------------------------
- name: Download openshift-install tarball
  ansible.builtin.get_url:
    url: "https://mirror.openshift.com/pub/openshift-v4/clients/ocp/stable-{{ ocp_version }}/openshift-install-linux.tar.gz"
    dest: "{{ sno_install_dir }}/openshift-install-{{ ocp_version }}.tar.gz"
    mode: "0644"
    # Optional pinning: when ocp_install_checksum is unset the parameter is
    # omitted and the download is not integrity-checked.
    checksum: "{{ ocp_install_checksum | default(omit) }}"
  register: __sno_deploy_install_tarball
  when: not __sno_deploy_cluster_deployed | bool

- name: Extract openshift-install binary
  ansible.builtin.unarchive:
    src: "{{ sno_install_dir }}/openshift-install-{{ ocp_version }}.tar.gz"
    dest: "{{ sno_install_dir }}"
    remote_src: false
    include:
      - openshift-install  # extract only the binary, not the whole archive
  # Re-extract only when the tarball changed or the binary is missing; the
  # `is file` test evaluates on the controller, where sno_install_dir lives.
  when: not __sno_deploy_cluster_deployed | bool and (__sno_deploy_install_tarball.changed or not (sno_install_dir ~ '/openshift-install') is file)

- name: Download openshift-client tarball
  ansible.builtin.get_url:
    url: "https://mirror.openshift.com/pub/openshift-v4/clients/ocp/stable-{{ ocp_version }}/openshift-client-linux.tar.gz"
    dest: "{{ sno_install_dir }}/openshift-client-{{ ocp_version }}.tar.gz"
    mode: "0644"
    checksum: "{{ ocp_client_checksum | default(omit) }}"
  register: __sno_deploy_client_tarball
  when: not __sno_deploy_cluster_deployed | bool

- name: Extract oc binary
  ansible.builtin.unarchive:
    src: "{{ sno_install_dir }}/openshift-client-{{ ocp_version }}.tar.gz"
    dest: "{{ sno_install_dir }}"
    remote_src: false
    include:
      - oc
  when: not __sno_deploy_cluster_deployed | bool and (__sno_deploy_client_tarball.changed or not (sno_install_dir ~ '/oc') is file)
# ------------------------------------------------------------------
# Step 3: Template agent installer config files (skipped if ISO is fresh)
# ------------------------------------------------------------------
- name: Template install-config.yaml
  ansible.builtin.template:
    src: install-config.yaml.j2
    dest: "{{ sno_install_dir }}/install-config.yaml"
    mode: "0640"
  when: not __sno_deploy_cluster_deployed | bool and not __sno_deploy_iso_fresh | bool
  # The rendered install-config embeds credentials (the pull secret — see
  # file header), so suppress it from task output and --diff mode.
  no_log: true

- name: Template agent-config.yaml
  ansible.builtin.template:
    src: agent-config.yaml.j2
    dest: "{{ sno_install_dir }}/agent-config.yaml"
    mode: "0640"
  when: not __sno_deploy_cluster_deployed | bool and not __sno_deploy_iso_fresh | bool
# ------------------------------------------------------------------
# Step 4: Generate discovery ISO (skipped if ISO is fresh)
# ------------------------------------------------------------------
- name: Generate agent-based installer ISO
  # Consumes install-config.yaml / agent-config.yaml from sno_install_dir
  # and writes the bootable agent ISO (and installer state) in place.
  ansible.builtin.command:
    cmd: "{{ sno_install_dir }}/openshift-install agent create image --dir {{ sno_install_dir }}"
  # ISO generation always mutates the install dir; declare it explicitly to
  # match the changed_when style of every other command task in this file
  # (and satisfy ansible-lint's no-changed-when rule).
  changed_when: true
  when: not __sno_deploy_cluster_deployed | bool and not __sno_deploy_iso_fresh | bool
# ------------------------------------------------------------------
# Step 5: Upload ISO to Proxmox and attach to VM
# ------------------------------------------------------------------
- name: Copy discovery ISO to Proxmox ISO storage
  ansible.builtin.copy:
    src: "{{ sno_install_dir }}/{{ sno_iso_filename }}"
    dest: "{{ proxmox_iso_dir }}/{{ sno_iso_filename }}"
    mode: "0644"
  delegate_to: proxmox_host
  # Skipped when the ISO on Proxmox is still fresh — the attach step below
  # then reuses the copy already there.
  when: not __sno_deploy_cluster_deployed | bool and not __sno_deploy_iso_fresh | bool

- name: Attach ISO to VM as CDROM
  ansible.builtin.command:
    cmd: "qm set {{ sno_vm_id }} --ide2 {{ proxmox_iso_storage }}:iso/{{ sno_iso_filename }},media=cdrom"
  delegate_to: proxmox_host
  changed_when: true  # qm set rewrites the VM config
  when: not __sno_deploy_cluster_deployed | bool

- name: Ensure boot order prefers disk, falls back to CDROM
  # Once the installer has written the disk, the node boots from scsi0; the
  # agent ISO on ide2 is only reached while the disk is still empty.
  # (The `;` is safe: ansible.builtin.command does not invoke a shell.)
  ansible.builtin.command:
    cmd: "qm set {{ sno_vm_id }} --boot order=scsi0;ide2"
  delegate_to: proxmox_host
  changed_when: true
  when: not __sno_deploy_cluster_deployed | bool
# ------------------------------------------------------------------
# Step 6: Boot the VM
# ------------------------------------------------------------------
- name: Start SNO VM
  community.proxmox.proxmox_kvm:
    api_host: "{{ hostvars['proxmox_api']['ansible_host'] }}"
    api_user: "{{ proxmox_api_user }}"
    api_port: "{{ hostvars['proxmox_api']['ansible_port'] }}"
    api_token_id: "{{ proxmox_api_token_id }}"
    api_token_secret: "{{ proxmox_api_token_secret }}"
    validate_certs: "{{ proxmox_validate_certs }}"
    node: "{{ proxmox_node }}"
    name: "{{ sno_vm_name }}"
    state: started  # ensure-running; module handles the already-started case
  when: not __sno_deploy_cluster_deployed | bool
# ------------------------------------------------------------------
# Step 7: Wait for installation to complete (~60-90 min)
# ------------------------------------------------------------------
- name: Wait for SNO installation to complete
  # Polls the agent installer progress (~60-90 min); async keeps the task
  # alive past default connection timeouts, checking every 30 s.
  ansible.builtin.command:
    cmd: "{{ sno_install_dir }}/openshift-install agent wait-for install-complete --dir {{ sno_install_dir }} --log-level=info"
  # The command only observes cluster progress — it changes nothing itself,
  # so never report "changed" (also satisfies ansible-lint no-changed-when).
  changed_when: false
  async: 5400
  poll: 30
  when: not __sno_deploy_cluster_deployed | bool
# ------------------------------------------------------------------
# Step 8: Persist credentials to Proxmox host
# Only copy if credentials do not already exist on the remote host,
# to prevent overwriting valid credentials on re-runs.
# ------------------------------------------------------------------
- name: Create credentials directory on Proxmox host
  ansible.builtin.file:
    path: "{{ sno_credentials_dir }}"
    state: directory
    mode: "0700"  # credentials live here — owner-only
  delegate_to: proxmox_host

- name: Check if credentials already exist on Proxmox host
  # kubeadmin-password is used as the sentinel for "credentials saved".
  ansible.builtin.stat:
    path: "{{ sno_credentials_dir }}/kubeadmin-password"
  delegate_to: proxmox_host
  register: __sno_deploy_remote_creds

- name: Copy kubeconfig to Proxmox host
  ansible.builtin.copy:
    src: "{{ sno_install_dir }}/auth/kubeconfig"
    dest: "{{ sno_credentials_dir }}/kubeconfig"
    mode: "0600"
    backup: true
    # Per-file safety net on top of the sentinel check: never overwrite an
    # existing remote file even if the stat above is stale.
    force: false
  delegate_to: proxmox_host
  when:
    - not __sno_deploy_remote_creds.stat.exists
    # Guard against failure when this run skipped the install (cluster was
    # already deployed) and no local auth/ files were ever generated; the
    # `is file` test evaluates on the controller, where copy src is read.
    - (sno_install_dir ~ '/auth/kubeconfig') is file

- name: Copy kubeadmin-password to Proxmox host
  ansible.builtin.copy:
    src: "{{ sno_install_dir }}/auth/kubeadmin-password"
    dest: "{{ sno_credentials_dir }}/kubeadmin-password"
    mode: "0600"
    backup: true
    force: false  # same per-file never-overwrite guarantee as above
  delegate_to: proxmox_host
  when:
    - not __sno_deploy_remote_creds.stat.exists
    - (sno_install_dir ~ '/auth/kubeadmin-password') is file
# ------------------------------------------------------------------
# Step 9: Eject CDROM so the VM never boots the agent ISO again
# ------------------------------------------------------------------
- name: Eject CDROM after successful installation
  # Replaces ide2 with an empty drive so a reboot can never run the agent
  # installer again (see also the disk-first boot order set in Step 5).
  ansible.builtin.command:
    cmd: "qm set {{ sno_vm_id }} --ide2 none,media=cdrom"
  delegate_to: proxmox_host
  changed_when: true
  when: not __sno_deploy_cluster_deployed | bool

- name: Display post-install info
  ansible.builtin.debug:
    msg:
      - "SNO installation complete!"
      - "API URL : https://api.{{ ocp_cluster_name }}.{{ ocp_base_domain }}:6443"
      - "Console : https://console-openshift-console.apps.{{ ocp_cluster_name }}.{{ ocp_base_domain }}"
      - "Kubeconfig : {{ sno_credentials_dir }}/kubeconfig (on proxmox_host)"
      - "kubeadmin pass : {{ sno_credentials_dir }}/kubeadmin-password (on proxmox_host)"
    verbosity: 1  # only shown when the play runs with -v or higher