proxlb/models/balancing.py (27 changes: 15 additions & 12 deletions)
@@ -175,7 +175,7 @@ def exec_rebalancing_vm(self, proxmox_api: ProxmoxApi, proxlb_data: ProxLbData,

try:
logger.info(f"Balancing: Starting to migrate VM guest {guest_name} from {guest_node_current} to {guest_node_target}.")
job_id = proxmox_api.nodes(guest_node_current).qemu(guest_id).migrate().post(**migration_options)
job_id = proxmox_api.nodes(guest_node_current).qemu(guest_id).migrate.post.model(**migration_options)
except proxmoxer.core.ResourceException as proxmox_api_error:
logger.critical(f"Balancing: Failed to migrate guest {guest_name} of type VM due to some Proxmox errors. Please check if resource is locked or similar.")
logger.debug(f"Balancing: Failed to migrate guest {guest_name} of type VM due to some Proxmox errors: {proxmox_api_error}")
@@ -206,7 +206,7 @@ def exec_rebalancing_ct(self, proxmox_api: ProxmoxApi, proxlb_data: ProxLbData,

try:
logger.info(f"Balancing: Starting to migrate CT guest {guest_name} from {guest_node_current} to {guest_node_target}.")
job_id = proxmox_api.nodes(guest_node_current).lxc(guest_id).migrate().post(target=guest_node_target, restart=1)
job_id = proxmox_api.nodes(guest_node_current).lxc(guest_id).migrate.post(target=guest_node_target, restart=1)
except proxmoxer.core.ResourceException as proxmox_api_error:
logger.critical(f"Balancing: Failed to migrate guest {guest_name} of type CT due to some Proxmox errors. Please check if resource is locked or similar.")
logger.debug(f"Balancing: Failed to migrate guest {guest_name} of type CT due to some Proxmox errors: {proxmox_api_error}")
@@ -230,25 +230,28 @@ def get_rebalancing_job_status(self, proxmox_api: ProxmoxApi, proxlb_data: ProxL
bool: True if the job completed successfully, False otherwise.
"""
logger.debug("Starting: get_rebalancing_job_status.")
job = proxmox_api.nodes(guest_current_node).tasks(job_id).status().get()
job = proxmox_api.nodes(guest_current_node).tasks(job_id).status.get.model()

job_status: str | None = job.status

# Fetch the actual migration job status if this was spawned by an HA job
if job["type"] == "hamigrate":
if job.type == "hamigrate":
logger.debug(f"Balancing: Job ID {job_id} (guest: {guest_name}) is a HA migration job. Fetching underlying migration job...")
time.sleep(1)
vm_id = int(job["id"])
qm_migrate_jobs = proxmox_api.nodes(guest_current_node).tasks.get(typefilter="qmigrate", vmid=vm_id, start=0, source="active", limit=1)
vm_id = int(job.id)
qm_migrate_jobs = proxmox_api.nodes(guest_current_node).tasks.get.model(typefilter="qmigrate", vmid=vm_id, start=0, source="active", limit=1)

if len(qm_migrate_jobs) > 0:
job = qm_migrate_jobs[0]
job_id = job["upid"]
logger.debug(f'Overwriting job polling for: ID {job_id} (guest: {guest_name}) by {job}')
qmjob = qm_migrate_jobs[0]
job_id = qmjob.upid
job_status = qmjob.status
logger.debug(f'Overwriting job polling for: ID {job_id} (guest: {guest_name}) by {qmjob}')
else:
logger.debug(f"Balancing: Job ID {job_id} (guest: {guest_name}) is a standard migration job. Proceeding with status check.")

# Watch the job ID until it finalizes
# Note: Unsaved jobs are delivered in uppercase by the Proxmox API
if job.get("status", "").lower() == "running":
if job_status and job_status.lower() == "running":
# Do not hammer the API while
# watching the job status
time.sleep(10)
@@ -264,9 +267,9 @@ def get_rebalancing_job_status(self, proxmox_api: ProxmoxApi, proxlb_data: ProxL
return False

# Validate job output for errors when finished
if job["status"] == "stopped":
if job_status == "stopped":

if job["exitstatus"] == "OK":
if job.exitstatus == "OK":
logger.debug(f"Balancing: Job ID {job_id} (guest: {guest_name}) was successfully.")
logger.debug("Finished: get_rebalancing_job_status.")
return True
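Note on the hunks above: dict-style task access (job["status"], job["exitstatus"]) is replaced by typed model attributes (job.status, job.exitstatus). For reference, here is a minimal, self-contained sketch of the same task-polling pattern against the plain dict-returning proxmoxer API; the host, credentials and the wait_for_task helper are placeholders for illustration and are not part of ProxLB.

# Sketch: poll a Proxmox task until it stops, then check its exit status.
# Assumes the classic dict-based proxmoxer API; connection details are fake.
import time
from proxmoxer import ProxmoxAPI

proxmox = ProxmoxAPI("pve01.example.com", user="root@pam",
                     password="secret", verify_ssl=False)

def wait_for_task(node: str, upid: str, poll_seconds: int = 10) -> bool:
    """Return True once the task stops with exitstatus OK, False otherwise."""
    while True:
        status = proxmox.nodes(node).tasks(upid).status.get()
        # Running tasks may report their status in uppercase, so normalize it.
        if str(status.get("status", "")).lower() == "running":
            time.sleep(poll_seconds)
            continue
        return status.get("status") == "stopped" and status.get("exitstatus") == "OK"

# Example usage: finished_ok = wait_for_task("pve01", job_id), where job_id is a task UPID string.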
proxlb/models/guests.py (126 changes: 69 additions & 57 deletions)
@@ -71,43 +71,49 @@ def get_guests(proxmox_api: ProxmoxApi, pools: Dict[str, ProxLbData.Pool], ha_ru
# VM objects: Iterate over all VMs on the current node via the qemu API object.
# Unlike nodes, we need to keep them even when they are ignored, so that the
# resource metrics used for rebalancing stay accurate and we do not overprovision the node.
for guest in proxmox_api.nodes(node).qemu.get():
if guest['status'] == 'running':
for qemu_guest in proxmox_api.nodes(node).qemu.get.model():
assert qemu_guest.cpus is not None
assert qemu_guest.disk is not None
assert qemu_guest.maxdisk is not None
assert qemu_guest.maxmem is not None
assert qemu_guest.mem is not None
assert qemu_guest.name is not None
if qemu_guest.status == 'running':

guest_tags = Tags.get_tags_from_guests(proxmox_api, node, guest['vmid'], GuestType.Vm)
guest_pools = Pools.get_pools_for_guest(guest['name'], pools)
guest_ha_rules = HaRules.get_ha_rules_for_guest(guest['name'], ha_rules, guest['vmid'])
guest_tags = Tags.get_tags_from_guests(proxmox_api, node, qemu_guest.vmid, GuestType.Vm)
guest_pools = Pools.get_pools_for_guest(qemu_guest.name, pools)
guest_ha_rules = HaRules.get_ha_rules_for_guest(qemu_guest.name, ha_rules, qemu_guest.vmid)

guests[guest['name']] = ProxLbData.Guest(
name=guest['name'],
guests[qemu_guest.name] = ProxLbData.Guest(
name=qemu_guest.name,
cpu=ProxLbData.Guest.Metric(
total=int(guest['cpus']),
used=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'cpu', None),
pressure_some_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'cpu', 'some'),
pressure_full_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'cpu', 'full'),
pressure_some_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'cpu', 'some', spikes=True),
pressure_full_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'cpu', 'full', spikes=True),
total=int(qemu_guest.cpus),
used=Guests.get_guest_rrd_data(proxmox_api, node, qemu_guest.vmid, qemu_guest.name, 'cpu', None),
pressure_some_percent=Guests.get_guest_rrd_data(proxmox_api, node, qemu_guest.vmid, qemu_guest.name, 'cpu', 'some'),
pressure_full_percent=Guests.get_guest_rrd_data(proxmox_api, node, qemu_guest.vmid, qemu_guest.name, 'cpu', 'full'),
pressure_some_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, qemu_guest.vmid, qemu_guest.name, 'cpu', 'some', spikes=True),
pressure_full_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, qemu_guest.vmid, qemu_guest.name, 'cpu', 'full', spikes=True),
pressure_hot=False,
),
disk=ProxLbData.Guest.Metric(
total=guest['maxdisk'],
used=guest['disk'],
pressure_some_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'disk', 'some'),
pressure_full_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'disk', 'full'),
pressure_some_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'disk', 'some', spikes=True),
pressure_full_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'disk', 'full', spikes=True),
total=qemu_guest.maxdisk,
used=qemu_guest.disk,
pressure_some_percent=Guests.get_guest_rrd_data(proxmox_api, node, qemu_guest.vmid, qemu_guest.name, 'disk', 'some'),
pressure_full_percent=Guests.get_guest_rrd_data(proxmox_api, node, qemu_guest.vmid, qemu_guest.name, 'disk', 'full'),
pressure_some_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, qemu_guest.vmid, qemu_guest.name, 'disk', 'some', spikes=True),
pressure_full_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, qemu_guest.vmid, qemu_guest.name, 'disk', 'full', spikes=True),
pressure_hot=False,
),
memory=ProxLbData.Guest.Metric(
total=guest['maxmem'],
used=guest['mem'],
pressure_some_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'memory', 'some'),
pressure_full_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'memory', 'full'),
pressure_some_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'memory', 'some', spikes=True),
pressure_full_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'memory', 'full', spikes=True),
total=qemu_guest.maxmem,
used=qemu_guest.mem,
pressure_some_percent=Guests.get_guest_rrd_data(proxmox_api, node, qemu_guest.vmid, qemu_guest.name, 'memory', 'some'),
pressure_full_percent=Guests.get_guest_rrd_data(proxmox_api, node, qemu_guest.vmid, qemu_guest.name, 'memory', 'full'),
pressure_some_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, qemu_guest.vmid, qemu_guest.name, 'memory', 'some', spikes=True),
pressure_full_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, qemu_guest.vmid, qemu_guest.name, 'memory', 'full', spikes=True),
pressure_hot=False,
),
id=guest['vmid'],
id=qemu_guest.vmid,
node_current=node,
node_target=node,
processed=False,
@@ -123,50 +129,56 @@ def get_guests(proxmox_api: ProxmoxApi, pools: Dict[str, ProxLbData.Pool], ha_ru
type=GuestType.Vm,
)

logger.debug(f"Resources of Guest {guest['name']} (type VM) added: {guests[guest['name']]}")
logger.debug(f"Resources of Guest {qemu_guest.name} (type VM) added: {guests[qemu_guest.name]}")
else:
logger.debug(f'Metric for VM {guest["name"]} ignored because VM is not running.')
logger.debug(f'Metric for VM {qemu_guest.name} ignored because VM is not running.')

# CT objects: Iterate over all CTs on the current node via the lxc API object.
# Unlike nodes, we need to keep them even when they are ignored, so that the
# resource metrics used for rebalancing stay accurate and we do not overprovision the node.
for guest in proxmox_api.nodes(node).lxc.get():
if guest['status'] == 'running':
for lxc_guest in proxmox_api.nodes(node).lxc.get.model():
assert lxc_guest.cpus is not None
assert lxc_guest.disk is not None
assert lxc_guest.maxdisk is not None
assert lxc_guest.maxmem is not None
assert lxc_guest.mem is not None
assert lxc_guest.name is not None
if lxc_guest.status == 'running':

guest_tags = Tags.get_tags_from_guests(proxmox_api, node, guest['vmid'], GuestType.Ct)
guest_pools = Pools.get_pools_for_guest(guest['name'], pools)
guest_ha_rules = HaRules.get_ha_rules_for_guest(guest['name'], ha_rules, guest['vmid'])
guest_tags = Tags.get_tags_from_guests(proxmox_api, node, lxc_guest.vmid, GuestType.Ct)
guest_pools = Pools.get_pools_for_guest(lxc_guest.name, pools)
guest_ha_rules = HaRules.get_ha_rules_for_guest(lxc_guest.name, ha_rules, lxc_guest.vmid)

guests[guest['name']] = ProxLbData.Guest(
guests[lxc_guest.name] = ProxLbData.Guest(
cpu=ProxLbData.Guest.Metric(
total=int(guest['cpus']),
used=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'cpu', None),
pressure_some_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'cpu', 'some'),
pressure_full_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'cpu', 'full'),
pressure_some_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'cpu', 'some', spikes=True),
pressure_full_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'cpu', 'full', spikes=True),
total=int(lxc_guest.cpus),
used=Guests.get_guest_rrd_data(proxmox_api, node, lxc_guest.vmid, lxc_guest.name, 'cpu', None),
pressure_some_percent=Guests.get_guest_rrd_data(proxmox_api, node, lxc_guest.vmid, lxc_guest.name, 'cpu', 'some'),
pressure_full_percent=Guests.get_guest_rrd_data(proxmox_api, node, lxc_guest.vmid, lxc_guest.name, 'cpu', 'full'),
pressure_some_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, lxc_guest.vmid, lxc_guest.name, 'cpu', 'some', spikes=True),
pressure_full_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, lxc_guest.vmid, lxc_guest.name, 'cpu', 'full', spikes=True),
pressure_hot=False,
),
disk=ProxLbData.Guest.Metric(
total=guest['maxdisk'],
used=guest['disk'],
pressure_some_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'disk', 'some'),
pressure_full_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'disk', 'full'),
pressure_some_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'disk', 'some', spikes=True),
pressure_full_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'disk', 'full', spikes=True),
total=lxc_guest.maxdisk,
used=lxc_guest.disk,
pressure_some_percent=Guests.get_guest_rrd_data(proxmox_api, node, lxc_guest.vmid, lxc_guest.name, 'disk', 'some'),
pressure_full_percent=Guests.get_guest_rrd_data(proxmox_api, node, lxc_guest.vmid, lxc_guest.name, 'disk', 'full'),
pressure_some_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, lxc_guest.vmid, lxc_guest.name, 'disk', 'some', spikes=True),
pressure_full_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, lxc_guest.vmid, lxc_guest.name, 'disk', 'full', spikes=True),
pressure_hot=False,
),
memory=ProxLbData.Guest.Metric(
total=guest['maxmem'],
used=guest['mem'],
pressure_some_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'memory', 'some'),
pressure_full_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'memory', 'full'),
pressure_some_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'memory', 'some', spikes=True),
pressure_full_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, guest['vmid'], guest['name'], 'memory', 'full', spikes=True),
total=lxc_guest.maxmem,
used=lxc_guest.mem,
pressure_some_percent=Guests.get_guest_rrd_data(proxmox_api, node, lxc_guest.vmid, lxc_guest.name, 'memory', 'some'),
pressure_full_percent=Guests.get_guest_rrd_data(proxmox_api, node, lxc_guest.vmid, lxc_guest.name, 'memory', 'full'),
pressure_some_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, lxc_guest.vmid, lxc_guest.name, 'memory', 'some', spikes=True),
pressure_full_spikes_percent=Guests.get_guest_rrd_data(proxmox_api, node, lxc_guest.vmid, lxc_guest.name, 'memory', 'full', spikes=True),
pressure_hot=False,
),
name=guest['name'],
id=guest['vmid'],
name=lxc_guest.name,
id=lxc_guest.vmid,
node_current=node,
node_target=node,
processed=False,
@@ -182,9 +194,9 @@ def get_guests(proxmox_api: ProxmoxApi, pools: Dict[str, ProxLbData.Pool], ha_ru
type=GuestType.Ct,
)

logger.debug(f"Resources of Guest {guest['name']} (type CT) added: {guests[guest['name']]}")
logger.debug(f"Resources of Guest {lxc_guest.name} (type CT) added: {guests[lxc_guest.name]}")
else:
logger.debug(f'Metric for CT {guest["name"]} ignored because CT is not running.')
logger.debug(f'Metric for CT {lxc_guest.name} ignored because CT is not running.')

logger.debug("Finished: get_guests.")
return guests
@@ -228,7 +240,7 @@ def get_guest_rrd_data(proxmox_api: ProxmoxApi, node_name: str, vm_id: int, vm_n
# RRD data is collected every minute, so we look at the last 6 entries
# and take the maximum value to represent the spike
logger.debug(f"Getting RRD data (spike: {spikes}) of pressure for {object_name} {object_type} from guest: {vm_name}.")
_rrd_data_value = [row.get(lookup_key) for row in guest_data_rrd if row.get(lookup_key) is not None]
_rrd_data_value = [row[lookup_key] for row in guest_data_rrd if lookup_key in row and row[lookup_key] is not None]
rrd_data_value = max(_rrd_data_value[-6:], default=0.0)
else:
# Calculate the average value from the RRD data entries
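Note on the last hunk: the pressure lookup now checks key membership explicitly (lookup_key in row plus a None check) instead of relying on row.get(). As a rough, self-contained sketch of the spike-versus-average handling described in the comments above (maximum of the last six one-minute RRD samples versus a plain average), the rrd_value helper and the sample data below are invented for illustration and are not ProxLB's implementation.

# Sketch: derive a single value from Proxmox RRD rows, either as a recent
# spike (max of the last six one-minute samples) or as the overall average.
from typing import Any, Dict, List

def rrd_value(rows: List[Dict[str, Any]], lookup_key: str, spikes: bool = False) -> float:
    values = [row[lookup_key] for row in rows
              if lookup_key in row and row[lookup_key] is not None]
    if not values:
        return 0.0
    if spikes:
        return max(values[-6:])
    return sum(values) / len(values)

# Example with made-up CPU samples (one entry per minute):
rows = [{"cpu": v} for v in (0.01, 0.02, 0.10, 0.03, 0.02, 0.40, 0.05)]
print(rrd_value(rows, "cpu", spikes=True))  # 0.40, the maximum of the last six samples
print(rrd_value(rows, "cpu"))               # ~0.09, the average over all seven samples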