"""Discord cog that polls a Proxmox node and DMs alerts to the bot owner."""

import asyncio
import functools
from collections import deque
from typing import Deque, Dict, List, Optional, Set

import discord
from discord.ext import commands, tasks
from proxmoxer import ProxmoxAPI

import config

# Once more than _MAX_SEEN_BACKUP_TASKS UPIDs are remembered, only the
# _KEPT_SEEN_BACKUP_TASKS most recently recorded ones are kept.
_MAX_SEEN_BACKUP_TASKS = 100
_KEPT_SEEN_BACKUP_TASKS = 50


class Alerts(commands.Cog):
    """Background monitoring and DM alerts."""

    def __init__(self, bot: commands.Bot):
        self.bot = bot

        # Track alert states to prevent spam
        self._cpu_alert_active = False
        self._memory_alert_active = False
        self._stopped_vms: Set[int] = set()  # VMIDs that were reported as stopped
        self._last_known_running: Set[int] = set()  # VMIDs that were running last check
        self._seen_backup_tasks: Set[str] = set()  # UPIDs of already-notified backup tasks
        # Same UPIDs in the order they were recorded, so trimming can drop the
        # oldest entries (a set alone has no usable ordering).
        self._seen_backup_order: Deque[str] = deque()

    def get_proxmox(self) -> ProxmoxAPI:
        """Build a Proxmox API client from the token settings in ``config``."""
        return ProxmoxAPI(
            config.PROXMOX_HOST,
            user=config.PROXMOX_USER,
            token_name=config.PROXMOX_TOKEN_NAME,
            token_value=config.PROXMOX_TOKEN_VALUE,
            verify_ssl=config.PROXMOX_VERIFY_SSL,
        )

    async def _run_blocking(self, func, *args, **kwargs):
        """Run a synchronous callable in the default executor and return its result.

        The Proxmox client is called synchronously (no ``await``); running
        those calls in a worker thread keeps the bot's event loop responsive
        while a request is in flight. Exceptions raised by ``func`` propagate
        to the awaiting caller.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, functools.partial(func, *args, **kwargs)
        )

    async def cog_load(self):
        """Start the monitor loop when the cog is loaded."""
        self.monitor_loop.start()

    async def cog_unload(self):
        """Cancel the monitor loop when the cog is unloaded."""
        self.monitor_loop.cancel()

    async def _send_alert(self, message: str, embed: Optional[discord.Embed] = None):
        """Send an alert DM to the owner configured as ``config.OWNER_ID``.

        Args:
            message: Text content of the DM; may be empty when ``embed`` is given.
            embed: Optional embed to attach.

        Failures are printed and swallowed so that a delivery problem never
        interrupts the monitoring checks.
        """
        try:
            owner = await self.bot.fetch_user(config.OWNER_ID)
            if embed is not None:
                # Send no content at all rather than an empty string.
                await owner.send(content=message or None, embed=embed)
            else:
                await owner.send(message)
        except Exception as e:
            print(f"Failed to send alert: {e}")

    @tasks.loop(seconds=60)
    async def monitor_loop(self):
        """Background task that runs every check once per iteration."""
        try:
            proxmox = await self._run_blocking(self.get_proxmox)
            await self._check_node_resources(proxmox)
            await self._check_vm_status(proxmox)
            await self._check_backup_tasks(proxmox)
        except Exception as e:
            print(f"Monitor loop error: {e}")

    @monitor_loop.before_loop
    async def before_monitor_loop(self):
        """Wait for the bot to be ready, then record which guests are running."""
        await self.bot.wait_until_ready()

        # Initialize known running VMs
        try:
            proxmox = await self._run_blocking(self.get_proxmox)
            self._last_known_running = await self._get_running_vmids(proxmox)
        except Exception as e:
            # Best effort: the loop still starts, with an empty baseline.
            print(f"Initial VM state fetch failed: {e}")

    async def _list_guests(self, proxmox: ProxmoxAPI) -> List[dict]:
        """Return the QEMU entries followed by the LXC entries of the node.

        Raises:
            Exception: whatever the Proxmox client raises if either list
                request fails; no partial result is returned.
        """
        node = proxmox.nodes(config.PROXMOX_NODE)
        guests = list(await self._run_blocking(node.qemu.get) or [])
        guests.extend(await self._run_blocking(node.lxc.get) or [])
        return guests

    async def _get_running_vmids(self, proxmox: ProxmoxAPI) -> Set[int]:
        """Get set of all running VM/LXC IDs.

        Raises:
            Exception: if the guest lists cannot be fetched. Returning a
                partial or empty set instead would make every guest look
                stopped to the caller.
        """
        guests = await self._list_guests(proxmox)
        return {g.get("vmid") for g in guests if g.get("status") == "running"}

    async def _check_node_resources(self, proxmox: ProxmoxAPI):
        """Check CPU and memory usage, alert if above threshold.

        Each alert is sent once when usage reaches its threshold and is
        re-armed when usage drops back below it.
        """
        try:
            node_status = await self._run_blocking(
                proxmox.nodes(config.PROXMOX_NODE).status.get
            )

            # CPU check
            cpu_percent = node_status["cpu"] * 100
            if cpu_percent < config.ALERT_CPU_THRESHOLD:
                self._cpu_alert_active = False
            elif not self._cpu_alert_active:
                self._cpu_alert_active = True
                embed = discord.Embed(
                    title=":warning: High CPU Usage Alert",
                    description=f"CPU usage on **{config.PROXMOX_NODE}** is at **{cpu_percent:.1f}%**",
                    color=discord.Color.orange(),
                )
                await self._send_alert("", embed=embed)

            # Memory check
            mem_used = node_status["memory"]["used"]
            mem_total = node_status["memory"]["total"]
            mem_percent = (mem_used / mem_total) * 100 if mem_total > 0 else 0
            if mem_percent < config.ALERT_MEMORY_THRESHOLD:
                self._memory_alert_active = False
            elif not self._memory_alert_active:
                self._memory_alert_active = True
                embed = discord.Embed(
                    title=":warning: High Memory Usage Alert",
                    description=f"Memory usage on **{config.PROXMOX_NODE}** is at **{mem_percent:.1f}%**\n"
                    f"({mem_used / (1024**3):.1f} GB / {mem_total / (1024**3):.1f} GB)",
                    color=discord.Color.orange(),
                )
                await self._send_alert("", embed=embed)
        except Exception as e:
            print(f"Resource check error: {e}")

    async def _check_vm_status(self, proxmox: ProxmoxAPI):
        """Check for VMs/LXCs that were running at the last check and are not now.

        If the guest lists cannot be fetched, the error is printed and the
        tracked state is left untouched, so a failed request is not reported
        as guests having stopped.
        """
        try:
            # One fetch per poll provides both the running set and the names.
            guests = await self._list_guests(proxmox)
            current_running = {
                g.get("vmid") for g in guests if g.get("status") == "running"
            }

            # First entry wins, so a QEMU name takes precedence over an LXC one.
            names: Dict[int, str] = {}
            for guest in guests:
                names.setdefault(guest.get("vmid"), guest.get("name"))

            # Find VMs that were running but are now stopped
            for vmid in self._last_known_running - current_running:
                if vmid in self._stopped_vms:
                    continue
                self._stopped_vms.add(vmid)
                vm_name = names.get(vmid) or f"ID {vmid}"
                embed = discord.Embed(
                    title=":red_circle: VM/Container Stopped",
                    description=f"**{vm_name}** (VMID: {vmid}) has stopped running.",
                    color=discord.Color.red(),
                )
                await self._send_alert("", embed=embed)

            # Clear stopped alerts for VMs that are running again
            self._stopped_vms -= current_running

            # Update last known state
            self._last_known_running = current_running
        except Exception as e:
            print(f"VM status check error: {e}")

    def _remember_backup_task(self, upid: str) -> None:
        """Record a notified UPID and trim the oldest entries when over the cap."""
        self._seen_backup_tasks.add(upid)
        self._seen_backup_order.append(upid)
        if len(self._seen_backup_tasks) > _MAX_SEEN_BACKUP_TASKS:
            while len(self._seen_backup_order) > _KEPT_SEEN_BACKUP_TASKS:
                self._seen_backup_tasks.discard(self._seen_backup_order.popleft())

    async def _check_backup_tasks(self, proxmox: ProxmoxAPI):
        """Send one alert per vzdump task with status "OK" not yet notified."""
        try:
            node_tasks = await self._run_blocking(
                proxmox.nodes(config.PROXMOX_NODE).tasks.get
            )
            for task in node_tasks or []:
                # Look for vzdump (backup) tasks
                if task.get("type") != "vzdump" or task.get("status") != "OK":
                    continue
                upid = task.get("upid")
                if not upid or upid in self._seen_backup_tasks:
                    continue
                self._remember_backup_task(upid)

                embed = discord.Embed(
                    title=":white_check_mark: Backup Completed",
                    description=f"Backup task completed successfully.\n"
                    f"**Node:** {task.get('node', 'unknown')}\n"
                    f"**Started:** {task.get('starttime', 'unknown')}\n"
                    f"**Ended:** {task.get('endtime', 'unknown')}",
                    color=discord.Color.green(),
                )
                await self._send_alert("", embed=embed)
        except Exception as e:
            print(f"Backup check error: {e}")


async def setup(bot: commands.Bot):
    """Register the Alerts cog on ``bot``."""
    await bot.add_cog(Alerts(bot))