about summary refs log tree commit diff
path: root/cogs/alerts.py
diff options
context:
space:
mode:
Diffstat (limited to 'cogs/alerts.py')
-rw-r--r-- cogs/alerts.py 208
1 files changed, 208 insertions, 0 deletions
diff --git a/cogs/alerts.py b/cogs/alerts.py
new file mode 100644
index 0000000..9e2c71c
--- /dev/null
+++ b/cogs/alerts.py
@@ -0,0 +1,208 @@
import asyncio
from typing import Dict, Optional, Set

import discord
from discord.ext import commands, tasks
from proxmoxer import ProxmoxAPI

import config
+
+
class Alerts(commands.Cog):
    """Background monitoring and DM alerts.

    Once a minute the cog polls the Proxmox node named by
    ``config.PROXMOX_NODE`` and DMs the user ``config.OWNER_ID`` when:

    * CPU or memory usage reaches its configured threshold (one alert per
      excursion above the threshold, re-armed once usage drops below it),
    * a VM or container that was running at the previous successful poll is
      no longer running,
    * a ``vzdump`` task with status ``OK`` shows up in the node's task
      listing for the first time.
    """

    # Upper bound on remembered backup UPIDs; the oldest entries are evicted first.
    _MAX_SEEN_BACKUP_TASKS = 100

    def __init__(self, bot: commands.Bot):
        self.bot = bot
        # Track alert states to prevent spam
        self._cpu_alert_active = False
        self._memory_alert_active = False
        self._stopped_vms: Set[int] = set()  # VMIDs already reported as stopped
        self._last_known_running: Set[int] = set()  # VMIDs running at the last successful poll
        # UPIDs of backup tasks that were already handled. A dict is used as an
        # insertion-ordered set so the oldest entries can be evicted reliably.
        self._seen_backup_tasks: Dict[str, None] = {}
        # False until the first successful task listing has been recorded as a
        # baseline, so backups already listed at startup are not re-announced.
        self._backup_baseline_taken = False

    def get_proxmox(self) -> ProxmoxAPI:
        """Get a Proxmox API connection built from the ``config`` settings."""
        return ProxmoxAPI(
            config.PROXMOX_HOST,
            user=config.PROXMOX_USER,
            token_name=config.PROXMOX_TOKEN_NAME,
            token_value=config.PROXMOX_TOKEN_VALUE,
            verify_ssl=config.PROXMOX_VERIFY_SSL,
        )

    async def _run_blocking(self, func, *args, **kwargs):
        """Run a blocking callable in the default executor and return its result.

        The Proxmox client calls are synchronous; awaiting them through the
        executor keeps the bot's event loop responsive while a request is in
        flight. Exceptions raised by ``func`` propagate to the awaiting caller.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, lambda: func(*args, **kwargs))

    async def cog_load(self):
        """Called when cog is loaded."""
        self.monitor_loop.start()

    async def cog_unload(self):
        """Called when cog is unloaded."""
        self.monitor_loop.cancel()

    async def _send_alert(self, message: str, embed: Optional[discord.Embed] = None):
        """Send an alert DM to the owner.

        Args:
            message: Plain-text content; may be empty when ``embed`` carries
                the alert.
            embed: Optional embed to attach.

        Delivery failures are printed and swallowed so that a Discord-side
        problem never propagates into the monitor loop.
        """
        try:
            # Prefer the cached user; only hit the API on a cache miss.
            owner = self.bot.get_user(config.OWNER_ID)
            if owner is None:
                owner = await self.bot.fetch_user(config.OWNER_ID)
            # Falsy content / embed are passed as absent rather than as empty values.
            await owner.send(content=message or None, embed=embed or None)
        except Exception as e:
            print(f"Failed to send alert: {e}")

    @tasks.loop(seconds=60)
    async def monitor_loop(self):
        """Background task that checks system status."""
        try:
            proxmox = await self._run_blocking(self.get_proxmox)
            await self._check_node_resources(proxmox)
            await self._check_vm_status(proxmox)
            await self._check_backup_tasks(proxmox)
        except Exception as e:
            # Each check handles its own errors; this catches anything left
            # over (e.g. building the client) so the loop keeps running.
            print(f"Monitor loop error: {e}")

    @monitor_loop.before_loop
    async def before_monitor_loop(self):
        """Wait for bot to be ready, then record which guests are running."""
        await self.bot.wait_until_ready()
        # Initialize known running VMs
        try:
            proxmox = await self._run_blocking(self.get_proxmox)
            self._last_known_running = await self._get_running_vmids(proxmox)
        except Exception as e:
            # Non-fatal: with an empty baseline the first successful poll
            # reports nothing as stopped and becomes the baseline itself.
            print(f"Initial VM state fetch failed: {e}")

    async def _fetch_guests(self, proxmox: ProxmoxAPI) -> Dict[int, dict]:
        """Return every QEMU VM and LXC container on the node, keyed by VMID.

        Raises:
            Exception: Whatever the client raises when either listing fails.
                The error is deliberately not swallowed: a partial listing
                would look exactly like guests having stopped.
        """
        node = proxmox.nodes(config.PROXMOX_NODE)
        guests: Dict[int, dict] = {}
        for list_guests in (node.qemu.get, node.lxc.get):
            for guest in await self._run_blocking(list_guests) or []:
                vmid = guest.get("vmid")
                if vmid is not None:
                    # Normalise so IDs compare equal whether the API returned
                    # a number or a numeric string.
                    guests[int(vmid)] = guest
        return guests

    async def _get_running_vmids(self, proxmox: ProxmoxAPI) -> Set[int]:
        """Get set of all running VM/LXC IDs.

        Raises:
            Exception: If the guest listing cannot be fetched (see
                ``_fetch_guests``); callers decide how to handle it.
        """
        guests = await self._fetch_guests(proxmox)
        return {
            vmid for vmid, guest in guests.items() if guest.get("status") == "running"
        }

    async def _check_node_resources(self, proxmox: ProxmoxAPI):
        """Check CPU and memory usage, alert if above threshold.

        Each alert fires once when usage first reaches its threshold and is
        re-armed only after usage has dropped below the threshold again.
        """
        try:
            node_status = await self._run_blocking(
                proxmox.nodes(config.PROXMOX_NODE).status.get
            )

            # CPU check
            cpu_percent = node_status["cpu"] * 100
            cpu_high = cpu_percent >= config.ALERT_CPU_THRESHOLD
            if cpu_high and not self._cpu_alert_active:
                embed = discord.Embed(
                    title=":warning: High CPU Usage Alert",
                    description=f"CPU usage on **{config.PROXMOX_NODE}** is at **{cpu_percent:.1f}%**",
                    color=discord.Color.orange(),
                )
                await self._send_alert("", embed=embed)
            self._cpu_alert_active = cpu_high

            # Memory check
            mem_used = node_status["memory"]["used"]
            mem_total = node_status["memory"]["total"]
            # Guard against a zero total so the percentage never divides by zero.
            mem_percent = (mem_used / mem_total) * 100 if mem_total > 0 else 0
            mem_high = mem_percent >= config.ALERT_MEMORY_THRESHOLD
            if mem_high and not self._memory_alert_active:
                embed = discord.Embed(
                    title=":warning: High Memory Usage Alert",
                    description=f"Memory usage on **{config.PROXMOX_NODE}** is at **{mem_percent:.1f}%**\n"
                    f"({mem_used / (1024**3):.1f} GB / {mem_total / (1024**3):.1f} GB)",
                    color=discord.Color.orange(),
                )
                await self._send_alert("", embed=embed)
            self._memory_alert_active = mem_high

        except Exception as e:
            print(f"Resource check error: {e}")

    async def _check_vm_status(self, proxmox: ProxmoxAPI):
        """Check for VMs/LXCs that have stopped unexpectedly.

        A guest is reported when it was running at the previous successful
        poll and is not running now. If the guest listing cannot be fetched
        the poll is skipped entirely and the previous state is kept, so an
        unreachable API is never mistaken for every guest having stopped.
        """
        try:
            # One listing per poll serves both the state comparison and the
            # name lookup for the alert text.
            guests = await self._fetch_guests(proxmox)
            current_running = {
                vmid
                for vmid, guest in guests.items()
                if guest.get("status") == "running"
            }

            # Find VMs that were running but are now stopped
            newly_stopped = self._last_known_running - current_running

            for vmid in sorted(newly_stopped):
                if vmid in self._stopped_vms:
                    continue
                self._stopped_vms.add(vmid)
                # Fall back to the bare ID when the guest has no name or has
                # disappeared from the listing altogether.
                vm_name = guests.get(vmid, {}).get("name", f"ID {vmid}")

                embed = discord.Embed(
                    title=":red_circle: VM/Container Stopped",
                    description=f"**{vm_name}** (VMID: {vmid}) has stopped running.",
                    color=discord.Color.red(),
                )
                await self._send_alert("", embed=embed)

            # Clear stopped alerts for VMs that are running again
            self._stopped_vms -= current_running

            # Update last known state
            self._last_known_running = current_running

        except Exception as e:
            print(f"VM status check error: {e}")

    async def _check_backup_tasks(self, proxmox: ProxmoxAPI):
        """Check for completed backup tasks.

        Sends one alert per ``vzdump`` task with status ``OK`` the first time
        its UPID is seen. The first successful listing only records a
        baseline, so tasks already present when monitoring starts are not
        announced.
        """
        try:
            task_list = await self._run_blocking(
                proxmox.nodes(config.PROXMOX_NODE).tasks.get
            ) or []

            # Look for vzdump (backup) tasks
            finished_backups = [
                task
                for task in task_list
                if task.get("type") == "vzdump"
                and task.get("status") == "OK"
                and task.get("upid")
            ]

            announce = self._backup_baseline_taken
            self._backup_baseline_taken = True

            for task in finished_backups:
                upid = task["upid"]
                if upid in self._seen_backup_tasks:
                    continue
                self._seen_backup_tasks[upid] = None
                if not announce:
                    continue

                embed = discord.Embed(
                    title=":white_check_mark: Backup Completed",
                    description=f"Backup task completed successfully.\n"
                    f"**Node:** {task.get('node', 'unknown')}\n"
                    f"**Started:** {task.get('starttime', 'unknown')}\n"
                    f"**Ended:** {task.get('endtime', 'unknown')}",
                    color=discord.Color.green(),
                )
                await self._send_alert("", embed=embed)

            # Keep the record from growing without bound: evict the oldest
            # UPIDs (dicts iterate in insertion order).
            while len(self._seen_backup_tasks) > self._MAX_SEEN_BACKUP_TASKS:
                del self._seen_backup_tasks[next(iter(self._seen_backup_tasks))]

        except Exception as e:
            print(f"Backup check error: {e}")
+
async def setup(bot: commands.Bot) -> None:
    """Create an ``Alerts`` cog for ``bot`` and register it via ``bot.add_cog``."""
    alerts_cog = Alerts(bot)
    await bot.add_cog(alerts_cog)