1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
|
import asyncio
from typing import Dict, Optional, Set

import discord
from discord.ext import commands, tasks
from proxmoxer import ProxmoxAPI

import config
class Alerts(commands.Cog):
    """Background monitoring cog that DMs the bot owner about Proxmox events.

    Once a minute the node named by ``config.PROXMOX_NODE`` is polled and the
    user ``config.OWNER_ID`` receives a direct message when

    * CPU or memory usage reaches ``config.ALERT_CPU_THRESHOLD`` /
      ``config.ALERT_MEMORY_THRESHOLD`` (one alert per excursion),
    * a VM or container that was running at the previous poll no longer is,
    * a ``vzdump`` task newly shows up with status ``OK``.
    """

    def __init__(self, bot: commands.Bot):
        self.bot = bot
        # Latches so a sustained high-usage condition alerts once, not on
        # every poll; they reset when usage drops below the threshold.
        self._cpu_alert_active = False
        self._memory_alert_active = False
        # VMIDs already reported as stopped (cleared once they run again).
        self._stopped_vms: Set[int] = set()
        # VMIDs that were running at the previous successful poll.
        self._last_known_running: Set[int] = set()
        # UPIDs of finished backup tasks that must not be announced (again).
        self._seen_backup_tasks: Set[str] = set()
        # False until the first successful task poll has recorded a baseline,
        # so backups that finished before the bot started are not announced.
        self._backup_baseline_taken = False

    def get_proxmox(self) -> ProxmoxAPI:
        """Build a Proxmox API client from the token credentials in ``config``."""
        return ProxmoxAPI(
            config.PROXMOX_HOST,
            user=config.PROXMOX_USER,
            token_name=config.PROXMOX_TOKEN_NAME,
            token_value=config.PROXMOX_TOKEN_VALUE,
            verify_ssl=config.PROXMOX_VERIFY_SSL,
        )

    async def cog_load(self):
        """Start the monitor loop when the cog is loaded."""
        self.monitor_loop.start()

    async def cog_unload(self):
        """Cancel the monitor loop when the cog is unloaded."""
        self.monitor_loop.cancel()

    async def _run_blocking(self, func):
        """Await a blocking zero-argument callable without stalling the event loop.

        The Proxmox client calls used here are synchronous (they return their
        result directly), so they are executed on the loop's default executor.
        The callable's return value or exception is passed through unchanged.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, func)

    async def _send_alert(self, message: str, embed: Optional[discord.Embed] = None):
        """Send an alert DM to the owner.

        Args:
            message: Plain-text content; may be empty when ``embed`` is given.
            embed: Optional embed to attach.

        Delivery failures are printed and swallowed so that a Discord problem
        never aborts the monitoring cycle.
        """
        try:
            # Prefer the cache; fall back to an HTTP fetch when the owner is
            # not cached.
            owner = self.bot.get_user(config.OWNER_ID)
            if owner is None:
                owner = await self.bot.fetch_user(config.OWNER_ID)
            if embed is not None:
                await owner.send(content=message or None, embed=embed)
            else:
                await owner.send(message)
        except Exception as e:
            print(f"Failed to send alert: {e}")

    @tasks.loop(seconds=60)
    async def monitor_loop(self):
        """Background task that runs every check against a fresh API client."""
        try:
            proxmox = await self._run_blocking(self.get_proxmox)
            await self._check_node_resources(proxmox)
            await self._check_vm_status(proxmox)
            await self._check_backup_tasks(proxmox)
        except Exception as e:
            print(f"Monitor loop error: {e}")

    @monitor_loop.before_loop
    async def before_monitor_loop(self):
        """Wait for the bot to be ready, then record which guests are running."""
        await self.bot.wait_until_ready()
        try:
            proxmox = await self._run_blocking(self.get_proxmox)
            self._last_known_running = await self._get_running_vmids(proxmox)
        except Exception as e:
            # Not fatal: with an empty baseline the first poll simply cannot
            # detect stops, and later polls work normally.
            print(f"Initial VM state check error: {e}")

    async def _list_guests(self, proxmox: ProxmoxAPI) -> Dict[int, dict]:
        """Return every QEMU VM and LXC container on the node, keyed by VMID.

        Raises:
            Exception: whatever the API client raises. A failed listing must
                not be mistaken for "nothing is running", so errors propagate
                to the caller instead of yielding a partial result.
        """
        node = proxmox.nodes(config.PROXMOX_NODE)
        guests: Dict[int, dict] = {}
        for fetch in (node.qemu.get, node.lxc.get):
            for guest in await self._run_blocking(fetch) or []:
                vmid = guest.get("vmid")
                if vmid is None:
                    continue
                # Normalise to int so set arithmetic stays consistent even if
                # an endpoint reports the VMID as a string.
                guests[int(vmid)] = guest
        return guests

    async def _get_running_vmids(self, proxmox: ProxmoxAPI) -> Set[int]:
        """Get the set of all running VM/LXC IDs.

        Raises:
            Exception: propagated from the API client when a listing fails.
        """
        guests = await self._list_guests(proxmox)
        return {
            vmid for vmid, guest in guests.items() if guest.get("status") == "running"
        }

    async def _check_node_resources(self, proxmox: ProxmoxAPI):
        """Check CPU and memory usage and alert when a threshold is reached."""
        try:
            node_status = await self._run_blocking(
                proxmox.nodes(config.PROXMOX_NODE).status.get
            )

            # CPU check ("cpu" is a fraction, hence the * 100).
            cpu_percent = node_status["cpu"] * 100
            if cpu_percent >= config.ALERT_CPU_THRESHOLD:
                if not self._cpu_alert_active:
                    self._cpu_alert_active = True
                    embed = discord.Embed(
                        title=":warning: High CPU Usage Alert",
                        description=f"CPU usage on **{config.PROXMOX_NODE}** is at **{cpu_percent:.1f}%**",
                        color=discord.Color.orange(),
                    )
                    await self._send_alert("", embed=embed)
            else:
                self._cpu_alert_active = False

            # Memory check
            mem_used = node_status["memory"]["used"]
            mem_total = node_status["memory"]["total"]
            mem_percent = (mem_used / mem_total) * 100 if mem_total > 0 else 0
            if mem_percent >= config.ALERT_MEMORY_THRESHOLD:
                if not self._memory_alert_active:
                    self._memory_alert_active = True
                    embed = discord.Embed(
                        title=":warning: High Memory Usage Alert",
                        description=f"Memory usage on **{config.PROXMOX_NODE}** is at **{mem_percent:.1f}%**\n"
                        f"({mem_used / (1024**3):.1f} GB / {mem_total / (1024**3):.1f} GB)",
                        color=discord.Color.orange(),
                    )
                    await self._send_alert("", embed=embed)
            else:
                self._memory_alert_active = False
        except Exception as e:
            print(f"Resource check error: {e}")

    async def _check_vm_status(self, proxmox: ProxmoxAPI):
        """Alert for VMs/LXCs that were running at the last poll but no longer are.

        If the guest listing fails, the poll is skipped and the previous state
        is kept, so an API outage does not produce false "stopped" alerts.
        """
        try:
            guests = await self._list_guests(proxmox)
            current_running = {
                vmid
                for vmid, guest in guests.items()
                if guest.get("status") == "running"
            }

            newly_stopped = self._last_known_running - current_running
            for vmid in sorted(newly_stopped):
                if vmid in self._stopped_vms:
                    continue
                self._stopped_vms.add(vmid)

                # The listing just fetched still carries the guest's name
                # unless the guest disappeared from it entirely.
                vm_name = guests.get(vmid, {}).get("name", f"ID {vmid}")
                embed = discord.Embed(
                    title=":red_circle: VM/Container Stopped",
                    description=f"**{vm_name}** (VMID: {vmid}) has stopped running.",
                    color=discord.Color.red(),
                )
                await self._send_alert("", embed=embed)

            # Clear stopped alerts for VMs that are running again
            self._stopped_vms -= current_running
            # Update last known state
            self._last_known_running = current_running
        except Exception as e:
            print(f"VM status check error: {e}")

    async def _check_backup_tasks(self, proxmox: ProxmoxAPI):
        """Announce ``vzdump`` tasks that newly appear with status ``OK``.

        The first successful poll only records what is already listed. After
        each poll the set of known UPIDs is replaced by the UPIDs currently
        listed, so it never outgrows the listing the API returns.
        """
        try:
            task_list = await self._run_blocking(
                proxmox.nodes(config.PROXMOX_NODE).tasks.get
            )

            # Successful backup tasks in this listing, keyed by UPID.
            finished: Dict[str, dict] = {}
            for task in task_list or []:
                if task.get("type") == "vzdump" and task.get("status") == "OK":
                    upid = task.get("upid")
                    if upid:
                        finished[upid] = task

            if self._backup_baseline_taken:
                unseen = [
                    task
                    for upid, task in finished.items()
                    if upid not in self._seen_backup_tasks
                ]
            else:
                unseen = []
                self._backup_baseline_taken = True

            self._seen_backup_tasks = set(finished)

            for task in unseen:
                embed = discord.Embed(
                    title=":white_check_mark: Backup Completed",
                    description=f"Backup task completed successfully.\n"
                    f"**Node:** {task.get('node', 'unknown')}\n"
                    f"**Started:** {task.get('starttime', 'unknown')}\n"
                    f"**Ended:** {task.get('endtime', 'unknown')}",
                    color=discord.Color.green(),
                )
                await self._send_alert("", embed=embed)
        except Exception as e:
            print(f"Backup check error: {e}")
async def setup(bot: commands.Bot):
    """Extension entry point: create the Alerts cog and register it on ``bot``."""
    alerts_cog = Alerts(bot)
    await bot.add_cog(alerts_cog)
|