Compare commits


14 Commits
master ... dev

Author SHA1 Message Date
608cc9253d Reset dev deploy behavior
All checks were successful
Deploy Stacks / deploy-prod (push) Has been skipped
Deploy Stacks / deploy-dev (push) Successful in 1s
2025-12-31 20:14:24 -05:00
3357d96cb0 Trigger dev full deploy
All checks were successful
Deploy Stacks / deploy-prod (push) Has been skipped
Deploy Stacks / deploy-dev (push) Successful in 1s
2025-12-31 20:12:34 -05:00
aeac252491 Deploy all stacks on dev
Some checks failed
Deploy Stacks / deploy-prod (push) Has been skipped
Deploy Stacks / deploy-dev (push) Failing after 17s
2025-12-31 20:12:13 -05:00
13989e2b59 Add dev stacks 2025-12-31 20:11:44 -05:00
0bcfed8fb8 Match dev runner label
All checks were successful
Deploy Stacks / deploy-prod (push) Has been skipped
Deploy Stacks / deploy-dev (push) Successful in 2s
2025-12-31 20:07:59 -05:00
cb78589dd7 Use host runner label for dev
Some checks failed
Deploy Stacks / deploy-prod (push) Has been skipped
Deploy Stacks / deploy-dev (push) Has been cancelled
2025-12-31 19:58:17 -05:00
9d60438d05 Use ubuntu-dev runner label 2025-12-31 19:44:16 -05:00
0f97410583 Trigger dev deploy 4
Some checks failed
Deploy Stacks / deploy-prod (push) Has been skipped
Deploy Stacks / deploy-dev (push) Has been cancelled
2025-12-31 19:36:48 -05:00
9f7d3583ec Trigger dev deploy 3
Some checks failed
Deploy Stacks / deploy-prod (push) Has been skipped
Deploy Stacks / deploy-dev (push) Has been cancelled
2025-12-31 19:35:03 -05:00
c72df103fd Trigger dev deploy 2
Some checks failed
Deploy Stacks / deploy-prod (push) Has been skipped
Deploy Stacks / deploy-dev (push) Has been cancelled
2025-12-31 19:33:36 -05:00
68d48339b9 Trigger dev deploy
Some checks failed
Deploy Stacks / deploy-prod (push) Has been skipped
Deploy Stacks / deploy-dev (push) Has been cancelled
2025-12-31 19:32:22 -05:00
3c65cb903a Run dev deploys on host
Some checks failed
Deploy Stacks / deploy-prod (push) Has been skipped
Deploy Stacks / deploy-dev (push) Has been cancelled
2025-12-31 19:30:03 -05:00
150ec1f575 Use sudo in deploy workflow
Some checks failed
Deploy Stacks / deploy-prod (push) Has been skipped
Deploy Stacks / deploy-dev (push) Failing after 0s
2025-12-31 19:28:34 -05:00
c04d3a5842 Add dev runner deploy target
Some checks failed
Deploy Stacks / deploy-prod (push) Has been skipped
Deploy Stacks / deploy-dev (push) Failing after 6s
2025-12-31 19:26:17 -05:00
50 changed files with 4949 additions and 1 deletion


@@ -63,7 +63,7 @@ jobs:
   deploy-dev:
     if: ${{ github.ref == 'refs/heads/dev' }}
-    runs-on: ubuntu-dev:host
+    runs-on: ubuntu-dev
     steps:
       - uses: actions/checkout@v4
         with:


@@ -0,0 +1,8 @@
MESHTASTIC_NODE_IP=192.168.5.242
ALLOWED_ORIGINS=http://docker-dev:8383,https://meshmon.ghost.tel
RATE_LIMIT_API=20000
NODE_ENV=production
SESSION_SECRET=change-me
TRUST_PROXY=true
COOKIE_SECURE=true
DISABLE_ANONYMOUS=true


@@ -0,0 +1,14 @@
services:
  meshmonitor:
    image: ghcr.io/yeraze/meshmonitor:latest
    container_name: meshmonitor
    ports:
      - "8383:3001"
    restart: unless-stopped
    volumes:
      - meshmonitor-data:/data
    env_file:
      - .env
volumes:
  meshmonitor-data:
    driver: local


@@ -0,0 +1,7 @@
services:
  meshtastic-web:
    image: ghcr.io/meshtastic/web:latest
    container_name: meshtastic-web
    restart: unless-stopped
    ports:
      - "8585:8080"


@@ -0,0 +1,7 @@
.venv
__pycache__/
*.pyc
.git
.gitignore
.env
tmp/


@@ -0,0 +1,14 @@
OPENROUTER_API_KEY=
OPENROUTER_MODEL=openai/gpt-5.2-codex-max
TRIAGE_ENABLE_COMMANDS=1
TRIAGE_COMMAND_RUNNER=local
TRIAGE_VERBOSE_LOGS=1
TRIAGE_EMAIL_ENABLED=1
TRIAGE_EMAIL_FROM=alertai@example.com
TRIAGE_EMAIL_TO=admin@example.com
TRIAGE_SMTP_HOST=smtp.example.com
TRIAGE_SMTP_PORT=465
TRIAGE_SMTP_USER=alertai@example.com
TRIAGE_SMTP_PASSWORD=
TRIAGE_SMTP_SSL=1
TRIAGE_SMTP_STARTTLS=0


@@ -0,0 +1,14 @@
OPENROUTER_API_KEY=
OPENROUTER_MODEL=openai/gpt-5.2-codex-max
TRIAGE_ENABLE_COMMANDS=1
TRIAGE_COMMAND_RUNNER=local
TRIAGE_VERBOSE_LOGS=1
TRIAGE_EMAIL_ENABLED=1
TRIAGE_EMAIL_FROM=alertai@example.com
TRIAGE_EMAIL_TO=admin@example.com
TRIAGE_SMTP_HOST=smtp.example.com
TRIAGE_SMTP_PORT=465
TRIAGE_SMTP_USER=alertai@example.com
TRIAGE_SMTP_PASSWORD=
TRIAGE_SMTP_SSL=1
TRIAGE_SMTP_STARTTLS=0


@@ -0,0 +1,20 @@
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

WORKDIR /var/core/mlLogWatcher

RUN apt-get update && \
    apt-get install -y --no-install-recommends openssh-client && \
    rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY alert_runbook.yaml ./alert_runbook.yaml
COPY scripts ./scripts

EXPOSE 8081

CMD ["uvicorn", "scripts.grafana_alert_webhook:app", "--host", "0.0.0.0", "--port", "8081"]

stacks/mllogwatcher/README.md (Executable file, 120 lines)

@@ -0,0 +1,120 @@
# ML Log Watcher Utilities
This repository now contains two automation entry points that work together to
triage Elasticsearch logs and Grafana alerts with the help of OpenRouter-hosted
language models.
## 1. `scripts/log_monitor.py`
Existing script that queries Elasticsearch indices, pulls a recent window of
logs, and asks an LLM for anomaly highlights. Run it ad-hoc or schedule via
cron/systemd.
```
ELASTIC_HOST=https://casper.localdomain:9200 \
ELASTIC_API_KEY=... \
OPENROUTER_API_KEY=... \
python3 scripts/log_monitor.py --index 'log*' --minutes 30
```
## 2. `scripts/grafana_alert_webhook.py`
A FastAPI web server that accepts Grafana alert webhooks, finds the matching
entry in `alert_runbook.yaml`, renders the LLM prompt, and posts it to
OpenRouter. The response text is returned to Grafana (or any caller) immediately
so automation can fan out to chat, ticketing, etc.
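For reference, a minimal sketch of how the `{{ var }}` placeholders in runbook prompts get filled (it mirrors the small mustache-style renderer in `scripts/grafana_alert_webhook.py`; unknown placeholders are left untouched):
```
import re

# Mirrors render_template() in scripts/grafana_alert_webhook.py: a tiny
# mustache-style substitution that leaves unknown placeholders as-is.
TEMPLATE_PATTERN = re.compile(r"{{\s*([a-zA-Z0-9_]+)\s*}}")

def render_template(template: str, context: dict) -> str:
    return TEMPLATE_PATTERN.sub(
        lambda m: str(context.get(m.group(1), m.group(0))), template
    )

print(render_template(
    "Alert {{ alertname }} fired for {{ host }}.",
    {"alertname": "High Mem.", "host": "unit-02"},
))
# -> Alert High Mem. fired for unit-02.
```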
### Dependencies
```
python3 -m venv .venv
.venv/bin/pip install fastapi uvicorn pyyaml requests langchain
```
### Environment
- `OPENROUTER_API_KEY` required.
- `OPENROUTER_MODEL` optional (default `openai/gpt-4o-mini`).
- `RUNBOOK_PATH` optional (default `alert_runbook.yaml` in repo root).
- `ANSIBLE_HOSTS_PATH` optional (default `/etc/ansible/hosts`). When the inventory file is present, the webhook auto-loads it so alerts targeting known hosts inherit their SSH user/port/key information.
- `OPENROUTER_REFERER` / `OPENROUTER_TITLE` forwarded headers if needed.
- `TRIAGE_ENABLE_COMMANDS` set to `1` to let the webhook execute runbook commands (default `0` keeps it in read-only mode).
- `TRIAGE_COMMAND_RUNNER` `ssh` (default) or `local`. When using ssh, also set `TRIAGE_SSH_USER` and optional `TRIAGE_SSH_OPTIONS`.
- `TRIAGE_COMMAND_TIMEOUT`, `TRIAGE_MAX_COMMANDS`, `TRIAGE_OUTPUT_LIMIT`, `TRIAGE_DEFAULT_OS` tune execution behavior.
- `TRIAGE_VERBOSE_LOGS` set to `1` to stream the entire LLM dialogue, prompts, and command outputs to the webhook logs for debugging.
- `TRIAGE_EMAIL_ENABLED` when `1`, the webhook emails the final LLM summary per alert. Requires `TRIAGE_EMAIL_FROM`, `TRIAGE_EMAIL_TO` (comma-separated), `TRIAGE_SMTP_HOST`, and optional `TRIAGE_SMTP_PORT`, `TRIAGE_SMTP_USER`, `TRIAGE_SMTP_PASSWORD`, `TRIAGE_SMTP_STARTTLS`, `TRIAGE_SMTP_SSL`.
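The boolean toggles above are parsed leniently; a short sketch of how they are interpreted at startup (the script inlines this check rather than using a helper function):
```
import os

def env_flag(name: str, default: str = "0") -> bool:
    # "1", "true", "yes", "on" (any case) enable a toggle; anything else disables it.
    return os.environ.get(name, default).lower() in {"1", "true", "yes", "on"}

TRIAGE_ENABLE_COMMANDS = env_flag("TRIAGE_ENABLE_COMMANDS")   # defaults to off
TRIAGE_VERBOSE_LOGS = env_flag("TRIAGE_VERBOSE_LOGS")         # defaults to off
TRIAGE_SMTP_STARTTLS = env_flag("TRIAGE_SMTP_STARTTLS", "1")  # defaults to on
```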
### Running
```
source .venv/bin/activate
export OPENROUTER_API_KEY=...
uvicorn scripts.grafana_alert_webhook:app --host 0.0.0.0 --port 8081
```
The server loads the runbook at startup and exposes:
- `POST /alerts` Grafana webhook target.
- `POST /reload-runbook` force runbook reload without restarting.
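A quick Python smoke test for the reload endpoint (assumes the server is reachable on `localhost:8081`; the counts in the comment are illustrative):
```
import requests

resp = requests.post("http://localhost:8081/reload-runbook", timeout=10)
resp.raise_for_status()
print(resp.json())  # e.g. {"entries": 10, "inventory_hosts": 25}
```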
When `TRIAGE_ENABLE_COMMANDS=1`, the server executes the relevant triage commands
for each alert (via SSH or locally), captures stdout/stderr, and appends the
results to both the OpenRouter prompt and the HTTP response JSON. This lets you
automate evidence gathering directly from the runbook instructions. Use
environment variables to control which user/host the commands target and to
limit timeouts/output size. LangChain powers the multi-turn investigation flow:
the LLM can call the provided tools (`run_local_command`, `run_ssh_command`) to
gather additional evidence until it's ready to deliver a final summary.
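For illustration, the kind of structured reply the investigation loop expects from the model each turn (field names match the webhook's parser; the values are made up):
```
import json

example_reply = json.dumps({
    "analysis": "memcached on unit-02 holds most of the RAM; suspect a cache runaway.",
    "followup_commands": [
        {
            "summary": "Check top memory consumers",
            "command": "ps -eo pid,user,comm,%mem --sort=-%mem | head",
            "runner": "ssh",   # optional; falls back to the configured runner
            "os": "linux",     # optional; commands for a different OS are skipped
        }
    ],
    "complete": False,  # true once the model has gathered enough evidence
})
```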
When `/etc/ansible/hosts` (or `ANSIBLE_HOSTS_PATH`) is available the server
automatically enriches the alert context with SSH metadata (user, host, port,
identity file, and common args) so runbook commands default to using SSH against
the alerting host instead of the webhook server.
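A rough illustration of what that enrichment yields; the host, user, and port below are hypothetical:
```
# Given an inventory line like:
#   unit-02 ansible_host=192.168.1.50 ansible_user=ops ansible_port=2222
# the alert context is extended roughly as follows:
context = {
    "host": "unit-02",
    "ssh_host": "192.168.1.50",
    "ssh_user": "ops",
    "ssh_port": "2222",
    "preferred_runner": "ssh",
}
# so a runbook command such as `uptime` is executed as:
#   ssh -o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=5 \
#       -p 2222 ops@192.168.1.50 uptime
```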
### Running with Docker Compose
1. Copy `.env.example` to `.env` and fill in your OpenRouter key, email SMTP
settings, and other toggles.
2. Place any SSH keys the webhook needs inside `./.ssh/` (the compose file
mounts this directory read-only inside the container).
3. Run `docker compose up -d` to build and launch the webhook. It listens on
port `8081` by default and uses the mounted `alert_runbook.yaml` plus the
host `/etc/ansible/hosts`.
4. Use `docker compose logs -f` to watch verbose LangChain output or restart
with `docker compose restart` when updating the code/runbook.
### Sample payload
```
curl -X POST http://localhost:8081/alerts \
  -H 'Content-Type: application/json' \
  -d '{
        "status":"firing",
        "ruleUid":"edkmsdmlay2o0c",
        "ruleUrl":"http://casper:3000/alerting/grafana/edkmsdmlay2o0c/view",
        "alerts":[
          {
            "status":"firing",
            "labels":{
              "alertname":"High Mem.",
              "host":"unit-02",
              "rule_uid":"edkmsdmlay2o0c"
            },
            "annotations":{
              "summary":"Memory usage above 95% for 10m",
              "value":"96.2%"
            },
            "startsAt":"2025-09-22T17:20:00Z",
            "endsAt":"0001-01-01T00:00:00Z"
          }
        ]
      }'
With a valid OpenRouter key this returns a JSON body containing the LLM summary
per alert plus any unmatched alerts (missing runbook entries or rule UIDs).
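The response body looks roughly like this (values illustrative, shape matching the `/alerts` handler):
```
response = {
    "processed": 1,
    "results": [
        {
            "rule_uid": "edkmsdmlay2o0c",
            "alertname": "High Mem.",
            "host": "unit-02",
            "llm_summary": "...final analysis text...",
            "command_results": [],   # populated when TRIAGE_ENABLE_COMMANDS=1
            "runbook_matched": True,
        }
    ],
    "unmatched": [],                 # skipped or runbook-less alerts end up here
}
```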
### Testing without OpenRouter
Set `OPENROUTER_API_KEY=dummy` and point DNS for `openrouter.ai` at a mock (e.g. mitmproxy)
if you need to capture outbound requests. Otherwise, requests fail fast with
HTTP 502 so Grafana knows the automation needs to be retried.
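As a sketch, a minimal FastAPI stand-in that returns a canned chat completion works too (DNS/TLS plumbing is still on you, since the webhook always calls `https://openrouter.ai`):
```
from fastapi import FastAPI

mock = FastAPI()

@mock.post("/api/v1/chat/completions")
def fake_completion(body: dict) -> dict:
    # Shape matches what the webhook reads: choices[0]["message"]["content"].
    canned = '{"analysis": "mock analysis", "followup_commands": [], "complete": true}'
    return {"choices": [{"message": {"content": canned}}]}

# Run with e.g.: uvicorn mock_openrouter:mock --port 8443
```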


@@ -0,0 +1,254 @@
# Grafana alert triage playbook for the HomeLab telemetry stack.
# Each entry contains the alert metadata, what the signal means,
# the evidence to capture automatically, and the manual / scripted steps.
metadata:
generated: "2025-09-22T00:00:00Z"
grafana_url: "http://casper:3000"
datasource: "InfluxDB telegraf (uid=P951FEA4DE68E13C5)"
llm_provider: "OpenRouter"
alerts:
- name: "Data Stale"
rule_uid: "fdk9orif6fytcf"
description: "No CPU usage_user metrics have arrived for non-unit hosts within 5 minutes."
signal:
metric: "cpu.usage_user"
condition: "count(host samples over 5m) < 1"
impact: "Host is no longer reporting to Telegraf/Influx -> monitoring blind spot."
evidence_to_collect:
- "Influx: `from(bucket:\"telegraf\") |> range(start:-10m) |> filter(fn:(r)=>r._measurement==\"cpu\" and r.host==\"{{ host }}\") |> count()`"
- "Telegraf log tail"
- "System journal for network/auth errors"
triage:
- summary: "Verify Telegraf agent health"
linux: "sudo systemctl status telegraf && sudo journalctl -u telegraf -n 100"
windows: "Get-Service telegraf; Get-Content 'C:\\Program Files\\telegraf\\telegraf.log' -Tail 100"
- summary: "Check connectivity from host to Influx (`casper:8086`)"
linux: "curl -sSf http://casper:8086/ping"
windows: "Invoke-WebRequest -UseBasicParsing http://casper:8086/ping"
- summary: "Confirm host clock drift <5s (important for Influx line protocol timestamps)"
linux: "chronyc tracking"
windows: "w32tm /query /status"
remediation:
- "Restart Telegraf after config validation: `sudo telegraf --test --config /etc/telegraf/telegraf.conf` then `sudo systemctl restart telegraf`."
- "Re-apply Ansible telemetry playbook if multiple hosts fail."
llm_prompt: >
Alert {{ alertname }} fired for {{ host }}. Telegraf stopped sending cpu.usage_user metrics. Given the collected logs and command output, identify root causes (agent down, auth failures, firewall, time skew) and list the next action.
- name: "High CPU"
rule_uid: "fdkms407ubmdcc"
description: "Mean CPU usage_system over the last 10 minutes exceeds 85%."
signal:
metric: "cpu.usage_system"
condition: "mean over 10m > 85%"
impact: "Host is near saturation; scheduler latency and queueing likely."
evidence_to_collect:
- "Top CPU processes snapshot (Linux: `ps -eo pid,cmd,%cpu --sort=-%cpu | head -n 15`; Windows: `Get-Process | Sort-Object CPU -Descending | Select -First 15`)"
- "Load vs CPU core count"
- "Recent deploys / cron jobs metadata"
triage:
- summary: "Confirm sustained CPU pressure"
linux: "uptime && mpstat 1 5"
windows: "typeperf \"\\Processor(_Total)\\% Processor Time\" -sc 15"
- summary: "Check offending processes/services"
linux: "sudo ps -eo pid,user,comm,%cpu,%mem --sort=-%cpu | head"
windows: "Get-Process | Sort-Object CPU -Descending | Select -First 10 Name,CPU"
- summary: "Inspect cgroup / VM constraints if on Proxmox"
linux: "sudo pct status {{ vmid }} && sudo pct config {{ vmid }}"
remediation:
- "Throttle or restart runaway service; scale workload or tune limits."
- "Consider moving noisy neighbors off shared hypervisor."
llm_prompt: >
High CPU alert for {{ host }}. Review process table, recent deploys, and virtualization context; determine why cpu.usage_system stayed above 85% and recommend mitigation.
- name: "High Mem."
rule_uid: "edkmsdmlay2o0c"
description: "Mean memory used_percent over 10 minutes > 95% (excluding hosts jhci/nerv*/magi*)."
signal:
metric: "mem.used_percent"
condition: "mean over 10m > 95%"
impact: "OOM risk and swap thrash."
evidence_to_collect:
- "Free/available memory snapshot"
- "Top consumers (Linux: `sudo smem -rt rss | head`; Windows: `Get-Process | Sort-Object WorkingSet -Descending`)"
- "Swap in/out metrics"
triage:
- summary: "Validate actual memory pressure"
linux: "free -m && vmstat -SM 5 5"
windows: "Get-Counter '\\Memory\\Available MBytes'"
- summary: "Identify leaking services"
linux: "sudo ps -eo pid,user,comm,%mem,rss --sort=-%mem | head"
windows: "Get-Process | Sort-Object WS -Descending | Select -First 10 ProcessName,WS"
- summary: "Check recent kernel/OOM logs"
linux: "sudo dmesg | tail -n 50"
windows: "Get-WinEvent -LogName System -MaxEvents 50 | ? { $_.Message -match 'memory' }"
remediation:
- "Restart or reconfigure offender; add swap as stop-gap; increase VM memory allocation."
llm_prompt: >
High Mem alert for {{ host }}. After reviewing free memory, swap activity, and top processes, explain the likely cause and propose remediation steps with priority.
- name: "High Disk IO"
rule_uid: "bdkmtaru7ru2od"
description: "Mean merged_reads/writes per second converted to GB/s exceeds 10."
signal:
metric: "diskio.merged_reads + merged_writes"
condition: "mean over 10m > 10 GB/s"
impact: "Storage controller saturated; latency spikes, possible backlog."
evidence_to_collect:
- "iostat extended output"
- "Process level IO (pidstat/nethogs equivalent)"
- "ZFS/MDADM status for relevant pools"
triage:
- summary: "Inspect device queues"
linux: "iostat -xzd 5 3"
windows: "Get-WmiObject -Class Win32_PerfFormattedData_PerfDisk_LogicalDisk | Format-Table Name,DiskWritesPersec,DiskReadsPersec,AvgDisksecPerTransfer"
- summary: "Correlate to filesystem / VM"
linux: "sudo lsof +D /mnt/critical -u {{ user }}"
- summary: "Check backup or replication windows"
linux: "journalctl -u pvebackup -n 50"
remediation:
- "Pause heavy jobs, move backups off-peak, evaluate faster storage tiers."
llm_prompt: >
High Disk IO on {{ host }}. With iostat/pidstat output provided, decide whether activity is expected (backup, scrub) or abnormal and list mitigations.
- name: "Low Uptime"
rule_uid: "ddkmuadxvkm4ge"
description: "System uptime converted to minutes is below 10 -> host rebooted recently."
signal:
metric: "system.uptime"
condition: "last uptime_minutes < 10"
impact: "Unexpected reboot or crash; may need RCA."
evidence_to_collect:
- "Boot reason logs"
- "Last patch/maintenance window from Ansible inventory"
- "Smart log excerpt for power events"
triage:
- summary: "Confirm uptime and reason"
linux: "uptime && last -x | head"
windows: "Get-WinEvent -LogName System -MaxEvents 50 | ? { $_.Id -in 41,6006,6008 }"
- summary: "Check kernel panic or watchdog traces"
linux: "sudo journalctl -k -b -1 | tail -n 200"
- summary: "Validate patch automation logs"
linux: "sudo tail -n 100 /var/log/ansible-pull.log"
remediation:
- "Schedule deeper diagnostics if crash; reschedule workloads once stable."
llm_prompt: >
Low Uptime alert: host restarted within 10 minutes. Inspect boot reason logs and recommend whether this is maintenance or a fault needing follow-up.
- name: "High Load"
rule_uid: "ddkmul9x8gcn4d"
description: "system.load5 > 6 for 5 minutes."
signal:
metric: "system.load5"
condition: "last value > 6"
impact: "Runnable queue more than CPU threads -> latency growth."
evidence_to_collect:
- "Load vs CPU count (`nproc`)"
- "Process states (D/R blocked tasks)"
- "IO wait percentage"
triage:
- summary: "Correlate load to CPU and IO"
linux: "uptime && vmstat 1 5"
- summary: "Identify stuck IO"
linux: "sudo pidstat -d 1 5"
- summary: "Check Proxmox scheduler for resource contention"
linux: "pveperf && qm list"
remediation:
- "Reduce cron concurrency, add CPU, or fix IO bottleneck causing runnable queue growth."
llm_prompt: >
High Load alert on {{ host }}. Based on vmstat/pidstat output, explain whether CPU saturation, IO wait, or runnable pile-up is at fault and propose actions.
- name: "High Network Traffic (Download)"
rule_uid: "cdkpct82a7g8wd"
description: "Derivative of bytes_recv > 50 MB/s on any interface over last hour."
signal:
metric: "net.bytes_recv"
condition: "mean download throughput > 50 MB/s"
impact: "Link saturation, potential DDoS or backup window."
evidence_to_collect:
- "Interface counters (Linux: `ip -s link show {{ iface }}`; Windows: `Get-NetAdapterStatistics`)"
- "Top talkers (Linux: `sudo nethogs {{ iface }}` or `iftop -i {{ iface }}`)"
- "Firewall/IDS logs"
triage:
- summary: "Confirm interface experiencing spike"
linux: "sar -n DEV 1 5 | grep {{ iface }}"
windows: "Get-Counter -Counter '\\Network Interface({{ iface }})\\Bytes Received/sec' -Continuous -SampleInterval 1 -MaxSamples 5"
- summary: "Identify process or remote peer"
linux: "sudo ss -ntu state established | sort -k4"
windows: "Get-NetTCPConnection | Sort-Object -Property LocalPort"
remediation:
- "Throttle offending transfers, move backup replication, verify no compromised service."
llm_prompt: >
High download throughput on {{ host }} interface {{ iface }}. Review interface counters and connection list to determine if traffic is expected and advise throttling or blocking steps.
- name: "High Network Traffic (Upload)"
rule_uid: "aec650pbtvzswa"
description: "Derivative of bytes_sent > 30 MB/s for an interface."
signal:
metric: "net.bytes_sent"
condition: "mean upload throughput > 30 MB/s"
impact: "Excess upstream usage; may saturate ISP uplink."
evidence_to_collect:
- "Interface statistics"
- "NetFlow sample if available (`/var/log/telegraf/netflow.log`)"
- "List of active transfers"
triage:
- summary: "Measure upload curve"
linux: "bmon -p {{ iface }} -o ascii"
windows: "Get-Counter '\\Network Interface({{ iface }})\\Bytes Sent/sec' -Continuous -SampleInterval 1 -MaxSamples 5"
- summary: "Find process generating traffic"
linux: "sudo iftop -i {{ iface }} -t -s 30"
windows: "Get-NetAdapterStatistics -Name {{ iface }}"
remediation:
- "Pause replication jobs, confirm backups not stuck, search for data exfiltration."
llm_prompt: >
High upload alert for {{ host }} interface {{ iface }}. Using captured traffic samples, determine whether replication/backup explains the pattern or if anomalous traffic needs blocking.
- name: "High Disk Usage"
rule_uid: "cdma6i5k2gem8d"
description: "Disk used_percent >= 95% for Linux devices (filters out unwanted devices)."
signal:
metric: "disk.used_percent"
condition: "last value > 95%"
impact: "Filesystem full -> service crashes or write failures."
evidence_to_collect:
- "`df -h` or `Get-Volume` output for device"
- "Largest directories snapshot (Linux: `sudo du -xhd1 /path`; Windows: `Get-ChildItem | Sort Length`)"
- "Recent deploy or backup expansion logs"
triage:
- summary: "Validate usage"
linux: "df -h {{ mountpoint }}"
windows: "Get-Volume -FileSystemLabel {{ volume }}"
- summary: "Identify growth trend"
linux: "sudo journalctl -u telegraf -g 'disk usage' -n 20"
- summary: "Check for stale docker volumes"
linux: "docker system df && docker volume ls"
remediation:
- "Prune temp artifacts, expand disk/VM, move logs to remote storage."
llm_prompt: >
High Disk Usage alert on {{ host }} device {{ device }}. Summarize what consumed the space and recommend reclaim or expansion actions with priority.
- name: "CPU Heartbeat"
rule_uid: "eec62gqn3oetcf"
description: "Counts cpu.usage_system samples per host; fires if <1 sample arrives within window."
signal:
metric: "cpu.usage_system"
condition: "sample count within 10m < 1"
impact: "Indicates host stopped reporting metrics entirely (telemetry silent)."
evidence_to_collect:
- "Influx query for recent cpu samples"
- "Telegraf service and logs"
- "Network reachability from host to casper"
triage:
- summary: "Check host alive and reachable"
linux: "ping -c 3 {{ host }} && ssh {{ host }} uptime"
windows: "Test-Connection {{ host }} -Count 3"
- summary: "Inspect Telegraf state"
linux: "sudo systemctl status telegraf && sudo tail -n 100 /var/log/telegraf/telegraf.log"
windows: "Get-Service telegraf; Get-EventLog -LogName Application -Newest 50 | ? { $_.Source -match 'Telegraf' }"
- summary: "Validate API key / Influx auth"
linux: "sudo grep -n 'outputs.influxdb' -n /etc/telegraf/telegraf.conf"
remediation:
- "Re-issue Telegraf credentials, run `ansible-playbook telemetry.yml -l {{ host }}`."
- "If host intentionally offline, silence alert via Grafana maintenance window."
llm_prompt: >
CPU Heartbeat for {{ host }} indicates telemetry silent. Use connectivity tests and Telegraf logs to determine if host is down or just metrics disabled; propose fixes.


@@ -0,0 +1,14 @@
version: "3.9"

services:
  grafana-alert-webhook:
    build: .
    env_file:
      - .env
    ports:
      - "8081:8081"
    volumes:
      - ./alert_runbook.yaml:/var/core/mlLogWatcher/alert_runbook.yaml:ro
      - /etc/ansible/hosts:/etc/ansible/hosts:ro
      - ./.ssh:/var/core/mlLogWatcher/.ssh:ro
    restart: unless-stopped


@@ -0,0 +1,5 @@
fastapi==0.115.5
uvicorn[standard]==0.32.0
pyyaml==6.0.2
requests==2.32.3
langchain==0.2.15


@@ -0,0 +1,988 @@
#!/usr/bin/env python3
"""
Minimal FastAPI web server that accepts Grafana alert webhooks, looks up the
matching runbook entry, builds an LLM prompt, and calls OpenRouter to return a
triage summary.
Run with:
uvicorn scripts.grafana_alert_webhook:app --host 0.0.0.0 --port 8081
Environment variables:
RUNBOOK_PATH Path to alert_runbook.yaml (default: ./alert_runbook.yaml)
OPENROUTER_API_KEY Required; API token for https://openrouter.ai
OPENROUTER_MODEL Optional; default openai/gpt-4o-mini
OPENROUTER_REFERER Optional referer header
OPENROUTER_TITLE Optional title header (default: Grafana Alert Webhook)
"""
from __future__ import annotations
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import json
import shlex
import subprocess
from textwrap import indent
import smtplib
from email.message import EmailMessage
import requests
import yaml
from fastapi import FastAPI, HTTPException, Request
from langchain.llms.base import LLM
LOGGER = logging.getLogger("grafana_webhook")
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
RUNBOOK_PATH = Path(os.environ.get("RUNBOOK_PATH", "alert_runbook.yaml"))
ANSIBLE_HOSTS_PATH = Path(os.environ.get("ANSIBLE_HOSTS_PATH", "/etc/ansible/hosts"))
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "openai/gpt-4o-mini")
OPENROUTER_REFERER = os.environ.get("OPENROUTER_REFERER")
OPENROUTER_TITLE = os.environ.get("OPENROUTER_TITLE", "Grafana Alert Webhook")
TRIAGE_ENABLE_COMMANDS = os.environ.get("TRIAGE_ENABLE_COMMANDS", "0").lower() in {"1", "true", "yes", "on"}
TRIAGE_COMMAND_RUNNER = os.environ.get("TRIAGE_COMMAND_RUNNER", "ssh").lower()
TRIAGE_SSH_USER = os.environ.get("TRIAGE_SSH_USER", "root")
TRIAGE_SSH_OPTIONS = shlex.split(
os.environ.get("TRIAGE_SSH_OPTIONS", "-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=5")
)
TRIAGE_COMMAND_TIMEOUT = int(os.environ.get("TRIAGE_COMMAND_TIMEOUT", "60"))
TRIAGE_DEFAULT_OS = os.environ.get("TRIAGE_DEFAULT_OS", "linux").lower()
TRIAGE_MAX_COMMANDS = int(os.environ.get("TRIAGE_MAX_COMMANDS", "3"))
TRIAGE_OUTPUT_LIMIT = int(os.environ.get("TRIAGE_OUTPUT_LIMIT", "1200"))
# LangChain-driven investigation loop
TRIAGE_MAX_ITERATIONS = int(os.environ.get("TRIAGE_MAX_ITERATIONS", "3"))
TRIAGE_FOLLOWUP_MAX_COMMANDS = int(os.environ.get("TRIAGE_FOLLOWUP_MAX_COMMANDS", "4"))
TRIAGE_SYSTEM_PROMPT = os.environ.get(
"TRIAGE_SYSTEM_PROMPT",
(
"You are assisting with on-call investigations. Always reply with JSON containing:\n"
"analysis: your findings and next steps.\n"
"followup_commands: list of command specs (summary, command, optional runner/os) to gather more data.\n"
"complete: true when sufficient information is gathered.\n"
"Request commands only when more evidence is required."
),
)
TRIAGE_VERBOSE_LOGS = os.environ.get("TRIAGE_VERBOSE_LOGS", "0").lower() in {"1", "true", "yes", "on"}
TRIAGE_EMAIL_ENABLED = os.environ.get("TRIAGE_EMAIL_ENABLED", "0").lower() in {"1", "true", "yes", "on"}
TRIAGE_EMAIL_FROM = os.environ.get("TRIAGE_EMAIL_FROM")
TRIAGE_EMAIL_TO = [addr.strip() for addr in os.environ.get("TRIAGE_EMAIL_TO", "").split(",") if addr.strip()]
TRIAGE_SMTP_HOST = os.environ.get("TRIAGE_SMTP_HOST")
TRIAGE_SMTP_PORT = int(os.environ.get("TRIAGE_SMTP_PORT", "587"))
TRIAGE_SMTP_USER = os.environ.get("TRIAGE_SMTP_USER")
TRIAGE_SMTP_PASSWORD = os.environ.get("TRIAGE_SMTP_PASSWORD")
TRIAGE_SMTP_STARTTLS = os.environ.get("TRIAGE_SMTP_STARTTLS", "1").lower() in {"1", "true", "yes", "on"}
TRIAGE_SMTP_SSL = os.environ.get("TRIAGE_SMTP_SSL", "0").lower() in {"1", "true", "yes", "on"}
TRIAGE_SMTP_TIMEOUT = int(os.environ.get("TRIAGE_SMTP_TIMEOUT", "20"))
def log_verbose(title: str, content: Any) -> None:
"""Emit structured verbose logs when TRIAGE_VERBOSE_LOGS is enabled."""
if not TRIAGE_VERBOSE_LOGS:
return
if isinstance(content, (dict, list)):
text = json.dumps(content, indent=2, sort_keys=True)
else:
text = str(content)
LOGGER.info("%s:\n%s", title, text)
def email_notifications_configured() -> bool:
if not TRIAGE_EMAIL_ENABLED:
return False
if not (TRIAGE_SMTP_HOST and TRIAGE_EMAIL_FROM and TRIAGE_EMAIL_TO):
LOGGER.warning(
"Email notifications requested but TRIAGE_SMTP_HOST/TRIAGE_EMAIL_FROM/TRIAGE_EMAIL_TO are incomplete."
)
return False
return True
def format_command_results_for_email(results: List[Dict[str, Any]]) -> str:
if not results:
return "No automation commands were executed."
lines: List[str] = []
for result in results:
lines.append(f"- {result.get('summary')} [{result.get('status')}] {result.get('command')}")
stdout = result.get("stdout")
stderr = result.get("stderr")
error = result.get("error")
if stdout:
lines.append(indent(truncate_text(stdout, 800), " stdout: "))
if stderr:
lines.append(indent(truncate_text(stderr, 800), " stderr: "))
if error and result.get("status") != "ok":
lines.append(f" error: {error}")
return "\n".join(lines)
def build_email_body(alert: Dict[str, Any], result: Dict[str, Any], context: Dict[str, Any]) -> str:
lines = [
f"Alert: {result.get('alertname')} ({result.get('rule_uid')})",
f"Host: {result.get('host') or context.get('host')}",
f"Status: {alert.get('status')}",
f"Value: {alert.get('value') or alert.get('annotations', {}).get('value')}",
f"Grafana Rule: {context.get('rule_url')}",
"",
"LLM Summary:",
result.get("llm_summary") or "(no summary returned)",
"",
"Command Results:",
format_command_results_for_email(result.get("command_results") or []),
]
return "\n".join(lines)
def send_summary_email(alert: Dict[str, Any], result: Dict[str, Any], context: Dict[str, Any]) -> None:
if not email_notifications_configured():
return
subject_host = result.get("host") or context.get("host") or "(unknown host)"
subject = f"[Grafana] {result.get('alertname')} - {subject_host}"
body = build_email_body(alert, result, context)
message = EmailMessage()
message["Subject"] = subject
message["From"] = TRIAGE_EMAIL_FROM
message["To"] = ", ".join(TRIAGE_EMAIL_TO)
message.set_content(body)
try:
smtp_class = smtplib.SMTP_SSL if TRIAGE_SMTP_SSL else smtplib.SMTP
with smtp_class(TRIAGE_SMTP_HOST, TRIAGE_SMTP_PORT, timeout=TRIAGE_SMTP_TIMEOUT) as client:
if TRIAGE_SMTP_STARTTLS and not TRIAGE_SMTP_SSL:
client.starttls()
if TRIAGE_SMTP_USER:
client.login(TRIAGE_SMTP_USER, TRIAGE_SMTP_PASSWORD or "")
client.send_message(message)
LOGGER.info("Sent summary email to %s for host %s", ", ".join(TRIAGE_EMAIL_TO), subject_host)
except Exception as exc: # pylint: disable=broad-except
LOGGER.exception("Failed to send summary email: %s", exc)
app = FastAPI(title="Grafana Alert Webhook", version="1.0.0")
_RUNBOOK_INDEX: Dict[str, Dict[str, Any]] = {}
_INVENTORY_INDEX: Dict[str, Dict[str, Any]] = {}
_INVENTORY_GROUP_VARS: Dict[str, Dict[str, str]] = {}
_TEMPLATE_PATTERN = re.compile(r"{{\s*([a-zA-Z0-9_]+)\s*}}")
DEFAULT_SYSTEM_PROMPT = TRIAGE_SYSTEM_PROMPT
class OpenRouterLLM(LLM):
"""LangChain-compatible LLM that calls OpenRouter chat completions."""
api_key: str
model_name: str
def __init__(self, api_key: str, model_name: str, **kwargs: Any) -> None:
super().__init__(api_key=api_key, model_name=model_name, **kwargs)
@property
def _llm_type(self) -> str:
return "openrouter"
def __call__(self, prompt: str, stop: Optional[List[str]] = None) -> str:
return self._call(prompt, stop=stop)
def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
payload = {
"model": self.model_name,
"messages": [
{"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
{"role": "user", "content": prompt},
],
}
log_verbose("OpenRouter request payload", payload)
if stop:
payload["stop"] = stop
LOGGER.info("Posting to OpenRouter model=%s via LangChain", self.model_name)
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}
if OPENROUTER_REFERER:
headers["HTTP-Referer"] = OPENROUTER_REFERER
if OPENROUTER_TITLE:
headers["X-Title"] = OPENROUTER_TITLE
response = requests.post("https://openrouter.ai/api/v1/chat/completions", json=payload, headers=headers, timeout=90)
if response.status_code >= 400:
try:
detail = response.json()
except ValueError:
detail = response.text
raise RuntimeError(f"OpenRouter error {response.status_code}: {detail}")
data = response.json()
log_verbose("OpenRouter raw response", data)
choices = data.get("choices")
if not choices:
raise RuntimeError("OpenRouter returned no choices")
return choices[0]["message"]["content"].strip()
def load_runbook() -> Dict[str, Dict[str, Any]]:
"""Load runbook YAML into a dict keyed by rule_uid."""
if not RUNBOOK_PATH.exists():
raise FileNotFoundError(f"Runbook file not found: {RUNBOOK_PATH}")
with RUNBOOK_PATH.open("r", encoding="utf-8") as handle:
data = yaml.safe_load(handle) or {}
alerts = data.get("alerts", [])
index: Dict[str, Dict[str, Any]] = {}
for entry in alerts:
uid = entry.get("rule_uid")
if uid:
index[str(uid)] = entry
LOGGER.info("Loaded %d runbook entries from %s", len(index), RUNBOOK_PATH)
return index
def _normalize_host_key(host: str) -> str:
return host.strip().lower()
def _parse_key_value_tokens(tokens: List[str]) -> Dict[str, str]:
data: Dict[str, str] = {}
for token in tokens:
if "=" not in token:
continue
key, value = token.split("=", 1)
data[key] = value
return data
def load_ansible_inventory() -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, str]]]:
"""Parse a simple INI-style Ansible hosts file into host/group maps."""
if not ANSIBLE_HOSTS_PATH.exists():
LOGGER.warning("Ansible inventory not found at %s", ANSIBLE_HOSTS_PATH)
return {}, {}
hosts: Dict[str, Dict[str, Any]] = {}
group_vars: Dict[str, Dict[str, str]] = {}
current_group: Optional[str] = None
current_section: str = "hosts"
with ANSIBLE_HOSTS_PATH.open("r", encoding="utf-8") as handle:
for raw_line in handle:
line = raw_line.strip()
if not line or line.startswith("#"):
continue
if line.startswith("[") and line.endswith("]"):
header = line[1:-1].strip()
if ":" in header:
group_name, suffix = header.split(":", 1)
current_group = group_name
current_section = suffix
else:
current_group = header
current_section = "hosts"
group_vars.setdefault(current_group, {})
continue
cleaned = line.split("#", 1)[0].strip()
if not cleaned:
continue
tokens = shlex.split(cleaned)
if not tokens:
continue
if current_section == "vars":
vars_dict = _parse_key_value_tokens(tokens)
group_vars.setdefault(current_group or "all", {}).update(vars_dict)
continue
host_token = tokens[0]
host_key = _normalize_host_key(host_token)
entry = hosts.setdefault(host_key, {"name": host_token, "definitions": [], "groups": set()})
vars_dict = _parse_key_value_tokens(tokens[1:])
entry["definitions"].append({"group": current_group, "vars": vars_dict})
if current_group:
entry["groups"].add(current_group)
LOGGER.info("Loaded %d Ansible inventory hosts from %s", len(hosts), ANSIBLE_HOSTS_PATH)
return hosts, group_vars
def _lookup_inventory(host: Optional[str]) -> Optional[Dict[str, Any]]:
if not host:
return None
key = _normalize_host_key(host)
entry = _INVENTORY_INDEX.get(key)
if entry:
return entry
# try stripping domain suffix
short = key.split(".", 1)[0]
if short != key:
return _INVENTORY_INDEX.get(short)
return None
def _merge_group_vars(groups: List[str], host_os: Optional[str]) -> Dict[str, str]:
merged: Dict[str, str] = {}
global_vars = _INVENTORY_GROUP_VARS.get("all")
if global_vars:
merged.update(global_vars)
normalized_os = (host_os or "").lower()
for group in groups:
vars_dict = _INVENTORY_GROUP_VARS.get(group)
if not vars_dict:
continue
connection = (vars_dict.get("ansible_connection") or "").lower()
if connection == "winrm" and normalized_os == "linux":
continue
merged.update(vars_dict)
return merged
def _should_include_definition(group: Optional[str], vars_dict: Dict[str, str], host_os: Optional[str]) -> bool:
if not vars_dict:
return False
normalized_os = (host_os or "").lower()
connection = (vars_dict.get("ansible_connection") or "").lower()
if connection == "winrm" and normalized_os != "windows":
return False
if connection == "local":
return True
if group and "windows" in group.lower() and normalized_os == "linux" and not connection:
return False
return True
def apply_inventory_context(context: Dict[str, Any]) -> None:
"""Augment the alert context with SSH metadata from the Ansible inventory."""
host = context.get("host")
entry = _lookup_inventory(host)
if not entry:
return
merged_vars = _merge_group_vars(list(entry.get("groups", [])), context.get("host_os"))
for definition in entry.get("definitions", []):
group_name = definition.get("group")
vars_dict = definition.get("vars", {})
if _should_include_definition(group_name, vars_dict, context.get("host_os")):
merged_vars.update(vars_dict)
ansible_host = merged_vars.get("ansible_host") or entry.get("name")
ansible_user = merged_vars.get("ansible_user")
ansible_port = merged_vars.get("ansible_port")
ssh_common_args = merged_vars.get("ansible_ssh_common_args")
ssh_key = merged_vars.get("ansible_ssh_private_key_file")
connection = (merged_vars.get("ansible_connection") or "").lower()
host_os = (context.get("host_os") or "").lower()
if connection == "winrm" and host_os != "windows":
for key in (
"ansible_connection",
"ansible_port",
"ansible_password",
"ansible_winrm_server_cert_validation",
"ansible_winrm_scheme",
):
merged_vars.pop(key, None)
connection = ""
context.setdefault("ssh_host", ansible_host or host)
if ansible_user:
context["ssh_user"] = ansible_user
if ansible_port:
context["ssh_port"] = ansible_port
if ssh_common_args:
context["ssh_common_args"] = ssh_common_args
if ssh_key:
context["ssh_identity_file"] = ssh_key
context.setdefault("inventory_groups", list(entry.get("groups", [])))
if connection == "local":
context.setdefault("preferred_runner", "local")
elif connection in {"", "ssh", "smart"}:
context.setdefault("preferred_runner", "ssh")
context.setdefault("inventory_groups", list(entry.get("groups", [])))
def render_template(template: str, context: Dict[str, Any]) -> str:
"""Very small mustache-style renderer for {{ var }} placeholders."""
def replace(match: re.Match[str]) -> str:
key = match.group(1)
return str(context.get(key, match.group(0)))
return _TEMPLATE_PATTERN.sub(replace, template)
def extract_rule_uid(alert: Dict[str, Any], parent_payload: Dict[str, Any]) -> Optional[str]:
"""Grafana webhooks may include rule UID in different fields."""
candidates: List[Any] = [
alert.get("ruleUid"),
alert.get("rule_uid"),
alert.get("ruleId"),
alert.get("uid"),
alert.get("labels", {}).get("rule_uid"),
alert.get("labels", {}).get("ruleUid"),
parent_payload.get("ruleUid"),
parent_payload.get("rule_uid"),
parent_payload.get("ruleId"),
]
for candidate in candidates:
if candidate:
return str(candidate)
# Fall back to Grafana URL parsing if present
url = (
alert.get("ruleUrl")
or parent_payload.get("ruleUrl")
or alert.get("generatorURL")
or parent_payload.get("generatorURL")
)
if url and "/alerting/" in url:
return url.rstrip("/").split("/")[-2]
return None
def derive_fallback_rule_uid(alert: Dict[str, Any], parent_payload: Dict[str, Any]) -> str:
"""Construct a deterministic identifier when Grafana omits rule UIDs."""
labels = alert.get("labels", {})
candidates = [
alert.get("fingerprint"),
labels.get("alertname"),
labels.get("host"),
labels.get("instance"),
parent_payload.get("groupKey"),
parent_payload.get("title"),
]
for candidate in candidates:
if candidate:
return str(candidate)
return "unknown-alert"
def build_fallback_runbook_entry(alert: Dict[str, Any], parent_payload: Dict[str, Any]) -> Dict[str, Any]:
"""Return a generic runbook entry so every alert can be processed."""
labels = alert.get("labels", {})
alertname = labels.get("alertname") or parent_payload.get("title") or "Grafana Alert"
host = labels.get("host") or labels.get("instance") or "(unknown host)"
return {
"name": f"{alertname} (auto)",
"llm_prompt": (
"Grafana alert {{ alertname }} fired for {{ host }}.\n"
"No dedicated runbook entry exists. Use the payload details, command outputs, "
"and your own reasoning to propose likely causes, evidence to gather, and remediation steps."
),
"triage": [],
"evidence_to_collect": [],
"remediation": [],
"metadata": {"host": host},
}
def summarize_dict(prefix: str, data: Optional[Dict[str, Any]]) -> str:
if not data:
return f"{prefix}: (none)"
parts = ", ".join(f"{key}={value}" for key, value in sorted(data.items()))
return f"{prefix}: {parts}"
def determine_host_os(alert: Dict[str, Any]) -> str:
"""Infer host operating system from labels or defaults."""
labels = alert.get("labels", {})
candidates = [
labels.get("os"),
labels.get("platform"),
labels.get("system"),
alert.get("os"),
]
for candidate in candidates:
if candidate:
value = str(candidate).lower()
if "win" in value:
return "windows"
if any(token in value for token in ("linux", "unix", "darwin")):
return "linux"
host = (labels.get("host") or labels.get("instance") or "").lower()
if host.startswith("win") or host.endswith(".localdomain") and "win" in host:
return "windows"
inventory_os = infer_os_from_inventory(labels.get("host") or labels.get("instance"))
if inventory_os:
return inventory_os
return TRIAGE_DEFAULT_OS
def infer_os_from_inventory(host: Optional[str]) -> Optional[str]:
if not host:
return None
entry = _lookup_inventory(host)
if not entry:
return None
for definition in entry.get("definitions", []):
vars_dict = definition.get("vars", {}) or {}
connection = (vars_dict.get("ansible_connection") or "").lower()
if connection == "winrm":
return "windows"
for group in entry.get("groups", []):
if "windows" in (group or "").lower():
return "windows"
return None
def truncate_text(text: str, limit: int = TRIAGE_OUTPUT_LIMIT) -> str:
"""Trim long outputs to keep prompts manageable."""
if not text:
return ""
cleaned = text.strip()
if len(cleaned) <= limit:
return cleaned
return cleaned[:limit] + "... [truncated]"
def gather_command_specs(entry: Dict[str, Any], host_os: str) -> List[Dict[str, Any]]:
"""Collect command specs from triage steps and optional automation sections."""
specs: List[Dict[str, Any]] = []
for step in entry.get("triage", []):
cmd = step.get(host_os)
if not cmd:
continue
specs.append(
{
"summary": step.get("summary") or entry.get("name") or "triage",
"shell": cmd,
"runner": step.get("runner"),
"os": host_os,
}
)
for item in entry.get("automation_commands", []):
target_os = item.get("os", host_os)
if target_os and target_os.lower() != host_os:
continue
specs.append(item)
if TRIAGE_MAX_COMMANDS > 0:
return specs[:TRIAGE_MAX_COMMANDS]
return specs
def build_runner_command(
rendered_command: str,
runner: str,
context: Dict[str, Any],
spec: Dict[str, Any],
) -> Tuple[Any, str, bool, str]:
"""Return the subprocess args, display string, shell flag, and runner label."""
runner = runner or TRIAGE_COMMAND_RUNNER
runner = runner.lower()
if runner == "ssh":
host = spec.get("host") or context.get("ssh_host") or context.get("host")
if not host:
raise RuntimeError("Host not provided for ssh runner.")
ssh_user = spec.get("ssh_user") or context.get("ssh_user") or TRIAGE_SSH_USER
ssh_target = spec.get("ssh_target") or f"{ssh_user}@{host}"
ssh_options = list(TRIAGE_SSH_OPTIONS)
common_args = spec.get("ssh_common_args") or context.get("ssh_common_args")
if common_args:
ssh_options.extend(shlex.split(common_args))
ssh_port = spec.get("ssh_port") or context.get("ssh_port")
if ssh_port:
ssh_options.extend(["-p", str(ssh_port)])
identity_file = spec.get("ssh_identity_file") or context.get("ssh_identity_file")
if identity_file:
ssh_options.extend(["-i", identity_file])
command_list = ["ssh", *ssh_options, ssh_target, rendered_command]
display = " ".join(shlex.quote(part) for part in command_list)
return command_list, display, False, "ssh"
# default to local shell execution
display = rendered_command
return rendered_command, display, True, "local"
def run_subprocess_command(
command: Any,
display: str,
summary: str,
use_shell: bool,
runner_label: str,
) -> Dict[str, Any]:
"""Execute subprocess command and capture results."""
LOGGER.info("Executing command (%s) via %s: %s", summary, runner_label, display)
try:
completed = subprocess.run(
command,
capture_output=True,
text=True,
timeout=TRIAGE_COMMAND_TIMEOUT,
shell=use_shell,
check=False,
)
result = {
"summary": summary,
"command": display,
"runner": runner_label,
"exit_code": completed.returncode,
"stdout": (completed.stdout or "").strip(),
"stderr": (completed.stderr or "").strip(),
"status": "ok" if completed.returncode == 0 else "failed",
}
log_verbose(f"Command result ({summary})", result)
return result
except subprocess.TimeoutExpired as exc:
result = {
"summary": summary,
"command": display,
"runner": runner_label,
"exit_code": None,
"stdout": truncate_text((exc.stdout or "").strip()),
"stderr": truncate_text((exc.stderr or "").strip()),
"status": "timeout",
"error": f"Command timed out after {TRIAGE_COMMAND_TIMEOUT}s",
}
log_verbose(f"Command timeout ({summary})", result)
return result
except Exception as exc: # pylint: disable=broad-except
LOGGER.exception("Command execution failed (%s): %s", summary, exc)
result = {
"summary": summary,
"command": display,
"runner": runner_label,
"exit_code": None,
"stdout": "",
"stderr": "",
"status": "error",
"error": str(exc),
}
log_verbose(f"Command error ({summary})", result)
return result
def run_command_spec(spec: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
summary = spec.get("summary") or spec.get("name") or "command"
shell_cmd = spec.get("shell")
if not shell_cmd:
return {"summary": summary, "status": "skipped", "error": "No shell command provided."}
rendered = render_template(shell_cmd, context)
preferred_runner = context.get("preferred_runner")
runner_choice = (spec.get("runner") or preferred_runner or TRIAGE_COMMAND_RUNNER).lower()
try:
command, display, use_shell, runner_label = build_runner_command(rendered, runner_choice, context, spec)
except RuntimeError as exc:
LOGGER.warning("Skipping command '%s': %s", summary, exc)
return {"summary": summary, "status": "skipped", "error": str(exc), "command": rendered}
return run_subprocess_command(command, display, summary, use_shell, runner_label)
def execute_triage_commands(entry: Dict[str, Any], alert: Dict[str, Any], context: Dict[str, Any]) -> List[Dict[str, Any]]:
host_os = context.get("host_os") or determine_host_os(alert)
context["host_os"] = host_os
specs = gather_command_specs(entry, host_os)
if not specs:
LOGGER.info("No triage commands defined for host_os=%s", host_os)
return []
if not TRIAGE_ENABLE_COMMANDS:
LOGGER.info("Command execution disabled; %d commands queued but skipped.", len(specs))
return []
LOGGER.info("Executing up to %d triage commands for host_os=%s", len(specs), host_os)
results = []
for spec in specs:
results.append(run_command_spec(spec, context))
return results
def format_command_results_for_llm(results: List[Dict[str, Any]]) -> str:
lines: List[str] = []
for idx, result in enumerate(results, start=1):
lines.append(f"{idx}. {result.get('summary')} [{result.get('status')}] {result.get('command')}")
stdout = result.get("stdout")
stderr = result.get("stderr")
error = result.get("error")
if stdout:
lines.append(" stdout:")
lines.append(indent(truncate_text(stdout), " "))
if stderr:
lines.append(" stderr:")
lines.append(indent(truncate_text(stderr), " "))
if error and result.get("status") != "ok":
lines.append(f" error: {error}")
if not lines:
return "No command results were available."
return "\n".join(lines)
def parse_structured_response(text: str) -> Optional[Dict[str, Any]]:
cleaned = text.strip()
try:
return json.loads(cleaned)
except json.JSONDecodeError:
start = cleaned.find("{")
end = cleaned.rfind("}")
if start != -1 and end != -1 and end > start:
snippet = cleaned[start : end + 1]
try:
return json.loads(snippet)
except json.JSONDecodeError:
return None
return None
def normalize_followup_command(item: Dict[str, Any]) -> Dict[str, Any]:
return {
"summary": item.get("summary") or item.get("name") or "Follow-up command",
"shell": item.get("command") or item.get("shell"),
"runner": item.get("runner"),
"host": item.get("host") or item.get("target"),
"ssh_user": item.get("ssh_user"),
"os": (item.get("os") or item.get("platform") or "").lower() or None,
}
def investigate_with_langchain(
entry: Dict[str, Any],
alert: Dict[str, Any],
parent_payload: Dict[str, Any],
context: Dict[str, Any],
initial_outputs: List[Dict[str, Any]],
) -> Tuple[str, List[Dict[str, Any]]]:
command_outputs = list(initial_outputs)
prompt = build_prompt(entry, alert, parent_payload, context, command_outputs)
log_verbose("Initial investigation prompt", prompt)
if not OPENROUTER_API_KEY:
return "OPENROUTER_API_KEY is not configured; unable to analyze alert.", command_outputs
llm = OpenRouterLLM(api_key=OPENROUTER_API_KEY, model_name=OPENROUTER_MODEL)
dialogue = (
prompt
+ "\n\nRespond with JSON containing fields analysis, followup_commands, and complete. "
"Request commands only when more evidence is required."
)
total_followup = 0
final_summary = ""
for iteration in range(TRIAGE_MAX_ITERATIONS):
log_verbose(f"LLM dialogue iteration {iteration + 1}", dialogue)
llm_text = llm(dialogue)
log_verbose(f"LLM iteration {iteration + 1} output", llm_text)
dialogue += f"\nAssistant:\n{llm_text}\n"
parsed = parse_structured_response(llm_text)
if parsed:
log_verbose(f"LLM iteration {iteration + 1} parsed response", parsed)
if not parsed:
final_summary = llm_text
break
analysis = parsed.get("analysis") or ""
followups = parsed.get("followup_commands") or parsed.get("commands") or []
final_summary = analysis
complete_flag = bool(parsed.get("complete"))
if complete_flag or not followups:
break
log_verbose(f"LLM iteration {iteration + 1} requested follow-ups", followups)
allowed = max(0, TRIAGE_FOLLOWUP_MAX_COMMANDS - total_followup)
if not TRIAGE_ENABLE_COMMANDS or allowed <= 0:
dialogue += (
"\nUser:\nCommand execution is disabled or budget exhausted. Provide final analysis with JSON format.\n"
)
continue
normalized_cmds: List[Dict[str, Any]] = []
for raw in followups:
if not isinstance(raw, dict):
continue
normalized = normalize_followup_command(raw)
if not normalized.get("shell"):
continue
cmd_os = normalized.get("os")
if cmd_os and cmd_os != context.get("host_os"):
continue
normalized_cmds.append(normalized)
log_verbose(f"Normalized follow-up commands (iteration {iteration + 1})", normalized_cmds)
if not normalized_cmds:
dialogue += "\nUser:\nNo valid commands to run. Finalize analysis in JSON format.\n"
continue
normalized_cmds = normalized_cmds[:allowed]
executed_batch: List[Dict[str, Any]] = []
for spec in normalized_cmds:
executed = run_command_spec(spec, context)
command_outputs.append(executed)
executed_batch.append(executed)
total_followup += 1
result_text = "Follow-up command results:\n" + format_command_results_for_llm(executed_batch)
dialogue += (
"\nUser:\n"
+ result_text
+ "\nUpdate your analysis and respond with JSON (analysis, followup_commands, complete).\n"
)
log_verbose("Executed follow-up commands", result_text)
else:
final_summary = final_summary or "Reached maximum iterations without a conclusive response."
if not final_summary:
final_summary = "LLM did not return a valid analysis."
log_verbose("Final LLM summary", final_summary)
return final_summary, command_outputs
def build_context(alert: Dict[str, Any], parent_payload: Dict[str, Any]) -> Dict[str, Any]:
labels = alert.get("labels", {})
annotations = alert.get("annotations", {})
context = {
"alertname": labels.get("alertname") or alert.get("title") or parent_payload.get("title") or parent_payload.get("ruleName"),
"host": labels.get("host") or labels.get("instance"),
"iface": labels.get("interface"),
"device": labels.get("device"),
"vmid": labels.get("vmid"),
"status": alert.get("status") or parent_payload.get("status"),
"value": alert.get("value") or annotations.get("value"),
"rule_url": alert.get("ruleUrl") or parent_payload.get("ruleUrl"),
}
context.setdefault("ssh_user", TRIAGE_SSH_USER)
return context
def build_prompt(
entry: Dict[str, Any],
alert: Dict[str, Any],
parent_payload: Dict[str, Any],
context: Dict[str, Any],
command_outputs: Optional[List[Dict[str, Any]]] = None,
) -> str:
template = entry.get("llm_prompt", "Alert {{ alertname }} fired for {{ host }}.")
rendered_template = render_template(template, {k: v or "" for k, v in context.items()})
evidence = entry.get("evidence_to_collect", [])
triage_steps = entry.get("triage", [])
remediation = entry.get("remediation", [])
lines = [
rendered_template.strip(),
"",
"Alert payload summary:",
f"- Status: {context.get('status') or alert.get('status')}",
f"- Host: {context.get('host')}",
f"- Value: {context.get('value')}",
f"- StartsAt: {alert.get('startsAt')}",
f"- EndsAt: {alert.get('endsAt')}",
f"- RuleURL: {context.get('rule_url')}",
f"- Host OS (inferred): {context.get('host_os')}",
"- Note: All timestamps are UTC/RFC3339 as provided by Grafana.",
summarize_dict("- Labels", alert.get("labels")),
summarize_dict("- Annotations", alert.get("annotations")),
]
if evidence:
lines.append("")
lines.append("Evidence to gather (for automation reference):")
for item in evidence:
lines.append(f"- {item}")
if triage_steps:
lines.append("")
lines.append("Suggested manual checks:")
for step in triage_steps:
summary = step.get("summary")
linux = step.get("linux")
windows = step.get("windows")
lines.append(f"- {summary}")
if linux:
lines.append(f" Linux: {linux}")
if windows:
lines.append(f" Windows: {windows}")
if remediation:
lines.append("")
lines.append("Remediation ideas:")
for item in remediation:
lines.append(f"- {item}")
if command_outputs:
lines.append("")
lines.append("Command execution results:")
for result in command_outputs:
status = result.get("status", "unknown")
cmd_display = result.get("command", "")
lines.append(f"- {result.get('summary')} [{status}] {cmd_display}")
stdout = result.get("stdout")
stderr = result.get("stderr")
error = result.get("error")
if stdout:
lines.append(" stdout:")
lines.append(indent(truncate_text(stdout), " "))
if stderr:
lines.append(" stderr:")
lines.append(indent(truncate_text(stderr), " "))
if error and status != "ok":
lines.append(f" error: {error}")
return "\n".join(lines).strip()
def get_alerts(payload: Dict[str, Any]) -> List[Dict[str, Any]]:
alerts = payload.get("alerts")
if isinstance(alerts, list) and alerts:
return alerts
return [payload]
@app.on_event("startup")
def startup_event() -> None:
global _RUNBOOK_INDEX, _INVENTORY_INDEX, _INVENTORY_GROUP_VARS
_RUNBOOK_INDEX = load_runbook()
_INVENTORY_INDEX, _INVENTORY_GROUP_VARS = load_ansible_inventory()
LOGGER.info(
"Alert webhook server ready with %d runbook entries and %d inventory hosts.",
len(_RUNBOOK_INDEX),
len(_INVENTORY_INDEX),
)
@app.post("/alerts")
async def handle_alert(request: Request) -> Dict[str, Any]:
payload = await request.json()
LOGGER.info("Received Grafana payload: %s", json.dumps(payload, indent=2, sort_keys=True))
results = []
unmatched = []
for alert in get_alerts(payload):
LOGGER.info("Processing alert: %s", json.dumps(alert, indent=2, sort_keys=True))
unmatched_reason: Optional[str] = None
alert_status = str(alert.get("status") or payload.get("status") or "").lower()
if alert_status and alert_status != "firing":
details = {"reason": "non_firing_status", "status": alert_status, "alert": alert}
unmatched.append(details)
LOGGER.info("Skipping alert with status=%s (only 'firing' alerts are processed).", alert_status)
continue
rule_uid = extract_rule_uid(alert, payload)
if not rule_uid:
unmatched_reason = "missing_rule_uid"
derived_uid = derive_fallback_rule_uid(alert, payload)
details = {"reason": unmatched_reason, "derived_rule_uid": derived_uid, "alert": alert}
unmatched.append(details)
LOGGER.warning("Alert missing rule UID, using fallback identifier %s", derived_uid)
rule_uid = derived_uid
entry = _RUNBOOK_INDEX.get(rule_uid)
runbook_matched = entry is not None
if not entry:
unmatched_reason = unmatched_reason or "no_runbook_entry"
details = {"reason": unmatched_reason, "rule_uid": rule_uid, "alert": alert}
unmatched.append(details)
LOGGER.warning("No runbook entry for rule_uid=%s, using generic fallback.", rule_uid)
entry = build_fallback_runbook_entry(alert, payload)
context = build_context(alert, payload)
context["host_os"] = determine_host_os(alert)
context["rule_uid"] = rule_uid
apply_inventory_context(context)
initial_outputs = execute_triage_commands(entry, alert, context)
try:
llm_text, command_outputs = investigate_with_langchain(entry, alert, payload, context, initial_outputs)
except Exception as exc: # pylint: disable=broad-except
LOGGER.exception("Investigation failed for rule_uid=%s: %s", rule_uid, exc)
raise HTTPException(status_code=502, detail=f"LLM investigation error: {exc}") from exc
result = {
"rule_uid": rule_uid,
"alertname": entry.get("name"),
"host": alert.get("labels", {}).get("host"),
"llm_summary": llm_text,
"command_results": command_outputs,
"runbook_matched": runbook_matched,
}
if not runbook_matched and unmatched_reason:
result["fallback_reason"] = unmatched_reason
results.append(result)
send_summary_email(alert, result, context)
return {"processed": len(results), "results": results, "unmatched": unmatched}
@app.post("/reload-runbook")
def reload_runbook() -> Dict[str, Any]:
global _RUNBOOK_INDEX, _INVENTORY_INDEX, _INVENTORY_GROUP_VARS
_RUNBOOK_INDEX = load_runbook()
_INVENTORY_INDEX, _INVENTORY_GROUP_VARS = load_ansible_inventory()
return {"entries": len(_RUNBOOK_INDEX), "inventory_hosts": len(_INVENTORY_INDEX)}


@@ -0,0 +1,178 @@
#!/usr/bin/env python3
"""
Log anomaly checker that queries Elasticsearch and asks an OpenRouter-hosted LLM
for a quick triage summary. Intended to be run on a schedule (cron/systemd).
Required environment variables:
ELASTIC_HOST e.g. https://casper.localdomain:9200
ELASTIC_API_KEY Base64 ApiKey used for Elasticsearch requests
OPENROUTER_API_KEY Token for https://openrouter.ai/
Optional environment variables:
OPENROUTER_MODEL Model identifier (default: openai/gpt-4o-mini)
OPENROUTER_REFERER Passed through as HTTP-Referer header
OPENROUTER_TITLE Passed through as X-Title header
"""
from __future__ import annotations
import argparse
import datetime as dt
import os
import sys
from typing import Any, Iterable
import requests
def utc_iso(ts: dt.datetime) -> str:
"""Return an ISO8601 string with Z suffix."""
return ts.replace(microsecond=0).isoformat() + "Z"
def query_elasticsearch(
host: str,
api_key: str,
index_pattern: str,
minutes: int,
size: int,
verify: bool,
) -> list[dict[str, Any]]:
"""Fetch recent logs from Elasticsearch."""
end = dt.datetime.utcnow()
start = end - dt.timedelta(minutes=minutes)
url = f"{host.rstrip('/')}/{index_pattern}/_search"
payload = {
"size": size,
"sort": [{"@timestamp": {"order": "desc"}}],
"query": {
"range": {
"@timestamp": {
"gte": utc_iso(start),
"lte": utc_iso(end),
}
}
},
"_source": ["@timestamp", "message", "host.name", "container.image.name", "log.level"],
}
headers = {
"Authorization": f"ApiKey {api_key}",
"Content-Type": "application/json",
}
response = requests.post(url, json=payload, headers=headers, timeout=30, verify=verify)
response.raise_for_status()
hits = response.json().get("hits", {}).get("hits", [])
return hits
def build_prompt(logs: Iterable[dict[str, Any]], limit_messages: int) -> str:
"""Create the prompt that will be sent to the LLM."""
selected = []
for idx, hit in enumerate(logs):
if idx >= limit_messages:
break
source = hit.get("_source", {})
message = source.get("message") or source.get("event", {}).get("original") or ""
timestamp = source.get("@timestamp", "unknown time")
host = source.get("host", {}).get("name") or source.get("host", {}).get("hostname") or "unknown-host"
container = source.get("container", {}).get("image", {}).get("name") or ""
level = source.get("log", {}).get("level") or source.get("log.level") or ""
selected.append(
f"[{timestamp}] host={host} level={level} container={container}\n{message}".strip()
)
if not selected:
return "No logs were returned from Elasticsearch in the requested window."
prompt = (
"You are assisting with HomeLab observability. Review the following log entries collected from "
"Elasticsearch and highlight any notable anomalies, errors, or emerging issues. "
"Explain the impact and suggest next steps when applicable. "
"Use concise bullet points. Logs:\n\n"
+ "\n\n".join(selected)
)
return prompt
def call_openrouter(prompt: str, model: str, api_key: str, referer: str | None, title: str | None) -> str:
"""Send prompt to OpenRouter and return the model response text."""
url = "https://openrouter.ai/api/v1/chat/completions"
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
}
if referer:
headers["HTTP-Referer"] = referer
if title:
headers["X-Title"] = title
body = {
"model": model,
"messages": [
{"role": "system", "content": "You are a senior SRE helping analyze log anomalies."},
{"role": "user", "content": prompt},
],
}
response = requests.post(url, json=body, headers=headers, timeout=60)
response.raise_for_status()
data = response.json()
choices = data.get("choices", [])
if not choices:
raise RuntimeError("OpenRouter response did not include choices")
return choices[0]["message"]["content"]
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Query Elasticsearch and summarize logs with OpenRouter.")
parser.add_argument("--host", default=os.environ.get("ELASTIC_HOST"), help="Elasticsearch host URL")
parser.add_argument("--api-key", default=os.environ.get("ELASTIC_API_KEY"), help="Elasticsearch ApiKey")
parser.add_argument("--index", default="log*", help="Index pattern (default: log*)")
parser.add_argument("--minutes", type=int, default=60, help="Lookback window in minutes (default: 60)")
parser.add_argument("--size", type=int, default=200, help="Max number of logs to fetch (default: 200)")
parser.add_argument("--message-limit", type=int, default=50, help="Max log lines sent to LLM (default: 50)")
parser.add_argument("--openrouter-model", default=os.environ.get("OPENROUTER_MODEL", "openai/gpt-4o-mini"))
parser.add_argument("--insecure", action="store_true", help="Disable TLS verification for Elasticsearch")
return parser.parse_args()
def main() -> int:
args = parse_args()
if not args.host or not args.api_key:
print("ELASTIC_HOST and ELASTIC_API_KEY must be provided via environment or CLI", file=sys.stderr)
return 1
logs = query_elasticsearch(
host=args.host,
api_key=args.api_key,
index_pattern=args.index,
minutes=args.minutes,
size=args.size,
verify=not args.insecure,
)
prompt = build_prompt(logs, limit_messages=args.message_limit)
if not prompt.strip() or prompt.startswith("No logs"):
print(prompt)
return 0
openrouter_key = os.environ.get("OPENROUTER_API_KEY")
if not openrouter_key:
print("OPENROUTER_API_KEY is required to summarize logs", file=sys.stderr)
return 1
referer = os.environ.get("OPENROUTER_REFERER")
title = os.environ.get("OPENROUTER_TITLE", "Elastic Log Monitor")
response_text = call_openrouter(
prompt=prompt,
model=args.openrouter_model,
api_key=openrouter_key,
referer=referer,
title=title,
)
print(response_text.strip())
return 0
if __name__ == "__main__":
raise SystemExit(main())
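An example scheduled invocation, assuming the script above is saved as `log_anomaly_check.py` (its actual path is not shown in this hunk) and the documented environment variables are exported:

```bash
export ELASTIC_HOST=https://casper.localdomain:9200
export ELASTIC_API_KEY=<base64 api key>
export OPENROUTER_API_KEY=<openrouter token>
python3 log_anomaly_check.py --index 'log*' --minutes 60 --message-limit 50 --insecure
```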

17
stacks/mllogwatcher/testing.py Executable file
View File

@ -0,0 +1,17 @@
# pip install -qU langchain "langchain[anthropic]"
from langchain.agents import create_agent
def get_weather(city: str) -> str:
"""Get weather for a given city."""
return f"It's always sunny in {city}!"
agent = create_agent(
model="claude-sonnet-4-5-20250929",
tools=[get_weather],
system_prompt="You are a helpful assistant",
)
# Run the agent
agent.invoke(
{"messages": [{"role": "user", "content": "what is the weather in sf"}]}
)

View File

@ -0,0 +1,10 @@
# Worklog 2025-12-29
1. Added containerization assets for grafana_alert_webhook:
- `Dockerfile`, `.dockerignore`, `docker-compose.yml`, `.env.example`, and consolidated `requirements.txt`.
- Compose mounts the runbook, `/etc/ansible/hosts`, and `.ssh` so SSH automation works inside the container.
- README now documents the compose workflow.
2. Copied the knights SSH key to `.ssh/webhook_id_rsa` and updated the `jet-alone` inventory entry with `ansible_user` + `ansible_ssh_private_key_file` so remote commands can run non-interactively.
3. Updated `OpenRouterLLM` to satisfy Pydantic's field validation inside the container.
4. Brought the webhook up under Docker Compose, tested alerts end-to-end, and reverted `OPENROUTER_MODEL` to the valid `openai/gpt-5.1-codex-max`.
5. Created `/var/core/ansible/ops_baseline.yml` to install sysstat/iotop/smartmontools/hdparm and enforce synchronized Bash history (`/etc/profile.d/99-bash-history.sh`). Ran the playbook against the primary LAN hosts; noted remediation items for the few that failed (outdated mirrors, pending grub configuration, missing sudo password).

View File

@ -0,0 +1,26 @@
ES_URL=http://elasticsearch:9200
# Elasticsearch API Key authentication (preferred over user/pass)
ES_API_ID=
ES_API_KEY=
# Or, Elasticsearch Basic authentication (if no API key)
# ES_USER=elastic
# ES_PASS=changeme
ES_VERIFY_SSL=false
OPNSENSE_URL=https://192.168.1.1
OPNSENSE_API_KEY=your_key
OPNSENSE_API_SECRET=your_secret
COLLECTOR_INTERVAL_SECONDS=60
NMAP_INTERVAL_SECONDS=300
NMAP_PORT_RANGE=1-1024
NMAP_BATCH_SIZE=10
NMAP_DISCOVERY_ENABLED=true
NMAP_DISCOVERY_INTERVAL_SECONDS=3600
NMAP_DISCOVERY_VLANS=
NMAP_DISCOVERY_EXTRA_ARGS="-sn -n"
NMAP_QUICK_BATCH_SIZE=30
NMAP_QUICK_EXTRA_ARGS="-sS --top-ports 100 -T4 --open -Pn"
NMAP_FULL_INTERVAL_SECONDS=86400

View File

@ -0,0 +1,26 @@
ES_URL=http://elasticsearch:9200
# Elasticsearch API Key authentication (preferred over user/pass)
ES_API_ID=
ES_API_KEY=
# Or, Elasticsearch Basic authentication (if no API key)
# ES_USER=elastic
# ES_PASS=changeme
ES_VERIFY_SSL=false
OPNSENSE_URL=https://192.168.1.1
OPNSENSE_API_KEY=your_key
OPNSENSE_API_SECRET=your_secret
COLLECTOR_INTERVAL_SECONDS=60
NMAP_INTERVAL_SECONDS=300
NMAP_PORT_RANGE=1-1024
NMAP_BATCH_SIZE=10
NMAP_DISCOVERY_ENABLED=true
NMAP_DISCOVERY_INTERVAL_SECONDS=3600
NMAP_DISCOVERY_VLANS=
NMAP_DISCOVERY_EXTRA_ARGS="-sn -n"
NMAP_QUICK_BATCH_SIZE=30
NMAP_QUICK_EXTRA_ARGS="-sS --top-ports 100 -T4 --open -Pn"
NMAP_FULL_INTERVAL_SECONDS=86400

11
stacks/network-mcp/.gitignore vendored Normal file
View File

@ -0,0 +1,11 @@
.env
.venv/
__pycache__/
*.pyc
.DS_Store
# Local/infra
node_modules/
# Logs
*.log

View File

@ -0,0 +1,76 @@
# Network MCP - Project Summary
## Overview
This project is a long-running Network MCP service that merges OPNsense discovery data, Nmap scans, and static inventory into Elasticsearch, then exposes both a minimal web UI and a full MCP JSON-RPC interface for LLM agents. It runs via Docker Compose and is now located at `/var/core/network-mcp`.
## What We Built
- **Collectors**
- OPNsense collector ingests DHCP/ARP/DNS and overlays inventory targets.
- Nmap collector performs discovery and port scans.
- Data lands in Elasticsearch: `network-hosts` (current state) and `network-events-*` (historical events).
- **Inventory merge**
- Inventory data from `inventory_targets.yml` is merged onto live hosts by IP when a MAC is known (so live MAC-based records carry inventory notes/expected ports).
- **Frontend**
- Flask UI + JSON API, containerized with Gunicorn and exposed on port `5001` for LAN access.
- **MCP server**
- JSON-RPC endpoint at `/.well-known/mcp.json` (and `/api/mcp`) supports:
- `initialize`, `ping`, `tools/list`, `tools/call`
- `resources/list`, `resources/read`, `resources/templates/list`
- Tool schemas include titles, descriptions, input/output schemas, and annotations (read-only hints).
- Resource templates provide snapshot + query access (e.g. `network://hosts?q=...`).
- **Search behavior**
- Host search is case-insensitive across name/hostname/IP/MAC.
- **Tests**
- Unit tests for REST and MCP search by hostname/IP/MAC, MCP resource reads, and MCP notifications.
## Key Endpoints
- UI: `http://<host>:5001/`
- REST:
- `GET /api/hosts` (supports `q`, `source`, `limit`)
- `GET /api/hosts/<host_id>`
- `GET /api/events`
- `GET /api/hosts/<host_id>/events`
- `GET /api/map`
- MCP JSON-RPC: `POST /.well-known/mcp.json`
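For a quick smoke test of the REST surface (host placeholder; adjust to your deployment):

```bash
# Case-insensitive search across names, hostnames, IPs, and MACs
curl -s "http://<host>:5001/api/hosts?q=seele&limit=10"
```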
## MCP Tools (JSON-RPC)
- `list_hosts` (search by hostname/IP/MAC; case-insensitive)
- `get_host` (optional events)
- `list_events`
- `host_events`
- `network_map`
## MCP Resources
- `resources/list` -> `network://hosts`, `network://map`, `network://events`
- `resources/templates/list` -> query templates such as:
- `network://hosts{?q,source,limit}`
- `network://host/{host_id}{?include_events,events_limit}`
- `network://events{?host_id,type,since,limit}`
## Docker & Repo State
- Repo path: `/var/core/network-mcp`
- `inventory_targets.yml` lives in the repo and is mounted via compose.
- Services run via `docker-compose up -d`.
- Git repo initialized and initial commit created.
## Gotchas / Pitfalls We Hit
- **MCP handshake**: Codex sent `notifications/initialized` without `id` (a notification). Returning a response caused the transport to close. Fixed by treating notifications as no-response. (A concrete notification vs. request pair is sketched after this list.)
- **Case-sensitive search**: Elasticsearch wildcard on `.keyword` fields was case-sensitive, so `seele` didn't match `SEELE`. Fixed via `case_insensitive: true` in wildcard queries.
- **Inventory merge duplication**: Initial inventory-only docs were `ip:*` and live docs were `mac:*`, so both existed. Merge now attaches inventory to live MAC records by IP. Legacy `ip:*` docs may remain stale unless cleaned.
- **MCP errors**: Tool errors are now returned as `CallToolResult` with `isError: true` (instead of JSON-RPC errors), so LLMs can see and correct issues.
- **Service move**: Repo moved from `/var/core/ansible/network-mcp` to `/var/core/network-mcp`. Compose mount paths updated.
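To make the handshake gotcha concrete: a JSON-RPC notification carries no `id` and must receive no JSON-RPC response, while a regular request does. A sketch against the endpoint above (host placeholder):

```bash
# Notification: no "id", the server must not answer with a JSON-RPC response body.
curl -s http://<host>:5001/api/mcp -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","method":"notifications/initialized"}'

# Request: has an "id", gets a normal JSON-RPC result.
curl -s http://<host>:5001/api/mcp -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":1,"method":"ping"}'
```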
## Verification Performed
- REST search works for hostname/IP/MAC.
- MCP `initialize`, `tools/list`, `tools/call` work.
- MCP resource list/templates/read work.
- Services verified running via `docker-compose up -d`.
## Future Work Ideas
- **Cleanup**: Add a cleanup job to remove stale `ip:*` docs after successful MAC merge.
- **Resource subscriptions**: Implement `resources/subscribe` if clients need push updates.
- **Auth**: Optional token on the MCP endpoint for shared LAN exposure.
- **More UI**: Add filters/alerts for stale hosts or missing expected ports.
- **Metrics**: Export collector stats to detect scan/ingest failures.
- **Schema mapping**: Improve Elasticsearch mappings for search (e.g., lowercase normalizers for names/hostnames).

View File

@ -0,0 +1,105 @@
# Network MCP
A "source of truth" for network devices and ports, backed by Elasticsearch, OPNsense, and Nmap.
## Architecture
- **Elasticsearch**: Stores current state (`network-hosts`) and historical events (`network-events-*`).
- **OPNsense Collector**: Fetches DHCP/ARP/DNS data to discover hosts.
- **Nmap Collector**: Scans discovered hosts for open ports and OS info.
## Setup
1. **Environment Config**
Copy `.env.example` to `.env` and fill in your details:
```bash
cp .env.example .env
# Edit .env
```
2. **Bootstrap Elastic**
Run the bootstrap script (requires `requests` installed locally, or you can run it inside a container):
```bash
python3 scripts/bootstrap_indices.py
```
*Note: Ensure you have connectivity to your Elasticsearch instance.*
3. **Start Services**
```bash
docker-compose up -d --build
```
This brings up the collectors and the lightweight frontend (reachable on port `5001`).
## Configuration
- **Static Metadata**: Edit `static/host_metadata.json` to add manual notes, roles, or tags to hosts (keyed by `mac:xx:xx...`).
- **Intervals**: Adjust polling intervals in `.env`.
- **VLAN Discovery (default on)**: Discovery sweeps (`nmap -sn`) run periodically across the OPNsense interfaces listed in `NMAP_DISCOVERY_VLANS`. Adjust the list (or set the flag to `false`) if you only want targeted subnets.
- **Quick vs Full Port Scans**: Each collector loop runs a fast, common-port sweep (`NMAP_QUICK_EXTRA_ARGS`, `NMAP_QUICK_BATCH_SIZE`) while a deeper service scan (`NMAP_PORT_RANGE`, `NMAP_BATCH_SIZE`) is triggered once per `NMAP_FULL_INTERVAL_SECONDS` (default daily). Tune these env vars to balance coverage vs. runtime.
- **Inventory Overlay**: Entries in `./inventory_targets.yml` are mounted into the OPNsense collector and merged by IP—offline/static hosts from that file (names, notes, expected ports) now appear in `network-hosts` with `source: inventory`.
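A minimal `inventory_targets.yml` entry, inferred from the fields the OPNsense collector reads (`ip`, `hostname`, `name`, `notes`, `ports`); the values here are illustrative:

```yaml
inventory_targets:
  - ip: 192.168.5.50          # required; used to merge onto live hosts
    name: nas                 # preferred display name
    hostname: nas.localdomain
    notes: "Static NAS, managed out-of-band"
    ports: [22, 443, 445]     # surfaced as expected_ports on the host record
```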
## Data Model
- **`network-hosts`**: Current state of every known host.
- **`network-events-YYYY.MM.DD`**: Immutable log of scans and discovery events.
## Usage
Query `network-hosts` for the latest view of your network:
```json
GET network-hosts/_search
{
"query": {
"match_all": {}
}
}
```
### Quick Frontend
A minimal Flask frontend is bundled in docker-compose (service `frontend`) and is exposed on port `5001` so it can be reached from other machines:
```bash
docker-compose up -d frontend
```
Then visit `http://<host-ip>:5001/` to see the merged view (inventory entries are marked with `source: inventory`). If you prefer to run it without Docker for debugging, follow the steps below:
```bash
cd network-mcp
python3 -m venv .venv && source .venv/bin/activate
pip install -r frontend/requirements.txt
python frontend/app.py
```
### MCP / API Endpoints
The frontend doubles as a Model Context Protocol server. It exposes the manifest at `/.well-known/mcp.json` (or `/api/mcp`) and supports the standard JSON-RPC handshake (`initialize`, `tools/list`, `tools/call`) on the same URL. Agents can either use the RPC tools below or hit the underlying REST endpoints directly.
- MCP Resources are also available (`resources/list`, `resources/read`, `resources/templates/list`) for clients that prefer resource-style access to snapshots and queries.
- `GET /api/hosts` merged host list (supports `limit`, `source`, and repeated `q` params to fuzzy search names, hostnames, IPs, or MACs in a single call).
- `GET /api/hosts/<host_id>` single host document with optional `include_events=true`.
- `GET /api/events` recent scan/discovery events (`limit`, `host_id`, `type`, `since` filters).
- `GET /api/hosts/<host_id>/events` scoped events for a host.
- `GET /api/map` high-level “network map” grouping hosts by detected /24 (IPv4) or /64 (IPv6).
RPC tool names (mirrored in the manifest) are:
- `list_hosts` accepts `{limit, source, terms}` and returns the merged host list.
- `network_map` optional `{limit}` for building /24-/64 summaries.
- `get_host` requires `{host_id}` plus optional `include_events`, `events_limit`.
- `list_events` `{limit, host_id, type, since}`.
- `host_events` requires `{host_id}` plus optional `limit`, `type`, `since`.
Resource URI examples:
- `network://hosts?q=seele&limit=50`
- `network://host/mac:dc:a6:32:67:55:dc?include_events=true&events_limit=50`
- `network://events?type=discovery&limit=100`
All RPC and REST calls share the Elasticsearch credentials from `.env`, so an agent only needs HTTP access to port `5001` to query hosts, notes, and event timelines. Registering the MCP with Codex looks like:
```bash
codex mcp install network-mcp http://<host>:5001/.well-known/mcp.json
```
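As a sanity check of the JSON-RPC surface, a `tools/call` request can also be issued directly with curl (host placeholder as above):

```bash
curl -s http://<host>:5001/api/mcp \
  -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"list_hosts","arguments":{"terms":["seele"],"limit":10}}}'
```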

View File

@ -0,0 +1,55 @@
import base64
from typing import Optional, Tuple
def _clean(value: Optional[str]) -> str:
"""
Normalize values coming from env files where quotes might be preserved.
"""
if not value:
return ""
return value.strip().strip('"').strip()
def resolve_api_key(api_id: Optional[str], api_key: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
"""
Accept various API key formats and return (api_id, api_key).
Supported formats:
- Explicit ES_API_ID and ES_API_KEY values.
- ES_API_KEY that already contains \"id:key\".
- ES_API_KEY that is the base64 encoding of \"id:key\".
"""
cleaned_id = _clean(api_id)
cleaned_key = _clean(api_key)
if cleaned_id and cleaned_key:
return cleaned_id, cleaned_key
if not cleaned_key:
return None, None
# Raw "id:key" format
if ":" in cleaned_key:
potential_id, potential_key = cleaned_key.split(":", 1)
if potential_id and potential_key:
return potential_id, potential_key
# Base64 encoded "id:key" format
try:
decoded = base64.b64decode(cleaned_key, validate=True).decode()
if ":" in decoded:
potential_id, potential_key = decoded.split(":", 1)
if potential_id and potential_key:
return potential_id, potential_key
except Exception:
pass
return None, None
def build_api_key_header(api_id: str, api_key: str) -> str:
"""
Return the value for the Authorization header using ApiKey auth.
"""
token = base64.b64encode(f"{api_id}:{api_key}".encode()).decode()
return f"ApiKey {token}"

View File

@ -0,0 +1,85 @@
import os
import time
import urllib3
from elasticsearch import Elasticsearch, helpers
from .es_auth import resolve_api_key
from .logging_config import setup_logging
# Suppress insecure request warnings if SSL verification is disabled
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
logger = setup_logging("es_client")
class ESClient:
def __init__(self):
self.url = os.getenv("ES_URL", "http://localhost:9200")
env_api_id = os.getenv("ES_API_ID")
env_api_key = os.getenv("ES_API_KEY")
self.api_id, self.api_key = resolve_api_key(env_api_id, env_api_key)
self.user = os.getenv("ES_USER", "elastic")
self.password = os.getenv("ES_PASS", "changeme")
self.verify_ssl = os.getenv("ES_VERIFY_SSL", "true").lower() == "true"
if self.api_id and self.api_key:
# Use API key authentication
self.client = Elasticsearch(
self.url,
api_key=(self.api_id, self.api_key),
verify_certs=self.verify_ssl,
ssl_show_warn=False
)
logger.info("Using Elasticsearch API key authentication.")
else:
# Fallback to basic auth
self.client = Elasticsearch(
self.url,
basic_auth=(self.user, self.password),
verify_certs=self.verify_ssl,
ssl_show_warn=False
)
logger.info("Using Elasticsearch basic authentication.")
def check_connection(self):
try:
return self.client.info()
except Exception as e:
logger.error(f"Failed to connect to Elasticsearch: {e}")
raise
def bulk_index(self, actions):
"""
Bulk index a list of actions.
actions: list of dicts compatible with elasticsearch.helpers.bulk
"""
if not actions:
return 0, []
try:
success, failed = helpers.bulk(self.client, actions, stats_only=False, raise_on_error=False)
if failed:
logger.warning(f"Bulk index had failures: {len(failed)} items failed.")
for item in failed[:5]: # Log first 5 failures
logger.warning(f"Failure sample: {item}")
else:
logger.info(f"Bulk index successful: {success} items.")
return success, failed
except Exception as e:
logger.error(f"Bulk index exception: {e}")
raise
def search_hosts(self, index="network-hosts", query=None, size=1000):
"""
Search for hosts in network-hosts index.
"""
if query is None:
query = {"match_all": {}}
try:
resp = self.client.search(index=index, query=query, size=size)
return [hit["_source"] for hit in resp["hits"]["hits"]]
except Exception as e:
logger.error(f"Search failed: {e}")
return []
def get_es_client():
return ESClient()

View File

@ -0,0 +1,21 @@
import logging
import os
import sys
def setup_logging(name: str = "collector") -> logging.Logger:
"""
Sets up a structured logger.
"""
logger = logging.getLogger(name)
level = os.getenv("LOG_LEVEL", "INFO").upper()
logger.setLevel(level)
if not logger.handlers:
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter(
'%(asctime)s [%(levelname)s] %(name)s: %(message)s'
)
handler.setFormatter(formatter)
logger.addHandler(handler)
return logger

View File

@ -0,0 +1,131 @@
import subprocess
import xml.etree.ElementTree as ET
import shutil
from typing import List, Dict, Optional
from .logging_config import setup_logging
logger = setup_logging("nmap_parser")
def run_nmap_scan(ips: List[str], extra_args: Optional[List[str]] = None) -> List[Dict]:
"""
Run nmap on the given IPs and return a list of parsed host dicts.
"""
if not ips:
return []
if not shutil.which("nmap"):
logger.error("nmap binary not found in PATH")
return []
# Default args: -oX - (XML to stdout)
cmd = ["nmap", "-oX", "-"]
if extra_args:
cmd.extend(extra_args)
# Append IPs
cmd.extend(ips)
logger.info(f"Running nmap command: {' '.join(cmd)}")
try:
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
xml_output = result.stdout
return parse_nmap_xml(xml_output)
except subprocess.CalledProcessError as e:
logger.error(f"Nmap failed: {e.stderr}")
return []
except Exception as e:
logger.error(f"Error running nmap: {e}")
return []
def parse_nmap_xml(xml_string: str) -> List[Dict]:
"""
Parse Nmap XML output into our internal host/port structure.
"""
try:
root = ET.fromstring(xml_string)
except ET.ParseError as e:
logger.error(f"Failed to parse Nmap XML: {e}")
return []
hosts = []
for host_node in root.findall("host"):
# Helper to find basic info
ip = None
mac = None
hostname = None
vendor = None
# Addresses
for addr in host_node.findall("address"):
addr_type = addr.get("addrtype")
if addr_type == "ipv4":
ip = addr.get("addr")
elif addr_type == "mac":
mac = addr.get("addr")
vendor = addr.get("vendor")
# Hostnames
hostnames_node = host_node.find("hostnames")
if hostnames_node is not None:
# Pick first for now
hn = hostnames_node.find("hostname")
if hn is not None:
hostname = hn.get("name")
# Ports
ports = []
ports_node = host_node.find("ports")
if ports_node is not None:
for port_node in ports_node.findall("port"):
state_node = port_node.find("state")
state = state_node.get("state") if state_node is not None else "unknown"
# Only keep open ports; closed/filtered ports are skipped.
if state != "open":
continue
port_id = int(port_node.get("portid"))
protocol = port_node.get("protocol")
service_node = port_node.find("service")
service_name = service_node.get("name") if service_node is not None else "unknown"
product = service_node.get("product") if service_node is not None else None
version = service_node.get("version") if service_node is not None else None
service_def = {
"name": service_name,
}
if product: service_def["product"] = product
if version: service_def["version"] = version
ports.append({
"port": port_id,
"proto": protocol,
"state": state,
"service": service_def
})
# OS detection (basic)
os_match = None
os_node = host_node.find("os")
if os_node is not None:
os_match_node = os_node.find("osmatch")
if os_match_node is not None:
os_match = {
"name": os_match_node.get("name"),
"accuracy": os_match_node.get("accuracy")
}
host_data = {
"ip": ip,
"mac": mac, # might be None if scanning remote segment
"hostname": hostname,
"vendor": vendor,
"ports": ports,
"os_match": os_match
}
hosts.append(host_data)
return hosts

View File

@ -0,0 +1,105 @@
import os
import requests
import json
import ipaddress
from .logging_config import setup_logging
logger = setup_logging("opnsense_client")
class OPNsenseClient:
def __init__(self):
self.base_url = os.getenv("OPNSENSE_URL", "https://192.168.1.1").rstrip('/')
self.api_key = os.getenv("OPNSENSE_API_KEY")
self.api_secret = os.getenv("OPNSENSE_API_SECRET")
self.verify_ssl = os.getenv("ES_VERIFY_SSL", "true").lower() == "true"  # reuses the ES flag; an explicit OPNSENSE_VERIFY_SSL could be added later
if not self.api_key or not self.api_secret:
logger.warning("OPNSENSE_API_KEY or OPNSENSE_API_SECRET not set. API calls will fail.")
def _get(self, endpoint, params=None):
url = f"{self.base_url}{endpoint}"
try:
response = requests.get(
url,
auth=(self.api_key, self.api_secret),
verify=self.verify_ssl,
params=params,
timeout=10
)
response.raise_for_status()
return response.json()
except Exception as e:
logger.error(f"Failed to fetch {url}: {e}")
return {}
def get_dhcp_leases_v4(self):
# DHCPv4 leases are fetched from /api/dhcpv4/leases/searchLease.
# The response is typically {"rows": [...], "total": ...}.
data = self._get("/api/dhcpv4/leases/searchLease")
return data.get("rows", [])
def get_arp_table(self):
# The ARP table is fetched from /api/diagnostics/interface/getArp.
# Depending on the OPNsense version it returns either a bare list of
# entries or a dict with a "rows" key, so both shapes are handled below.
data = self._get("/api/diagnostics/interface/getArp")
if isinstance(data, list):
return data
return data.get("rows", [])
def get_dns_overrides(self):
# Endpoint: /api/unbound/settings/searchHostOverride
data = self._get("/api/unbound/settings/searchHostOverride")
return data.get("rows", [])
def get_vlan_networks(self):
"""
Build a list of IPv4 networks (CIDRs) from the routing table, grouped by interface description.
"""
routes = self._get("/api/diagnostics/interface/getRoutes")
networks = []
if not isinstance(routes, list):
return networks
seen = set()
for route in routes:
if route.get("proto") != "ipv4":
continue
destination = route.get("destination")
if not destination or "/" not in destination or destination == "default":
continue
desc = route.get("intf_description")
if not desc:
continue
try:
network = ipaddress.ip_network(destination, strict=False)
except ValueError:
continue
# Skip host routes (/32) which are usually static peers
if network.prefixlen == 32:
continue
if network.prefixlen < 16:
continue
key = (desc, str(network))
if key in seen:
continue
seen.add(key)
networks.append({
"key": desc,
"name": desc,
"cidr": str(network)
})
return networks
def get_opnsense_client():
return OPNsenseClient()

View File

@ -0,0 +1,14 @@
FROM python:3.11-slim
RUN apt-get update && apt-get install -y nmap && rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY collectors/common /app/collectors/common
COPY collectors/nmap_collector /app/collectors/nmap_collector
ENV PYTHONPATH=/app
RUN pip install requests elasticsearch==8.15.1
CMD ["python", "collectors/nmap_collector/main.py"]

View File

@ -0,0 +1,378 @@
import os
import time
import datetime
import sys
import json
import shlex
# Ensure we can import from common
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
from collectors.common.es_client import get_es_client
from collectors.common.opnsense_client import get_opnsense_client
from collectors.common.nmap_parser import run_nmap_scan
from collectors.common.logging_config import setup_logging
logger = setup_logging("nmap_collector")
def get_now_iso():
return datetime.datetime.now(datetime.timezone.utc).isoformat()
def chunk_list(lst, n):
for i in range(0, len(lst), n):
yield lst[i:i + n]
def should_scan_vlan(vlan, allowlist):
if not allowlist:
return True
name = (vlan.get("name") or "").strip()
key = (vlan.get("key") or "").strip()
return name in allowlist or key in allowlist
def build_discovery_update_action(host_id, mac, ip, hostname, vendor, ts_iso):
mac_norm = mac.lower() if mac else None
upsert_host = {
"host": {
"id": host_id,
"macs": [mac_norm] if mac_norm else [],
"ips": [ip] if ip else [],
"name": hostname,
"hostnames": [hostname] if hostname else [],
"vendor": vendor,
"sources": ["nmap-discovery"],
"last_seen": ts_iso,
"first_seen": ts_iso
}
}
script_source = """
if (ctx._source.host == null) { ctx._source.host = [:]; }
if (ctx._source.host.macs == null) { ctx._source.host.macs = []; }
if (ctx._source.host.ips == null) { ctx._source.host.ips = []; }
if (ctx._source.host.hostnames == null) { ctx._source.host.hostnames = []; }
if (ctx._source.host.sources == null) { ctx._source.host.sources = []; }
if (params.mac != null && !ctx._source.host.macs.contains(params.mac)) {
ctx._source.host.macs.add(params.mac);
}
if (params.ip != null && !ctx._source.host.ips.contains(params.ip)) {
ctx._source.host.ips.add(params.ip);
}
if (params.hostname != null && !ctx._source.host.hostnames.contains(params.hostname)) {
ctx._source.host.hostnames.add(params.hostname);
}
if (!ctx._source.host.sources.contains(params.source_tag)) {
ctx._source.host.sources.add(params.source_tag);
}
ctx._source.host.last_seen = params.ts;
if (ctx._source.host.name == null && params.hostname != null) {
ctx._source.host.name = params.hostname;
}
if (params.vendor != null && (ctx._source.host.vendor == null || ctx._source.host.vendor == \"\")) {
ctx._source.host.vendor = params.vendor;
}
"""
return {
"_index": "network-hosts",
"_op_type": "update",
"_id": host_id,
"script": {
"source": script_source,
"lang": "painless",
"params": {
"mac": mac_norm,
"ip": ip,
"hostname": hostname,
"vendor": vendor,
"ts": ts_iso,
"source_tag": "nmap-discovery"
}
},
"upsert": upsert_host
}
def run_vlan_discovery(es, opnsense_client, discovery_args, vlan_filter):
networks = opnsense_client.get_vlan_networks()
if not networks:
logger.info("VLAN discovery skipped: OPNsense returned no interfaces.")
return
scoped_networks = [n for n in networks if should_scan_vlan(n, vlan_filter)]
if not scoped_networks:
logger.info("VLAN discovery skipped: no interfaces matched NMAP_DISCOVERY_VLANS.")
return
actions = []
today = datetime.datetime.now().strftime("%Y.%m.%d")
event_index = f"network-events-{today}"
for vlan in scoped_networks:
cidr = vlan.get("cidr")
if not cidr:
continue
logger.info(f"VLAN discovery scan for {vlan.get('name')} ({cidr})")
scan_ts = get_now_iso()
scan_id = f"nmap_discovery_{vlan.get('name')}_{scan_ts}"
results = run_nmap_scan([cidr], discovery_args)
for res in results:
ip = res.get("ip")
if not ip:
continue
mac = res.get("mac")
hostname = res.get("hostname")
vendor = res.get("vendor")
host_id = f"mac:{mac.lower()}" if mac else None
event_doc = {
"@timestamp": scan_ts,
"source": "nmap-discovery",
"scan_id": scan_id,
"vlan": vlan.get("name"),
"cidr": cidr,
"host": {
"id": host_id,
"ip": ip,
"mac": mac,
"hostname": hostname,
"vendor": vendor
}
}
actions.append({
"_index": event_index,
"_op_type": "index",
"_source": event_doc
})
if host_id:
actions.append(
build_discovery_update_action(host_id, mac, ip, hostname, vendor, scan_ts)
)
if actions:
logger.info(f"VLAN discovery produced {len(actions)} Elasticsearch actions.")
es.bulk_index(actions)
else:
logger.info("VLAN discovery finished with no hosts discovered.")
def main():
es = get_es_client()
opnsense_client = get_opnsense_client()
interval = int(os.getenv("NMAP_INTERVAL_SECONDS", "300"))
full_batch_size = int(os.getenv("NMAP_BATCH_SIZE", "10"))
quick_batch_size = int(os.getenv("NMAP_QUICK_BATCH_SIZE", "30"))
port_range = os.getenv("NMAP_PORT_RANGE", "1-1024") # Full scan range
discovery_enabled = os.getenv("NMAP_DISCOVERY_ENABLED", "false").lower() == "true"
discovery_interval = int(os.getenv("NMAP_DISCOVERY_INTERVAL_SECONDS", "3600"))
discovery_vlan_filter = [v.strip() for v in os.getenv("NMAP_DISCOVERY_VLANS", "").split(",") if v.strip()]
discovery_extra_args = os.getenv("NMAP_DISCOVERY_EXTRA_ARGS", "-sn -n").strip()
if discovery_extra_args:
discovery_extra_args = shlex.split(discovery_extra_args)
else:
discovery_extra_args = ["-sn", "-n"]
discovery_last_run = time.time() - discovery_interval if discovery_enabled else 0.0
full_interval = int(os.getenv("NMAP_FULL_INTERVAL_SECONDS", "86400"))
quick_extra_str = os.getenv("NMAP_QUICK_EXTRA_ARGS", "-sS --top-ports 100 -T4 --open -Pn").strip()
quick_extra_args = shlex.split(quick_extra_str) if quick_extra_str else ["-sS", "--top-ports", "100", "-T4", "--open", "-Pn"]
last_full_scan = time.time()
# Construct base nmap args
# -sV for service/version detection, --open to only report open ports.
# OS detection (-O) is not enabled by default; raw scans need root or the
# NET_RAW/NET_ADMIN capabilities granted in docker-compose.
extra_args = ["-sV", "--open"]
# Restrict the full scan to the configured port range or port list
if port_range:
extra_args.extend(["-p", port_range])
# Add user provided extra args
user_args = os.getenv("NMAP_EXTRA_ARGS", "")
if user_args:
extra_args.extend(user_args.split())
logger.info("Starting Nmap collector loop...")
while True:
try:
start_time = time.time()
ts_iso = get_now_iso()
now = time.time()
use_full_scan = (now - last_full_scan) >= full_interval
scan_type = "full" if use_full_scan else "quick"
scan_id = f"nmap_{scan_type}_{ts_iso}"
current_batch_size = full_batch_size if use_full_scan else quick_batch_size
scan_args = extra_args if use_full_scan else quick_extra_args
if use_full_scan:
last_full_scan = now
logger.info("Running scheduled full service scan.")
else:
logger.info("Running quick common-port sweep.")
if discovery_enabled and (time.time() - discovery_last_run) >= discovery_interval:
run_vlan_discovery(es, opnsense_client, discovery_extra_args, discovery_vlan_filter)
discovery_last_run = time.time()
# 1. Get targets from ES
# We only want hosts that have an IP.
hosts = es.search_hosts(index="network-hosts", size=1000)
# Extract IPs to scan. Map IP -> Host ID to correlate back
targets = []
ip_to_host_id = {}
for h in hosts:
# h is {"host": {...}, "ports": [...]}
host_info = h.get("host", {})
hid = host_info.get("id")
ips = host_info.get("ips", [])
if not hid or not ips:
continue
# Pick the "best" IP? Or scan all?
# Scaning all might be duplicate work if they point to same box.
# Let's pick the first one for now.
target_ip = ips[0]
targets.append(target_ip)
ip_to_host_id[target_ip] = hid
logger.info(f"Found {len(targets)} targets to scan ({scan_type}).")
total_processed = 0
logger.info(f"Scanning {scan_type} run with {len(targets)} targets.")
scan_results = run_nmap_scan(targets, scan_args)
actions = []
today = datetime.datetime.now().strftime("%Y.%m.%d")
event_index = f"network-events-{today}"
for res in scan_results:
ip = res.get("ip")
if not ip or ip not in ip_to_host_id:
continue
hid = ip_to_host_id[ip]
total_processed += 1
for p in res["ports"]:
p["last_seen"] = ts_iso
p["last_scan_id"] = scan_id
event_doc = {
"@timestamp": ts_iso,
"source": "nmap",
"scan_id": scan_id,
"host": {"id": hid, "ip": ip},
"ports": res["ports"],
"os": res.get("os_match")
}
actions.append({
"_index": event_index,
"_op_type": "index",
"_source": event_doc
})
script_source = """
if (ctx._source.host == null) { ctx._source.host = [:]; }
if (ctx._source.host.sources == null) { ctx._source.host.sources = []; }
if (!ctx._source.host.sources.contains('nmap')) {
ctx._source.host.sources.add('nmap');
}
ctx._source.host.last_seen = params.ts;
if (params.os != null) {
ctx._source.host.os = params.os;
}
if (ctx._source.ports == null) {
ctx._source.ports = [];
}
for (new_p in params.new_ports) {
boolean found = false;
for (old_p in ctx._source.ports) {
if (old_p.port == new_p.port && old_p.proto == new_p.proto) {
old_p.last_seen = params.ts;
old_p.state = new_p.state;
old_p.service = new_p.service;
old_p.last_scan_id = params.scan_id;
found = true;
break;
}
}
if (!found) {
new_p.first_seen = params.ts;
ctx._source.ports.add(new_p);
}
}
"""
actions.append({
"_index": "network-hosts",
"_op_type": "update",
"_id": hid,
"script": {
"source": script_source,
"lang": "painless",
"params": {
"ts": ts_iso,
"os": res.get("os_match"),
"new_ports": res["ports"],
"scan_id": scan_id
}
}
})
for p in res["ports"]:
svc_id = f"{hid}:{p['proto']}:{p['port']}"
svc_script = """
ctx._source.last_seen = params.ts;
ctx._source.state = params.state;
ctx._source.service = params.service;
if (ctx._source.first_seen == null) {
ctx._source.first_seen = params.ts;
}
"""
actions.append({
"_index": "network-services",
"_op_type": "update",
"_id": svc_id,
"script": {
"source": svc_script,
"lang": "painless",
"params": {
"ts": ts_iso,
"state": p["state"],
"service": p["service"]
}
},
"upsert": {
"host_id": hid,
"host_ip": ip,
"port": p["port"],
"proto": p["proto"],
"service": p["service"],
"state": p["state"],
"last_seen": ts_iso,
"first_seen": ts_iso,
"sources": ["nmap"]
}
})
if actions:
es.bulk_index(actions)
elapsed = time.time() - start_time
sleep_time = max(0, interval - elapsed)
logger.info(f"Nmap {scan_type} cycle done. Scanned {total_processed} hosts in {elapsed:.2f}s. Sleeping {sleep_time:.2f}s")
time.sleep(sleep_time)
except Exception as e:
logger.error(f"Error in Nmap loop: {e}")
time.sleep(10)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,14 @@
FROM python:3.11-slim
WORKDIR /app
COPY collectors/common /app/collectors/common
COPY collectors/opnsense_collector /app/collectors/opnsense_collector
# We need to make sure the module path works.
# The main.py does sys.path.append, but cleanest is to set PYTHONPATH.
ENV PYTHONPATH=/app
RUN pip install requests elasticsearch==8.15.1 pyyaml
CMD ["python", "collectors/opnsense_collector/main.py"]

View File

@ -0,0 +1,261 @@
import os
import time
import json
import datetime
import sys
import yaml
# Ensure we can import from common
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
from collectors.common.es_client import get_es_client
from collectors.common.opnsense_client import get_opnsense_client
from collectors.common.logging_config import setup_logging
logger = setup_logging("opnsense_collector")
def load_static_metadata(path="/app/static/host_metadata.json"):
if not os.path.exists(path):
logger.info(f"No static metadata found at {path}")
return {}
try:
with open(path, 'r') as f:
return json.load(f)
except Exception as e:
logger.error(f"Failed to load static metadata: {e}")
return {}
def load_inventory_targets(path=None):
path = path or os.getenv("INVENTORY_FILE", "/app/inventory_targets.yml")
if not os.path.exists(path):
logger.info(f"No inventory targets found at {path}")
return []
try:
with open(path, 'r') as f:
data = yaml.safe_load(f) or {}
return data.get("inventory_targets", [])
except Exception as e:
logger.error(f"Failed to load inventory targets: {e}")
return []
def normalize_mac(mac):
if not mac:
return None
return mac.lower().replace("-", ":")
def get_now_iso():
return datetime.datetime.now(datetime.timezone.utc).isoformat()
def main():
es = get_es_client()
opn = get_opnsense_client()
interval = int(os.getenv("COLLECTOR_INTERVAL_SECONDS", "60"))
logger.info("Starting OPNsense collector loop...")
while True:
try:
start_time = time.time()
ts_iso = get_now_iso()
# 1. Fetch Data
dhcp_v4 = opn.get_dhcp_leases_v4()
arp_table = opn.get_arp_table()
dns_overrides = opn.get_dns_overrides()
static_meta = load_static_metadata()
inventory_entries = load_inventory_targets()
# 2. Process Data -> hosts map
# Key: identifier (mac:xx... or ip:xxx)
hosts_map = {}
def create_host_entry(identifier):
return {
"id": identifier,
"macs": set(),
"ips": set(),
"hostnames": set(),
"sources": set(),
"preferred_name": None,
"inventory_notes": None,
"inventory_ports": None
}
def get_or_create_host(mac):
norm_mac = normalize_mac(mac)
if not norm_mac:
return None
identifier = f"mac:{norm_mac}"
host = hosts_map.setdefault(identifier, create_host_entry(identifier))
host["macs"].add(norm_mac)
return host
def get_or_create_host_by_ip(ip):
if not ip:
return None
identifier = f"ip:{ip}"
host = hosts_map.setdefault(identifier, create_host_entry(identifier))
host["ips"].add(ip)
return host
# Process DHCP
for lease in dhcp_v4:
# Structure depends on OPNsense version, but usually has 'mac', 'address', 'hostname'
mac = lease.get('mac') or lease.get('hw_address')
ip = lease.get('address') or lease.get('ip')
hostname = lease.get('hostname')
host = get_or_create_host(mac)
if host:
if ip: host["ips"].add(ip)
if hostname: host["hostnames"].add(hostname)
host["sources"].add("opnsense-dhcp")
# Process ARP
for entry in arp_table:
# Structure: 'mac', 'ip', 'hostname' (sometimes)
mac = entry.get('mac')
ip = entry.get('ip')
hostname = entry.get('hostname')
host = get_or_create_host(mac)
if host:
if ip: host["ips"].add(ip)
if hostname and hostname != "?": host["hostnames"].add(hostname)
host["sources"].add("opnsense-arp")
# Process DNS Overrides (mapped by IP when possible)
ip_to_identifier = {}
for identifier, h in hosts_map.items():
for ip in h["ips"]:
ip_to_identifier[ip] = identifier
for override in dns_overrides:
ip = override.get('ip')
domain = override.get('domain')
hostname = override.get('hostname')
full_fqdn = f"{hostname}.{domain}" if hostname and domain else hostname
if ip and ip in ip_to_identifier:
identifier = ip_to_identifier[ip]
if full_fqdn:
hosts_map[identifier]["hostnames"].add(full_fqdn)
hosts_map[identifier]["sources"].add("opnsense-dns")
# Process inventory targets (by IP)
for entry in inventory_entries:
ip = entry.get("ip")
if not ip:
continue
identifier = ip_to_identifier.get(ip)
if identifier:
host = hosts_map.get(identifier)
if host is None:
host = get_or_create_host_by_ip(ip)
ip_to_identifier[ip] = host["id"]
else:
host = get_or_create_host_by_ip(ip)
if host:
ip_to_identifier[ip] = host["id"]
if not host:
continue
hostname = entry.get("hostname")
name = entry.get("name")
if hostname:
host["hostnames"].add(hostname)
if name:
host["hostnames"].add(name)
host["preferred_name"] = name
host["sources"].add("inventory")
notes = entry.get("notes")
if notes:
host["inventory_notes"] = notes
ports = entry.get("ports")
if ports:
host["inventory_ports"] = ports
# 3. Build Actions
actions = []
today = datetime.datetime.now().strftime("%Y.%m.%d")
event_index = f"network-events-{today}"
for _, h_data in hosts_map.items():
name = h_data.get("preferred_name")
if not name and h_data["hostnames"]:
name = next(iter(h_data["hostnames"]))
final_host = {
"host": {
"id": h_data["id"],
"macs": list(h_data["macs"]),
"ips": list(h_data["ips"]),
"name": name,
"hostnames": list(h_data["hostnames"]),
"last_seen": ts_iso,
"sources": list(h_data["sources"])
}
}
if h_data.get("inventory_notes"):
final_host["host"]["notes"] = h_data["inventory_notes"]
if h_data.get("inventory_ports"):
final_host["host"]["expected_ports"] = h_data["inventory_ports"]
# Merge Static Metadata
if h_data["id"] in static_meta:
meta = static_meta[h_data["id"]]
# Merge fields
for k, v in meta.items():
final_host["host"][k] = v
# 3a. Event Document
event_doc = {
"@timestamp": ts_iso,
"source": "opnsense",
"scan_id": f"opnsense_{ts_iso}",
"host": final_host["host"]
}
actions.append({
"_index": event_index,
"_op_type": "index",
"_source": event_doc
})
# 3b. Host Upsert
# We use a script upsert or doc_as_upsert.
# doc_as_upsert is simpler but replaces lists.
# Ideally we merge lists (ips, macs), but for now replacing with latest 'truth' from OPNsense + Static is okay.
# However, we don't want to lose 'ports' info from Nmap.
# So we must NOT overwrite 'ports'.
host_update_doc = {
"host": final_host["host"]
}
actions.append({
"_index": "network-hosts",
"_op_type": "update",
"_id": h_data["id"],
"doc": host_update_doc,
"doc_as_upsert": True
})
# 4. Send to ES
if actions:
logger.info(f"Sending {len(actions)} actions to Elasticsearch...")
success, failed = es.bulk_index(actions)
else:
logger.info("No hosts found or no actions generated.")
elapsed = time.time() - start_time
sleep_time = max(0, interval - elapsed)
logger.info(f"Cycle done in {elapsed:.2f}s. Sleeping for {sleep_time:.2f}s")
time.sleep(sleep_time)
except Exception as e:
logger.error(f"Error in main loop: {e}")
time.sleep(10)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,43 @@
version: "3.9"
services:
frontend:
build:
context: .
dockerfile: frontend/Dockerfile
restart: always
env_file:
- .env
environment:
FRONTEND_PORT: "5001"
ports:
- "5001:5001"
opnsense_collector:
build:
context: .
dockerfile: collectors/opnsense_collector/Dockerfile
restart: always
env_file:
- .env
volumes:
- ./static:/app/static
- ./inventory_targets.yml:/app/inventory_targets.yml:ro
environment:
COLLECTOR_INTERVAL_SECONDS: "60"
INVENTORY_FILE: "/app/inventory_targets.yml"
nmap_collector:
build:
context: .
dockerfile: collectors/nmap_collector/Dockerfile
restart: always
cap_add:
- NET_RAW
- NET_ADMIN
env_file:
- .env
environment:
NMAP_INTERVAL_SECONDS: "300"
NMAP_PORT_RANGE: "1-1024"
NMAP_BATCH_SIZE: "10"

View File

@ -0,0 +1,15 @@
FROM python:3.11-slim
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /app
COPY frontend/requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
COPY frontend/ /app/
EXPOSE 5001
CMD ["gunicorn", "--bind", "0.0.0.0:5001", "app:app"]

View File

@ -0,0 +1,2 @@
"""Network MCP frontend package (used for local testing/imports)."""

View File

@ -0,0 +1,934 @@
import base64
import json
import os
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.parse import parse_qs, unquote, urlparse
import requests
from dotenv import load_dotenv
from flask import Flask, abort, jsonify, render_template, request
BASE_DIR = Path(__file__).resolve().parent.parent
env_path = BASE_DIR / ".env"
if env_path.exists():
load_dotenv(env_path)
ES_URL = os.getenv("ES_URL", "http://localhost:9200").rstrip("/")
ES_VERIFY_SSL = os.getenv("ES_VERIFY_SSL", "false").lower() == "true"
app = Flask(__name__)
HOST_SEARCH_LIMIT = int(os.getenv("FRONTEND_HOST_LIMIT", "1000"))
DEFAULT_EVENT_LIMIT = int(os.getenv("FRONTEND_EVENT_LIMIT", "200"))
SERVER_VERSION = os.getenv("NETWORK_MCP_VERSION", "0.1.0")
REST_TOOLS = [
{
"name": "list_hosts",
"description": "Return the merged view of every known device on the network (searchable by hostname, IP, or MAC).",
"method": "GET",
"path": "/api/hosts",
},
{
"name": "network_map",
"description": "Summarize hosts grouped by detected /24 (IPv4) or /64 (IPv6) networks.",
"method": "GET",
"path": "/api/map",
},
{
"name": "get_host",
"description": "Fetch a single host document by ID (e.g. ip:192.168.5.10).",
"method": "GET",
"path": "/api/hosts/{host_id}",
},
{
"name": "list_events",
"description": "List recent scan/discovery events with filters for host, type, or time range.",
"method": "GET",
"path": "/api/events",
},
{
"name": "host_events",
"description": "List the recent events associated with a specific host.",
"method": "GET",
"path": "/api/hosts/{host_id}/events",
},
]
def tool_schema(description: str, properties: Dict[str, Any], required: Optional[List[str]] = None, title: Optional[str] = None):
schema: Dict[str, Any] = {
"type": "object",
"description": description,
"properties": properties,
"additionalProperties": False,
}
if required:
schema["required"] = required
if title:
schema["title"] = title
return schema
PORT_SCHEMA = tool_schema(
"Observed port entry.",
{
"port": {"type": "integer", "description": "Port number."},
"state": {"type": "string", "description": "State reported by nmap (e.g. open, closed)."},
"service": {"type": "string", "description": "Detected service name, if available."},
},
required=["port"],
title="Port",
)
HOST_SCHEMA = tool_schema(
"Host summary merged from inventory, OPNsense, and nmap.",
{
"id": {"type": "string", "description": "Stable host identifier (ip:* or mac:*)."},
"name": {"type": "string", "description": "Best-known display name."},
"ips": {"type": "array", "items": {"type": "string"}, "description": "Associated IP addresses."},
"macs": {"type": "array", "items": {"type": "string"}, "description": "Observed MAC addresses."},
"hostnames": {"type": "array", "items": {"type": "string"}, "description": "DNS or hostnames discovered."},
"sources": {"type": "array", "items": {"type": "string"}, "description": "Data sources contributing to this record."},
"last_seen": {"type": "string", "description": "ISO timestamp of the most recent observation."},
"notes": {"type": "string", "description": "Inventory notes/annotations, if present."},
"expected_ports": {"type": "array", "items": {"type": "string"}, "description": "Ports expected per inventory targets."},
"ports": {"type": "array", "items": PORT_SCHEMA, "description": "Latest observed open ports."},
},
required=["id"],
title="Host",
)
EVENT_SCHEMA = tool_schema(
"Scan or discovery event emitted by collectors.",
{
"id": {"type": "string", "description": "Event document identifier."},
"timestamp": {"type": "string", "description": "Observation timestamp (@timestamp)."},
"source": {"type": "string", "description": "Collector that produced the event (nmap, opnsense, inventory)."},
"event": {"type": "object", "description": "Event metadata (type, outcome)."},
"host": HOST_SCHEMA,
"ports": {"type": "array", "items": PORT_SCHEMA, "description": "Ports included with the event (if any)."},
},
required=["id", "timestamp"],
title="Event",
)
NETWORK_ENTRY_SCHEMA = tool_schema(
"Network grouping entry showing hosts per /24 or /64.",
{
"cidr": {"type": "string", "description": "CIDR label (e.g. 192.168.5.0/24)."},
"hosts": {"type": "array", "items": HOST_SCHEMA, "description": "Hosts that belong to this network."},
},
required=["cidr", "hosts"],
title="NetworkEntry",
)
MCP_TOOL_DEFINITIONS = {
"list_hosts": {
"title": "List Hosts",
"description": "Return the merged view of every known device on the network with optional filtering by source or identifier.",
"annotations": {"readOnlyHint": True, "destructiveHint": False, "openWorldHint": False},
"inputSchema": tool_schema(
"Filter options when listing hosts.",
{
"limit": {"type": "integer", "minimum": 1, "maximum": 5000, "title": "Limit", "description": "Maximum number of hosts to return."},
"source": {"type": "string", "title": "Source filter", "description": "Only include hosts that contain this source tag (e.g. inventory, nmap, opnsense-arp)."},
"terms": {
"type": "array",
"items": {"type": "string"},
"title": "Search terms",
"description": "Identifiers (names, hostnames, IPs, or MACs) to match. Equivalent to repeated q parameters in the REST API.",
},
},
title="ListHostsInput",
),
"outputSchema": tool_schema(
"Host list result payload.",
{
"total": {"type": "integer", "description": "Number of hosts returned."},
"hosts": {"type": "array", "items": HOST_SCHEMA, "description": "Host entries sorted by last-seen time."},
},
required=["total", "hosts"],
title="ListHostsResult",
),
},
"network_map": {
"title": "Network Map",
"description": "Summarize hosts grouped by detected /24 (IPv4) or /64 (IPv6) ranges.",
"annotations": {"readOnlyHint": True, "destructiveHint": False, "openWorldHint": False},
"inputSchema": tool_schema(
"Options when generating the network grouping.",
{
"limit": {"type": "integer", "minimum": 1, "maximum": 5000, "title": "Host limit", "description": "Maximum number of hosts to consider when building the map."},
},
title="NetworkMapInput",
),
"outputSchema": tool_schema(
"Grouped view of networks and their hosts.",
{
"host_count": {"type": "integer", "description": "Number of hosts examined for this map."},
"networks": {"type": "array", "items": NETWORK_ENTRY_SCHEMA, "description": "List of network segments and their hosts."},
},
required=["host_count", "networks"],
title="NetworkMapResult",
),
},
"get_host": {
"title": "Get Host",
"description": "Fetch a single host document by ID, optionally including recent events.",
"annotations": {"readOnlyHint": True, "destructiveHint": False, "openWorldHint": False},
"inputSchema": tool_schema(
"Parameters for retrieving an individual host.",
{
"host_id": {"type": "string", "title": "Host ID", "description": "Host identifier (e.g. ip:192.168.5.10, mac:aa:bb:cc...)."},
"include_events": {"type": "boolean", "title": "Include events", "description": "If true, include recent events for the host."},
"events_limit": {"type": "integer", "minimum": 1, "maximum": 1000, "title": "Events limit", "description": "Number of events to include if requested."},
},
required=["host_id"],
title="GetHostInput",
),
"outputSchema": tool_schema(
"Host payload with optional embedded events.",
{
"host": HOST_SCHEMA,
"events": {"type": "array", "items": EVENT_SCHEMA, "description": "Recent events when include_events=true."},
},
required=["host"],
title="GetHostResult",
),
},
"list_events": {
"title": "List Events",
"description": "List recent scan/discovery events with optional filters.",
"annotations": {"readOnlyHint": True, "destructiveHint": False, "openWorldHint": False},
"inputSchema": tool_schema(
"Filters applied when querying events.",
{
"limit": {"type": "integer", "minimum": 1, "maximum": 1000, "title": "Limit", "description": "Maximum number of events to return."},
"host_id": {"type": "string", "title": "Host filter", "description": "Only include events for this host identifier."},
"type": {"type": "string", "title": "Event type", "description": "Restrict to a specific event type (e.g. scan, discovery)."},
"since": {"type": "string", "title": "Since timestamp", "description": "ISO8601 timestamp used as a lower bound for @timestamp."},
},
title="ListEventsInput",
),
"outputSchema": tool_schema(
"Event search result.",
{
"total": {"type": "integer", "description": "Number of events returned."},
"events": {"type": "array", "items": EVENT_SCHEMA, "description": "Event documents sorted by timestamp."},
},
required=["total", "events"],
title="ListEventsResult",
),
},
"host_events": {
"title": "Host Events",
"description": "List recent events associated with a specific host.",
"annotations": {"readOnlyHint": True, "destructiveHint": False, "openWorldHint": False},
"inputSchema": tool_schema(
"Parameters when retrieving events bound to a single host.",
{
"host_id": {"type": "string", "title": "Host ID", "description": "Host identifier to filter by."},
"limit": {"type": "integer", "minimum": 1, "maximum": 1000, "title": "Limit", "description": "Maximum number of events to return."},
"type": {"type": "string", "title": "Event type", "description": "Restrict to a specific event type (e.g. scan, discovery)."},
"since": {"type": "string", "title": "Since timestamp", "description": "ISO8601 timestamp used as a lower bound for @timestamp."},
},
required=["host_id"],
title="HostEventsInput",
),
"outputSchema": tool_schema(
"Event list scoped to a host.",
{
"total": {"type": "integer", "description": "Number of events returned for the host."},
"events": {"type": "array", "items": EVENT_SCHEMA, "description": "Host-specific event entries."},
},
required=["total", "events"],
title="HostEventsResult",
),
},
}
def resolve_api_key(api_id: str, api_key: str):
if api_id and api_key:
return api_id, api_key
if not api_key:
return None, None
if ":" in api_key:
possible_id, possible_key = api_key.split(":", 1)
return possible_id, possible_key
try:
decoded = base64.b64decode(api_key).decode()
if ":" in decoded:
possible_id, possible_key = decoded.split(":", 1)
return possible_id, possible_key
except Exception:
pass
return None, None
def build_es_request():
headers = {}
auth = None
api_id = os.getenv("ES_API_ID")
api_key = os.getenv("ES_API_KEY")
api_id, api_key = resolve_api_key(api_id, api_key)
if api_id and api_key:
token = base64.b64encode(f"{api_id}:{api_key}".encode()).decode()
headers["Authorization"] = f"ApiKey {token}"
else:
auth = (os.getenv("ES_USER", "elastic"), os.getenv("ES_PASS", "changeme"))
return headers, auth
def normalize_host(doc: Dict) -> Dict:
host = doc.get("host", {})
ports = doc.get("ports", [])
return {
"id": host.get("id"),
"name": host.get("name") or host.get("id"),
"ips": host.get("ips", []),
"macs": host.get("macs", []),
"hostnames": host.get("hostnames", []),
"sources": host.get("sources", []),
"last_seen": host.get("last_seen"),
"notes": host.get("notes"),
"expected_ports": host.get("expected_ports", []),
"ports": [
{
"port": p.get("port"),
"state": p.get("state"),
"service": (p.get("service") or {}).get("name"),
}
for p in ports
],
}
def parse_search_terms(raw_terms: List[str]) -> List[str]:
terms: List[str] = []
for raw in raw_terms:
if not raw:
continue
cleaned = raw.replace(",", " ")
for chunk in cleaned.split():
chunk = chunk.strip()
if chunk:
terms.append(chunk)
return terms
def coerce_string_list(value: Any) -> List[str]:
if value is None:
return []
if isinstance(value, str):
return [value]
if isinstance(value, (list, tuple)):
return [str(item) for item in value if item is not None]
return []
def clamp_int(value: Any, default: int, min_value: int, max_value: int) -> int:
try:
if value is None:
return default
parsed = int(value)
except (TypeError, ValueError):
return default
return max(min_value, min(max_value, parsed))
def coerce_bool(value: Any, default: bool = False) -> bool:
if value is None:
return default
if isinstance(value, bool):
return value
if isinstance(value, str):
return value.lower() in {"1", "true", "yes", "on"}
return default
def build_search_clause(term: str) -> Dict:
wildcard = f"*{term}*"
return {
"bool": {
"should": [
{"wildcard": {"host.name.keyword": {"value": wildcard, "case_insensitive": True}}},
{"wildcard": {"host.hostnames.keyword": {"value": wildcard, "case_insensitive": True}}},
{"wildcard": {"host.id.keyword": {"value": wildcard, "case_insensitive": True}}},
{"wildcard": {"host.ips": {"value": wildcard, "case_insensitive": True}}},
{"wildcard": {"host.macs": {"value": wildcard, "case_insensitive": True}}},
],
"minimum_should_match": 1,
}
}
def fetch_hosts(limit: int = HOST_SEARCH_LIMIT, source: Optional[str] = None, search_terms: Optional[List[str]] = None):
headers, auth = build_es_request()
body = {
"size": limit,
"sort": [{"host.last_seen": {"order": "desc"}}],
}
filters: List[Dict] = []
if source:
filters.append({"term": {"host.sources.keyword": source}})
if search_terms:
should_clauses = [build_search_clause(term) for term in search_terms]
filters.append({"bool": {"should": should_clauses, "minimum_should_match": 1}})
if filters:
body["query"] = {"bool": {"filter": filters}}
resp = requests.get(
f"{ES_URL}/network-hosts/_search",
json=body,
headers=headers,
auth=auth,
verify=ES_VERIFY_SSL,
)
resp.raise_for_status()
return [normalize_host(hit.get("_source", {})) for hit in resp.json()["hits"]["hits"]]
def fetch_host_by_id(host_id: str) -> Optional[Dict]:
headers, auth = build_es_request()
body = {"size": 1, "query": {"term": {"host.id.keyword": host_id}}}
resp = requests.get(
f"{ES_URL}/network-hosts/_search",
json=body,
headers=headers,
auth=auth,
verify=ES_VERIFY_SSL,
)
resp.raise_for_status()
hits = resp.json()["hits"]["hits"]
if not hits:
return None
return normalize_host(hits[0].get("_source", {}))
def fetch_events(host_id: Optional[str] = None, limit: int = DEFAULT_EVENT_LIMIT, event_type: Optional[str] = None, since: Optional[str] = None):
headers, auth = build_es_request()
filters: List[Dict] = []
if host_id:
filters.append({"term": {"host.id.keyword": host_id}})
if event_type:
filters.append({"term": {"event.type.keyword": event_type}})
if since:
filters.append({"range": {"@timestamp": {"gte": since}}})
body: Dict = {
"size": limit,
"sort": [{"@timestamp": {"order": "desc"}}],
}
if filters:
body["query"] = {"bool": {"filter": filters}}
resp = requests.get(
f"{ES_URL}/network-events-*/_search",
json=body,
headers=headers,
auth=auth,
verify=ES_VERIFY_SSL,
)
if resp.status_code == 404:
return []
resp.raise_for_status()
events = []
for hit in resp.json()["hits"]["hits"]:
doc = hit.get("_source", {})
events.append(
{
"id": hit.get("_id"),
"timestamp": doc.get("@timestamp"),
"event": doc.get("event", {}),
"host": doc.get("host", {}),
"observed": doc.get("observed"),
"scan": doc.get("scan"),
"ports": doc.get("ports", []),
"source": doc.get("source"),
}
)
return events
def derive_network_label(ip: str) -> str:
if not ip:
return "unknown"
if ":" in ip:
parts = ip.split(":")
prefix = ":".join(parts[:4])
return f"{prefix}::/64"
octets = ip.split(".")
if len(octets) == 4:
return f"{octets[0]}.{octets[1]}.{octets[2]}.0/24"
return "unknown"
def build_network_map(hosts: List[Dict]):
networks: Dict[str, Dict] = {}
for host in hosts:
seen = set()
for ip in host.get("ips", []):
label = derive_network_label(ip)
if label in seen:
continue
seen.add(label)
entry = networks.setdefault(label, {"cidr": label, "hosts": []})
entry["hosts"].append(
{
"id": host.get("id"),
"name": host.get("name"),
"ips": host.get("ips", []),
"sources": host.get("sources", []),
"last_seen": host.get("last_seen"),
}
)
sorted_networks = sorted(networks.values(), key=lambda n: n["cidr"])
for entry in sorted_networks:
entry["hosts"].sort(key=lambda h: h.get("name") or h.get("id") or "")
return sorted_networks
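# A host whose addresses span multiple subnets appears under each derived network.
# Networks are sorted by their CIDR label and each host list by display name (or id).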
def bool_arg(value: Optional[str], default: bool = False) -> bool:
if value is None:
return default
return value.lower() in {"1", "true", "yes", "on"}
def build_manifest(base_url: str) -> Dict:
base = base_url.rstrip("/")
tools = []
for tool in REST_TOOLS:
tools.append(
{
"name": tool["name"],
"description": tool["description"],
"method": tool["method"],
"path": tool["path"],
"url": f"{base}{tool['path']}",
}
)
return {
"name": "network-mcp",
"description": "Network discovery source-of-truth backed by Elasticsearch, Nmap, and OPNsense.",
"schema": "1.0",
"tools": tools,
"auth": "env",
}
def tool_result(summary: str, data: Dict[str, Any]):
return summary, data
def handle_tool_list_hosts(arguments: Dict[str, Any]):
limit = clamp_int(arguments.get("limit"), HOST_SEARCH_LIMIT, 1, 5000)
raw_terms = coerce_string_list(arguments.get("terms"))
search_terms = parse_search_terms(raw_terms)
hosts = fetch_hosts(limit=limit, source=arguments.get("source"), search_terms=search_terms or None)
return tool_result(f"Returned {len(hosts)} hosts.", {"hosts": hosts, "total": len(hosts)})
def handle_tool_network_map(arguments: Dict[str, Any]):
limit = clamp_int(arguments.get("limit"), HOST_SEARCH_LIMIT, 1, 5000)
hosts = fetch_hosts(limit=limit)
network_map = build_network_map(hosts)
return tool_result(f"Computed {len(network_map)} networks.", {"networks": network_map, "host_count": len(hosts)})
def handle_tool_get_host(arguments: Dict[str, Any]):
host_id = arguments.get("host_id")
if not host_id:
raise ValueError("host_id is required")
host = fetch_host_by_id(host_id)
if not host:
raise KeyError(f"Host {host_id} not found")
include_events = coerce_bool(arguments.get("include_events"), default=False)
result = {"host": host}
if include_events:
events_limit = clamp_int(arguments.get("events_limit"), DEFAULT_EVENT_LIMIT, 1, 1000)
result["events"] = fetch_events(host_id=host_id, limit=events_limit)
return tool_result(f"Fetched host {host_id}.", result)
def handle_tool_list_events(arguments: Dict[str, Any]):
limit = clamp_int(arguments.get("limit"), DEFAULT_EVENT_LIMIT, 1, 1000)
events = fetch_events(
host_id=arguments.get("host_id"),
limit=limit,
event_type=arguments.get("type"),
since=arguments.get("since"),
)
return tool_result(f"Returned {len(events)} events.", {"events": events, "total": len(events)})
def handle_tool_host_events(arguments: Dict[str, Any]):
host_id = arguments.get("host_id")
if not host_id:
raise ValueError("host_id is required")
limit = clamp_int(arguments.get("limit"), DEFAULT_EVENT_LIMIT, 1, 1000)
events = fetch_events(host_id=host_id, limit=limit, event_type=arguments.get("type"), since=arguments.get("since"))
return tool_result(f"Returned {len(events)} events for {host_id}.", {"events": events, "total": len(events)})
TOOL_HANDLERS = {
"list_hosts": handle_tool_list_hosts,
"network_map": handle_tool_network_map,
"get_host": handle_tool_get_host,
"list_events": handle_tool_list_events,
"host_events": handle_tool_host_events,
}
def list_mcp_tools():
tools = []
for name, meta in MCP_TOOL_DEFINITIONS.items():
tool = {
"name": name,
"description": meta.get("description"),
"inputSchema": meta.get("inputSchema", {"type": "object"}),
}
title = meta.get("title")
if title:
tool["title"] = title
output_schema = meta.get("outputSchema")
if output_schema:
tool["outputSchema"] = output_schema
annotations = meta.get("annotations")
if annotations:
tool["annotations"] = annotations
tools.append(tool)
return tools
def call_tool_by_name(name: str, arguments: Optional[Dict[str, Any]] = None):
if name not in TOOL_HANDLERS:
raise KeyError(f"Unknown tool: {name}")
handler = TOOL_HANDLERS[name]
summary, data = handler(arguments or {})
return summary, data
def list_mcp_resources(base_uri: str = "network://"):
return [
{
"uri": f"{base_uri}hosts",
"name": "hosts",
"title": "Hosts (Snapshot)",
"mimeType": "application/json",
"description": "Snapshot of merged hosts (inventory + opnsense + nmap). Use resources/templates/list for search parameters.",
},
{
"uri": f"{base_uri}map",
"name": "map",
"title": "Network Map (Snapshot)",
"mimeType": "application/json",
"description": "Snapshot of networks grouped by /24 (IPv4) or /64 (IPv6).",
},
{
"uri": f"{base_uri}events",
"name": "events",
"title": "Recent Events (Snapshot)",
"mimeType": "application/json",
"description": "Recent scan/discovery events. Use resources/templates/list for filters (host_id/type/since).",
},
]
def list_mcp_resource_templates(base_uri: str = "network://"):
return [
{
"uriTemplate": f"{base_uri}hosts{{?q,source,limit}}",
"name": "hosts_query",
"title": "Hosts Query",
"mimeType": "application/json",
"description": "Query hosts by q (hostname/IP/MAC/name, case-insensitive), source, and limit. Repeat q to provide multiple terms.",
},
{
"uriTemplate": f"{base_uri}host/{{host_id}}{{?include_events,events_limit}}",
"name": "host_detail",
"title": "Host Detail",
"mimeType": "application/json",
"description": "Fetch a single host by host_id (e.g. mac:aa:bb.. or ip:192.168.5.10). Optionally include events.",
},
{
"uriTemplate": f"{base_uri}events{{?host_id,type,since,limit}}",
"name": "events_query",
"title": "Events Query",
"mimeType": "application/json",
"description": "Query recent events with optional filters host_id, type, since (ISO8601), and limit.",
},
{
"uriTemplate": f"{base_uri}map{{?limit}}",
"name": "map_query",
"title": "Network Map",
"mimeType": "application/json",
"description": "Build a network map from up to limit hosts.",
},
]
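# Illustrative resource URIs accepted by read_mcp_resource (identifiers and timestamps are examples):
#   network://hosts?q=seele&limit=5
#   network://host/mac:aa:bb:cc:dd:ee:ff?include_events=1&events_limit=50
#   network://events?type=scan&since=2025-01-01T00:00:00Z&limit=100
#   network://map?limit=500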
def read_mcp_resource(uri: str):
parsed = urlparse(uri)
if parsed.scheme != "network":
raise ValueError(f"Unsupported resource URI scheme: {parsed.scheme}")
netloc = parsed.netloc
query = parse_qs(parsed.query or "")
if netloc == "hosts":
limit = clamp_int((query.get("limit") or [HOST_SEARCH_LIMIT])[0], HOST_SEARCH_LIMIT, 1, 5000)
source = (query.get("source") or [None])[0]
q_terms = query.get("q") or []
search_terms = parse_search_terms(q_terms)
payload = {"hosts": fetch_hosts(limit=limit, source=source, search_terms=search_terms or None)}
payload["total"] = len(payload["hosts"])
return {"contents": [{"uri": uri, "mimeType": "application/json", "text": json.dumps(payload)}]}
if netloc == "map":
limit = clamp_int((query.get("limit") or [HOST_SEARCH_LIMIT])[0], HOST_SEARCH_LIMIT, 1, 5000)
hosts = fetch_hosts(limit=limit)
payload = {"networks": build_network_map(hosts), "host_count": len(hosts)}
return {"contents": [{"uri": uri, "mimeType": "application/json", "text": json.dumps(payload)}]}
if netloc == "events":
limit = clamp_int((query.get("limit") or [DEFAULT_EVENT_LIMIT])[0], DEFAULT_EVENT_LIMIT, 1, 1000)
host_id = (query.get("host_id") or [None])[0]
event_type = (query.get("type") or [None])[0]
since = (query.get("since") or [None])[0]
events = fetch_events(host_id=host_id, limit=limit, event_type=event_type, since=since)
payload = {"events": events, "total": len(events)}
return {"contents": [{"uri": uri, "mimeType": "application/json", "text": json.dumps(payload)}]}
if netloc == "host":
host_id = unquote((parsed.path or "").lstrip("/"))
if not host_id:
raise ValueError("Host resource requires /<host_id> path")
include_events = coerce_bool((query.get("include_events") or [False])[0], default=False)
events_limit = clamp_int((query.get("events_limit") or [DEFAULT_EVENT_LIMIT])[0], DEFAULT_EVENT_LIMIT, 1, 1000)
host = fetch_host_by_id(host_id)
if not host:
raise KeyError(f"Host {host_id} not found")
payload = {"host": host}
if include_events:
payload["events"] = fetch_events(host_id=host_id, limit=events_limit)
return {"contents": [{"uri": uri, "mimeType": "application/json", "text": json.dumps(payload)}]}
raise ValueError(f"Unknown resource URI: {uri}")
def jsonrpc_error(rpc_id: Any, code: int, message: str):
return {
"jsonrpc": "2.0",
"id": rpc_id,
"error": {"code": code, "message": message},
}
def build_initialize_result(protocol_version: Optional[str] = None):
protocol_version = protocol_version or "2025-11-25"
return {
"protocolVersion": protocol_version,
"capabilities": {
"tools": {"listChanged": False},
"resources": {"listChanged": False, "subscribe": False},
},
"serverInfo": {"name": "network-mcp", "version": SERVER_VERSION},
"instructions": "Start with list_hosts (search by hostname/IP/MAC), then use get_host for details and list_events/host_events for timelines; network_map gives a quick /24-/64 overview.",
}
def process_rpc_request(payload: Dict[str, Any]):
if not isinstance(payload, dict):
return jsonrpc_error(None, -32600, "Invalid request")
rpc_id = payload.get("id")
method = payload.get("method")
params = payload.get("params") or {}
is_notification = rpc_id is None
if method == "initialize":
requested = params.get("protocolVersion")
requested_str = str(requested) if requested is not None else None
return {"jsonrpc": "2.0", "id": rpc_id, "result": build_initialize_result(requested_str)}
if method == "ping":
return {"jsonrpc": "2.0", "id": rpc_id, "result": {}}
if method == "tools/list":
result = {"tools": list_mcp_tools(), "nextCursor": None}
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
if method == "resources/list":
result = {"resources": list_mcp_resources(), "nextCursor": None}
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
if method == "resources/templates/list":
result = {"resourceTemplates": list_mcp_resource_templates(), "nextCursor": None}
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
if method == "resources/read":
uri = (params or {}).get("uri")
if not uri:
return jsonrpc_error(rpc_id, -32602, "uri is required")
try:
result = read_mcp_resource(uri)
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
except ValueError as exc:
return jsonrpc_error(rpc_id, -32602, str(exc))
except KeyError as exc:
message = exc.args[0] if exc.args else str(exc)
return jsonrpc_error(rpc_id, -32004, message)
if method == "notifications/initialized":
# No response for notifications.
return None
if method == "tools/call":
name = params.get("name")
if not name:
if is_notification:
return None
return jsonrpc_error(rpc_id, -32602, "Tool name is required")
arguments = params.get("arguments") or {}
try:
summary, data = call_tool_by_name(name, arguments)
result = {
"content": [{"type": "text", "text": summary}],
"structuredContent": data,
"isError": False,
}
if is_notification:
return None
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
except ValueError as exc:
if is_notification:
return None
result = {
"content": [{"type": "text", "text": f"Tool argument error: {exc}"}],
"structuredContent": {"error": str(exc)},
"isError": True,
}
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
except KeyError as exc:
message = exc.args[0] if exc.args else str(exc)
if is_notification:
return None
result = {
"content": [{"type": "text", "text": f"Tool error: {message}"}],
"structuredContent": {"error": message},
"isError": True,
}
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
except Exception as exc: # pragma: no cover - defensive
if is_notification:
return None
return jsonrpc_error(rpc_id, -32603, f"Internal error: {exc}")
if is_notification:
return None
return jsonrpc_error(rpc_id, -32601, f"Method {method} not found")
def process_rpc_envelope(payload: Any):
if isinstance(payload, list):
responses = []
for entry in payload:
response = process_rpc_request(entry)
if response is not None:
responses.append(response)
return responses
if isinstance(payload, dict):
return process_rpc_request(payload)
return jsonrpc_error(None, -32600, "Invalid request")
@app.route("/api/hosts")
def api_hosts():
limit = min(int(request.args.get("limit", HOST_SEARCH_LIMIT)), 5000)
q_args = request.args.getlist("q")
search_terms = parse_search_terms(q_args)
hosts = fetch_hosts(
limit=limit,
source=request.args.get("source"),
search_terms=search_terms if search_terms else None,
)
return jsonify({"hosts": hosts, "total": len(hosts)})
@app.route("/api/hosts/<path:host_id>")
def api_host_detail(host_id: str):
host = fetch_host_by_id(host_id)
if not host:
abort(404, description=f"Host {host_id} not found")
include_events = bool_arg(request.args.get("include_events"), default=False)
result = {"host": host}
if include_events:
limit = min(int(request.args.get("events_limit", DEFAULT_EVENT_LIMIT)), 1000)
result["events"] = fetch_events(host_id=host_id, limit=limit)
return jsonify(result)
@app.route("/api/events")
def api_events():
limit = min(int(request.args.get("limit", DEFAULT_EVENT_LIMIT)), 1000)
events = fetch_events(
host_id=request.args.get("host_id"),
limit=limit,
event_type=request.args.get("type"),
since=request.args.get("since"),
)
return jsonify({"events": events, "total": len(events)})
@app.route("/api/hosts/<path:host_id>/events")
def api_host_events(host_id: str):
limit = min(int(request.args.get("limit", DEFAULT_EVENT_LIMIT)), 1000)
events = fetch_events(host_id=host_id, limit=limit, event_type=request.args.get("type"), since=request.args.get("since"))
return jsonify({"events": events, "total": len(events)})
@app.route("/api/map")
def api_map():
limit = min(int(request.args.get("limit", HOST_SEARCH_LIMIT)), 5000)
hosts = fetch_hosts(limit=limit)
network_map = build_network_map(hosts)
return jsonify({"networks": network_map, "host_count": len(hosts)})
@app.route("/.well-known/mcp.json", methods=["GET", "POST", "OPTIONS"])
@app.route("/api/mcp", methods=["GET", "POST", "OPTIONS"])
def api_manifest():
if request.method == "OPTIONS":
return ("", 204, {"Allow": "GET,POST,OPTIONS"})
if request.method == "POST":
payload = request.get_json(silent=True)
if payload is None:
return jsonify(jsonrpc_error(None, -32700, "Invalid JSON")), 400
rpc_response = process_rpc_envelope(payload)
if rpc_response is None or (isinstance(rpc_response, list) and not rpc_response):
return ("", 204)
return jsonify(rpc_response)
manifest = build_manifest(request.url_root.rstrip("/"))
return jsonify(manifest)
@app.route("/")
def index():
hosts = fetch_hosts()
total = len(hosts)
with_ports = sum(1 for h in hosts if h["ports"])
inventory_hosts = sum(1 for h in hosts if "inventory" in h["sources"])
return render_template(
"index.html",
hosts=hosts,
total=total,
with_ports=with_ports,
inventory_hosts=inventory_hosts,
es_url=ES_URL,
)
if __name__ == "__main__":
app.run(host="0.0.0.0", port=int(os.getenv("FRONTEND_PORT", "5001")))
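
The REST routes and the MCP JSON-RPC endpoint above can be exercised over plain HTTP. A minimal sketch, assuming the app is running locally on its default port 5001 (adjust BASE for a gunicorn or containerised deployment) and can reach Elasticsearch:

import requests

BASE = "http://localhost:5001"  # assumed local dev address

# REST: search hosts by hostname/IP/MAC fragment
hosts = requests.get(f"{BASE}/api/hosts", params={"q": "seele", "limit": 10}).json()
print(hosts["total"], [h["name"] for h in hosts["hosts"]])

# MCP: the same search as a JSON-RPC tools/call against /api/mcp (or /.well-known/mcp.json)
rpc = {
    "jsonrpc": "2.0",
    "id": 1,
    "method": "tools/call",
    "params": {"name": "list_hosts", "arguments": {"terms": ["seele"], "limit": 10}},
}
result = requests.post(f"{BASE}/api/mcp", json=rpc).json()["result"]
print(result["content"][0]["text"], result["structuredContent"]["total"])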

View File

@ -0,0 +1,4 @@
Flask==2.2.5
requests==2.31.0
python-dotenv==0.21.1
gunicorn==21.2.0

View File

@ -0,0 +1,206 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Network MCP Hosts</title>
<style>
body {
font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
background: #0f172a;
color: #e2e8f0;
margin: 0;
padding: 0 1.5rem 2rem;
}
header {
padding: 2rem 0 1rem;
}
h1 {
margin: 0;
}
.metrics {
display: flex;
gap: 1rem;
flex-wrap: wrap;
margin: 1rem 0 2rem;
}
.metric-card {
background: #1e293b;
padding: 1rem 1.5rem;
border-radius: 0.75rem;
border: 1px solid #334155;
min-width: 160px;
}
.metric-card h3 {
margin: 0;
font-size: 0.9rem;
color: #94a3b8;
}
.metric-card p {
margin: 0.4rem 0 0;
font-size: 1.5rem;
font-weight: bold;
color: #f1f5f9;
}
.hosts-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(320px, 1fr));
gap: 1rem;
}
.host-card {
background: #1e293b;
border-radius: 0.75rem;
border: 1px solid #334155;
padding: 1rem;
display: flex;
flex-direction: column;
gap: 0.6rem;
}
.host-card h2 {
margin: 0;
font-size: 1.1rem;
color: #f8fafc;
display: flex;
justify-content: space-between;
align-items: center;
}
.sources span {
display: inline-block;
font-size: 0.7rem;
padding: 0.15rem 0.4rem;
margin-right: 0.3rem;
border-radius: 0.4rem;
background: #0f766e;
}
.host-card ul {
margin: 0;
padding-left: 1.2rem;
}
.port-list {
display: flex;
flex-wrap: wrap;
gap: 0.4rem;
}
.port-chip {
background: #0f172a;
border: 1px solid #334155;
border-radius: 9999px;
padding: 0.2rem 0.6rem;
font-size: 0.8rem;
}
.notes {
font-size: 0.9rem;
color: #cbd5f5;
}
.source-tag-inventory {
background: #a855f7;
}
.source-tag-opnsense {
background: #0284c7;
}
.source-tag-nmap {
background: #ea580c;
}
.source-tag-discovery {
background: #0ea5e9;
}
.section-title {
font-size: 0.9rem;
color: #94a3b8;
margin: 0;
}
.ips, .notes, .last-seen {
font-size: 0.9rem;
}
</style>
</head>
<body>
<header>
<h1>Network MCP Overview</h1>
<p class="last-seen">Elasticsearch: {{ es_url }}</p>
</header>
<section class="metrics">
<div class="metric-card">
<h3>Total Hosts</h3>
<p>{{ total }}</p>
</div>
<div class="metric-card">
<h3>With Port Data</h3>
<p>{{ with_ports }}</p>
</div>
<div class="metric-card">
<h3>Inventory Entries</h3>
<p>{{ inventory_hosts }}</p>
</div>
</section>
<section class="hosts-grid">
{% for host in hosts %}
<article class="host-card">
<h2>{{ host.name }}
{% if host.notes %}
<span title="Inventory notes available">📝</span>
{% endif %}
</h2>
<div class="sources">
{% for source in host.sources %}
{% set tag_class = "" %}
{% if source == "inventory" %}
{% set tag_class = "source-tag-inventory" %}
{% elif source.startswith("opnsense") %}
{% set tag_class = "source-tag-opnsense" %}
{% elif source == "nmap" %}
{% set tag_class = "source-tag-nmap" %}
{% elif source == "nmap-discovery" %}
{% set tag_class = "source-tag-discovery" %}
{% endif %}
<span class="{{ tag_class }}">{{ source }}</span>
{% endfor %}
</div>
<div class="ips">
<strong>IPs:</strong> {{ host.ips|join(", ") if host.ips else "—" }}
</div>
{% if host.macs %}
<div class="ips">
<strong>MACs:</strong> {{ host.macs|join(", ") }}
</div>
{% endif %}
{% if host.hostnames %}
<div class="ips">
<strong>Hostnames:</strong> {{ host.hostnames|join(", ") }}
</div>
{% endif %}
<div class="last-seen">
<strong>Last seen:</strong> {{ host.last_seen or "unknown" }}
</div>
{% if host.notes %}
<div class="notes">
<strong>Notes:</strong> {{ host.notes }}
</div>
{% endif %}
{% if host.expected_ports %}
<div>
<p class="section-title">Expected Ports</p>
<div class="port-list">
{% for port in host.expected_ports %}
<span class="port-chip">{{ port }}</span>
{% endfor %}
</div>
</div>
{% endif %}
{% if host.ports %}
<div>
<p class="section-title">Observed Ports</p>
<div class="port-list">
{% for port in host.ports %}
<span class="port-chip">{{ port.port }} {{ port.service or "" }}</span>
{% endfor %}
</div>
</div>
{% endif %}
</article>
{% endfor %}
</section>
</body>
</html>

View File

@ -0,0 +1,2 @@
"""Unit tests for the Network MCP frontend."""

View File

@ -0,0 +1,203 @@
import json
import unittest
from unittest.mock import patch
class FakeResponse:
def __init__(self, payload, status_code=200):
self._payload = payload
self.status_code = status_code
def json(self):
return self._payload
def raise_for_status(self):
if self.status_code >= 400:
raise RuntimeError(f"HTTP {self.status_code}")
def _wildcard_match(pattern: str, value: str, case_insensitive: bool) -> bool:
if value is None:
return False
if case_insensitive:
pattern = pattern.lower()
value = value.lower()
if pattern.startswith("*") and pattern.endswith("*"):
needle = pattern.strip("*")
return needle in value
return pattern == value
def _extract_wildcard_clause(field_clause):
# Supports either {"field": "*term*"} or {"field": {"value":"*term*", "case_insensitive":true}}
if not isinstance(field_clause, dict):
return None, None, None
if len(field_clause) != 1:
return None, None, None
field, value = next(iter(field_clause.items()))
if isinstance(value, str):
return field, value, False
if isinstance(value, dict):
return field, value.get("value"), bool(value.get("case_insensitive"))
return None, None, None
def _filter_hosts_by_query(host_docs, query):
if not query:
return host_docs
bool_query = query.get("bool") if isinstance(query, dict) else None
if not bool_query:
return host_docs
filters = bool_query.get("filter") or []
if not filters:
return host_docs
matched = host_docs
for f in filters:
if "term" in f and "host.sources.keyword" in f["term"]:
src = f["term"]["host.sources.keyword"]
matched = [h for h in matched if src in (h.get("host", {}).get("sources") or [])]
continue
if "bool" in f and "should" in f["bool"]:
shoulds = f["bool"]["should"]
def matches_any(host_doc):
host = host_doc.get("host", {})
haystacks = {
"host.name.keyword": [host.get("name")],
"host.hostnames.keyword": host.get("hostnames") or [],
"host.id.keyword": [host.get("id")],
"host.ips": host.get("ips") or [],
"host.macs": host.get("macs") or [],
}
for clause in shoulds:
if "bool" in clause and "should" in clause["bool"]:
# nested should from multiple search terms
nested_shoulds = clause["bool"]["should"]
for nested in nested_shoulds:
if "wildcard" not in nested:
continue
field, value, ci = _extract_wildcard_clause(nested["wildcard"])
if not field or value is None:
continue
for candidate in haystacks.get(field, []):
if _wildcard_match(value, str(candidate or ""), ci):
return True
if "wildcard" in clause:
field, value, ci = _extract_wildcard_clause(clause["wildcard"])
if not field or value is None:
continue
for candidate in haystacks.get(field, []):
if _wildcard_match(value, str(candidate or ""), ci):
return True
return False
matched = [h for h in matched if matches_any(h)]
continue
return matched
class TestNetworkMCP(unittest.TestCase):
def setUp(self):
from frontend import app as app_module
self.app_module = app_module
self.client = app_module.app.test_client()
self.host_docs = [
{
"host": {
"id": "mac:dc:a6:32:67:55:dc",
"name": "SEELE",
"hostnames": ["SEELE"],
"ips": ["192.168.5.208"],
"macs": ["dc:a6:32:67:55:dc"],
"sources": ["opnsense-dhcp", "opnsense-arp"],
"last_seen": "2025-12-14T16:27:15.427091+00:00",
},
"ports": [{"port": 22, "state": "open", "service": {"name": "ssh"}}],
},
{
"host": {
"id": "mac:aa:bb:cc:dd:ee:ff",
"name": "core",
"hostnames": ["core.localdomain"],
"ips": ["192.168.5.34"],
"macs": ["aa:bb:cc:dd:ee:ff"],
"sources": ["inventory", "opnsense-arp"],
"last_seen": "2025-12-14T16:27:15.427091+00:00",
"notes": "Production Docker host",
},
"ports": [{"port": 443, "state": "open", "service": {"name": "https"}}],
},
]
def fake_requests_get(self, url, json=None, headers=None, auth=None, verify=None):
if url.endswith("/network-hosts/_search"):
query = (json or {}).get("query")
hits = _filter_hosts_by_query(self.host_docs, query)
return FakeResponse({"hits": {"hits": [{"_source": h} for h in hits]}})
if "/network-events-" in url and url.endswith("/_search"):
return FakeResponse({"hits": {"hits": []}})
return FakeResponse({}, status_code=404)
def test_rest_search_hostname_case_insensitive(self):
with patch.object(self.app_module.requests, "get", side_effect=self.fake_requests_get):
resp = self.client.get("/api/hosts?q=seele&limit=50")
self.assertEqual(resp.status_code, 200)
payload = resp.get_json()
self.assertEqual(payload["total"], 1)
self.assertEqual(payload["hosts"][0]["name"], "SEELE")
def test_rest_search_by_ip(self):
with patch.object(self.app_module.requests, "get", side_effect=self.fake_requests_get):
resp = self.client.get("/api/hosts?q=192.168.5.208")
payload = resp.get_json()
self.assertEqual(payload["total"], 1)
self.assertEqual(payload["hosts"][0]["id"], "mac:dc:a6:32:67:55:dc")
def test_rest_search_by_mac(self):
with patch.object(self.app_module.requests, "get", side_effect=self.fake_requests_get):
resp = self.client.get("/api/hosts?q=dc:a6:32:67:55:dc")
payload = resp.get_json()
self.assertEqual(payload["total"], 1)
self.assertEqual(payload["hosts"][0]["name"], "SEELE")
def test_mcp_tools_call_search_terms(self):
with patch.object(self.app_module.requests, "get", side_effect=self.fake_requests_get):
body = {
"jsonrpc": "2.0",
"id": 1,
"method": "tools/call",
"params": {"name": "list_hosts", "arguments": {"terms": ["seele"], "limit": 10}},
}
resp = self.client.post("/.well-known/mcp.json", data=json.dumps(body), content_type="application/json")
self.assertEqual(resp.status_code, 200)
payload = resp.get_json()
self.assertFalse(payload["result"]["isError"])
hosts = payload["result"]["structuredContent"]["hosts"]
self.assertEqual(len(hosts), 1)
self.assertEqual(hosts[0]["name"], "SEELE")
def test_mcp_resources_read_hosts_query(self):
with patch.object(self.app_module.requests, "get", side_effect=self.fake_requests_get):
body = {"jsonrpc": "2.0", "id": 2, "method": "resources/read", "params": {"uri": "network://hosts?q=seele&limit=5"}}
resp = self.client.post("/.well-known/mcp.json", data=json.dumps(body), content_type="application/json")
self.assertEqual(resp.status_code, 200)
result = resp.get_json()["result"]
self.assertEqual(result["contents"][0]["mimeType"], "application/json")
data = json.loads(result["contents"][0]["text"])
self.assertEqual(data["total"], 1)
self.assertEqual(data["hosts"][0]["name"], "SEELE")
def test_mcp_notifications_initialized_no_response(self):
with patch.object(self.app_module.requests, "get", side_effect=self.fake_requests_get):
body = {"jsonrpc": "2.0", "method": "notifications/initialized", "params": {}}
resp = self.client.post("/.well-known/mcp.json", data=json.dumps(body), content_type="application/json")
self.assertEqual(resp.status_code, 204)
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,24 @@
{
"policy": {
"phases": {
"hot": {
"min_age": "0ms",
"actions": {}
},
"warm": {
"min_age": "7d",
"actions": {
"forcemerge": {
"max_num_segments": 1
}
}
},
"delete": {
"min_age": "90d",
"actions": {
"delete": {}
}
}
}
}
}

View File

@ -0,0 +1,39 @@
{
"index_patterns": ["network-events-*"],
"template": {
"settings": {
"index.lifecycle.name": "network-events-ilm"
},
"mappings": {
"properties": {
"@timestamp": { "type": "date" },
"host": {
"properties": {
"ip": { "type": "ip" },
"ips": { "type": "ip" },
"mac": { "type": "keyword" },
"macs": { "type": "keyword" },
"id": { "type": "keyword" },
"name": { "type": "keyword" },
"hostname": { "type": "keyword" },
"hostnames": { "type": "keyword" }
}
},
"ports": {
"properties": {
"port": { "type": "integer" },
"proto": { "type": "keyword" },
"state": { "type": "keyword" },
"service": {
"properties": {
"name": { "type": "keyword" },
"product": { "type": "keyword" },
"version": { "type": "keyword" }
}
}
}
}
}
}
}
}

View File

@ -0,0 +1,40 @@
{
"index_patterns": ["network-hosts"],
"template": {
"mappings": {
"properties": {
"host": {
"properties": {
"id": { "type": "keyword" },
"name": { "type": "keyword" },
"fqdn": { "type": "keyword" },
"ips": { "type": "ip" },
"macs": { "type": "keyword" },
"first_seen": { "type": "date" },
"last_seen": { "type": "date" },
"last_state_change": { "type": "date" },
"state": { "type": "keyword" },
"role": { "type": "keyword" },
"tags": { "type": "keyword" },
"notes": { "type": "text" }
}
},
"ports": {
"properties": {
"port": { "type": "integer" },
"proto": { "type": "keyword" },
"state": { "type": "keyword" },
"first_seen": { "type": "date" },
"last_seen": { "type": "date" },
"service": {
"properties": {
"name": { "type": "keyword" },
"product": { "type": "keyword" }
}
}
}
}
}
}
}
}

View File

@ -0,0 +1,280 @@
inventory_targets:
- name: Blackmoon
hostname: blackmoon.localdomain
ip: 192.168.5.1
notes: Core OpnSense gateway; ping only
- name: Supermicro-BMC
hostname: 192.168.5.30
ip: 192.168.5.30
ports:
- 22
- 80
notes: "Supermicro IPMI (ATEN login portal on 80\u2192443) for rack chassis"
- name: Jet-Alone
hostname: jet-alone.localdomain
ip: 192.168.5.31
ports:
- 22
notes: GPU/LLM server
- name: Wille
hostname: wille.localdomain
ip: 192.168.5.33
ports:
- 22
- 80
- 443
notes: TrueNAS SCALE primary storage (iXsystems /ui interface)
- name: Core
hostname: core.localdomain
ip: 192.168.5.34
ports:
- 22
- 80
- 443
notes: Production Docker swarm (Traefik, Gitea, Authentik, Immich, etc.)
- name: NERV-III
hostname: NERV-III
ip: 192.168.5.35
ports:
- 22
notes: 'Standalone Proxmox host (Fedora CoreOS VMs: container-dev VM110 plus Ramiel
containers)'
- name: TP-Link-AP-1
hostname: 192.168.5.36
ip: 192.168.5.36
ports:
- 22
- 80
notes: TP-Link EAP/Omada AP web UI (login page on HTTP)
- name: TP-Link-AP-2
hostname: 192.168.5.39
ip: 192.168.5.39
ports:
- 22
- 80
notes: TP-Link EAP/Omada AP web UI (login page on HTTP)
- name: Subspace-Mote-1
hostname: subspace-mote-1.localdomain
ip: 192.168.5.41
ports:
- 22
notes: SBC cluster member
- name: BirdNET-GO
hostname: 192.168.5.71
ip: 192.168.5.71
ports:
- 22
- 8080
notes: Armbian (rz3w-02) running birdnet-go container (port 8080)
- name: rz3w-02
hostname: rz3w-02.localdomain
ports:
- 22
notes: Subspace node with metrics/logging
- name: Arael
hostname: arael.localdomain
ip: 192.168.5.44
ports:
- 22
notes: Debian host, purpose TBD
- name: Synology-NAS
hostname: 192.168.5.45
ip: 192.168.5.45
ports:
- 22
- 80
- 443
- 5000
notes: Synology DSM primary NAS (HTTP redirect to DSM on 5000/5001)
- name: Docker-Public
hostname: docker-public.localdomain
ip: 192.168.5.46
ports:
- 22
notes: Traefik/Docker public host (Traefik on 8080; hosts Invidious, Matomo, FreshRSS,
etc.)
- name: Frigate
hostname: frigate.localdomain
ip: 192.168.5.47
ports:
- 22
- 5000
notes: NVR VM
- name: HomeAssistant
hostname: homeassistant.localdomain
ip: 192.168.5.48
ports:
- 22
- 8123
notes: Home automation host
- name: Casper
hostname: casper.localdomain
ip: 192.168.5.50
ports:
- 22
notes: Logging/Metrics VM
- name: Ramiel
hostname: ramiel.localdomain
ip: 192.168.5.51
ports:
- 22
- 6443
notes: Cluster node
- name: Ramiel-III
hostname: ramiel-iii.localdomain
ip: 192.168.5.230
ports:
- 22
notes: Additional Ramiel host
- name: NERV
hostname: nerv.localdomain
ip: 192.168.5.203
ports:
- 22
- 8006
notes: Proxmox host
- name: Magi2
hostname: magi2.localdomain
ip: 192.168.5.202
ports:
- 22
- 8006
notes: Proxmox host (JSON listed as Magi)
- name: JHCI
hostname: jhci.localdomain
ip: 192.168.5.201
ports:
- 22
- 8006
notes: Proxmox host
- name: Balthasar
hostname: balthasar.localdomain
ip: 192.168.5.237
ports:
- 22
- 80
notes: Technitium DNS server (hosts DoH UI)
- name: Unit-00
hostname: unit-00.localdomain
ip: 192.168.5.222
ports:
- 22
notes: Client that connects to docker-dev
- name: TrueNAS-Backup
hostname: ARKII.localdomain
ip: 192.168.5.32
ports:
- 22
- 80
- 443
notes: "TrueNAS SCALE backup NAS (ARKII chassis) \u2013 HTTPS /ui, SSH pending credentials"
- name: Mokerlink-POE
hostname: 192.168.5.226
ip: 192.168.5.226
ports:
- 80
notes: Mokerlink POE-2G08110GSM switch (web login only)
- name: EtherNetIP-Controller
hostname: 192.168.5.17
ip: 192.168.5.17
ports:
- 2222
notes: CNC/3D printer controller interface
- name: P1S-Printer
hostname: P1S
ip: 192.168.5.42
notes: Bambu Lab P1S (LLMNR responder only; no TCP services)
- name: Container-Dev
hostname: container-dev
ip: 192.168.5.236
ports:
- 22
- 5355
notes: Fedora CoreOS VM (NERV-III VM110) for container dev; only key-based SSH +
LLMNR
- name: VPS-TransparentProxy-19222713430
hostname: 192.227.134.30
ip: 192.227.134.30
ports:
- 22
- 80
- 443
notes: Transparent HAProxy node (Debian 10) running haproxy + zerotier-one + telegraf
- name: VPS-TransparentProxy-1071722798
hostname: 107.172.27.98
ip: 107.172.27.98
ports:
- 22
- 80
- 443
notes: Transparent HAProxy node (Debian 12) running haproxy + tailscale + zerotier-one
+ telegraf/filebeat
- name: VPS-TransparentProxy-10717425061
hostname: 107.174.250.61
ip: 107.174.250.61
ports:
- 22
- 80
- 443
notes: Transparent HAProxy (Debian 12) with haproxy, docker/containerd, iperf3,
filebeat, tailscale, zerotier
- name: VPS-Headscale
hostname: 198.46.218.8
ip: 198.46.218.8
ports:
- 22
- 80
- 443
notes: Headscale coordination server (Ubuntu 20.04) running headscale, HAProxy,
Uptime Kuma, tailscale, zerotier
- name: VPS-MailInABox
hostname: 198.23.146.170
ip: 198.23.146.170
ports:
- 22
- 80
- 443
notes: mail.uplink.tel Mail-in-a-Box (Postfix, Dovecot, BIND, NSD, nginx, SpamPD,
Filebeat, Tailscale)
- name: VPS-FriendServer
hostname: 172.245.88.186
ip: 172.245.88.186
ports:
- 22
- 80
- 443
notes: '"Friend server managed" (Debian 12) hosting Apache, InspIRCd, MariaDB, Gitea
(docker), Tor, Tailscale'
- name: VPS-Meow
hostname: 107.174.64.22
ip: 107.174.64.22
ports:
- 22
- 80
- 443
notes: '"Meow" VPS (Debian 12) running Docker stack: traefik, wg-easy, wordpress/mysql,
nginx, filebrowser'
- name: VPS-Lukes
hostname: 23.94.206.75
ip: 23.94.206.75
ports:
- 22
- 80
- 443
notes: "Luke's VPS (Debian 12) \u2013 running Docker (Traefik, Caddy, GoatCounter,\
\ TTRSS stack, Radicale, filebrowser, ssh-tunnel)"
- name: VPS-Tailscale-Edge
hostname: 100.64.0.14
ip: 100.64.0.14
ports:
- 22
- 80
- 443
notes: 'Tailscale interface into mail.uplink.tel (Mail-in-a-Box stack: Postfix/Dovecot/BIND/nginx)'
- name: BirdNET-Pi
hostname: orangepizero2.localdomain
ip: 192.168.5.18
ports:
- 22
- 80
notes: Orangepi Zero2 running BirdNET-Pi (Caddy on port 80)

View File

@ -0,0 +1,77 @@
import os
import sys
import json
import requests
import urllib3
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if REPO_ROOT not in sys.path:
sys.path.insert(0, REPO_ROOT)
from collectors.common.es_auth import resolve_api_key, build_api_key_header
# Suppress insecure request warnings
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def load_json(path):
with open(path, 'r') as f:
return json.load(f)
def main():
es_url = os.getenv("ES_URL", "http://localhost:9200").rstrip('/')
env_api_id = os.getenv("ES_API_ID")
env_api_key = os.getenv("ES_API_KEY")
es_api_id, es_api_key = resolve_api_key(env_api_id, env_api_key)
es_user = os.getenv("ES_USER", "elastic")
es_pass = os.getenv("ES_PASS", "changeme")
verify_ssl = os.getenv("ES_VERIFY_SSL", "true").lower() == "true"
auth_args = {}
if es_api_id and es_api_key:
auth_args["headers"] = {"Authorization": build_api_key_header(es_api_id, es_api_key)}
print("Using Elasticsearch API key authentication for bootstrap.")
else:
auth_args["auth"] = (es_user, es_pass)
print("Using Elasticsearch basic authentication for bootstrap.")
print(f"Bootstrapping Elastic at {es_url}...")
def put(endpoint, data):
url = f"{es_url}{endpoint}"
print(f"PUT {url}")
try:
resp = requests.put(url, json=data, verify=verify_ssl, **auth_args)
print(f"Response: {resp.status_code} {resp.text}")
resp.raise_for_status()
except Exception as e:
print(f"Error: {e}")
# Don't exit, try next
# 1. ILM Policy
ilm_path = "ilm/network-events-ilm.json"
if os.path.exists(ilm_path):
data = load_json(ilm_path)
put("/_ilm/policy/network-events-ilm", data)
else:
print(f"Missing {ilm_path}")
# 2. Network Events Template
tpl_path = "ilm/network-events-template.json"
if os.path.exists(tpl_path):
data = load_json(tpl_path)
put("/_index_template/network-events", data)
else:
print(f"Missing {tpl_path}")
# 3. Network Hosts Template
tpl_path = "ilm/network-hosts-template.json"
if os.path.exists(tpl_path):
data = load_json(tpl_path)
put("/_index_template/network-hosts", data)
else:
print(f"Missing {tpl_path}")
print("Bootstrap complete.")
if __name__ == "__main__":
main()
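# Note: the ilm/*.json paths above are resolved relative to the current working
# directory, so the script should be run from the directory that contains the ilm/
# folder. Connection settings come from ES_URL, ES_API_ID/ES_API_KEY (or
# ES_USER/ES_PASS) and ES_VERIFY_SSL in the environment.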

View File

@ -0,0 +1,9 @@
{
"mac:aa:bb:cc:dd:ee:ff": {
"role": "router",
"owner": "admin",
"location": "server-room",
"tags": ["critical", "gateway"],
"notes": "Main gateway"
}
}

View File

@ -0,0 +1,20 @@
---
services:
obsidian:
image: lscr.io/linuxserver/obsidian:latest
container_name: obsidian
security_opt:
- seccomp:unconfined #optional
environment:
- PUID=1000
- PGID=1000
- TZ=Etc/UTC
volumes:
- ./config:/config
ports:
- 3002:3000
- 3003:3001
devices:
- /dev/dri:/dev/dri #optional
shm_size: "1gb"
restart: unless-stopped

View File

@ -0,0 +1,15 @@
services:
snowflake-proxy:
network_mode: host
image: containers.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake:latest
container_name: snowflake-proxy
restart: unless-stopped
# For a full list of Snowflake Proxy CLI parameters see
# https://gitlab.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake/-/tree/main/proxy?ref_type=heads#running-a-standalone-snowflake-proxy
#command: [ "-ephemeral-ports-range", "30000:60000" ]
watchtower:
image: containrrr/watchtower
container_name: watchtower
volumes:
- /var/run/docker.sock:/var/run/docker.sock
command: snowflake-proxy

View File

@ -0,0 +1,8 @@
POSTGRES_USER=szuru
POSTGRES_PASSWORD=change_me
BUILD_INFO=local-dev
PORT=8080
THREADS=4
BASE_URL=/
MOUNT_DATA=./volumes/data
MOUNT_SQL=./volumes/postgres

View File

@ -0,0 +1,46 @@
## Example Docker Compose configuration
##
## Use this as a template to set up docker compose, or as guide to set up other
## orchestration services
services:
server:
image: szurubooru/server:latest
depends_on:
- sql
environment:
## These should be the names of the dependent containers listed below,
## or FQDNs/IP addresses if these services are running outside of Docker
POSTGRES_HOST: sql
## Credentials for database:
POSTGRES_USER:
POSTGRES_PASSWORD:
## Commented Values are Default:
#POSTGRES_DB: defaults to same as POSTGRES_USER
#POSTGRES_PORT: 5432
#LOG_SQL: 0 (1 for verbose SQL logs)
THREADS:
volumes:
- "${MOUNT_DATA}:/data"
- "./server/config.yaml:/opt/app/config.yaml"
client:
image: szurubooru/client:latest
depends_on:
- server
environment:
BACKEND_HOST: server
BASE_URL:
volumes:
- "${MOUNT_DATA}:/data:ro"
ports:
- "${PORT}:80"
sql:
image: postgres:11-alpine
restart: unless-stopped
environment:
POSTGRES_USER:
POSTGRES_PASSWORD:
volumes:
- "${MOUNT_SQL}:/var/lib/postgresql/data"

View File

@ -0,0 +1,3 @@
name: Hyrax Hub
domain: http://localhost:8080
secret: "CHANGE_ME"