Compare commits
2 Commits
0bcfed8fb8
...
aeac252491
| Author | SHA1 | Date | |
|---|---|---|---|
| aeac252491 | |||
| 13989e2b59 |
@ -22,9 +22,14 @@ jobs:
|
||||
- name: Find and deploy changed stacks
|
||||
env:
|
||||
DOMAIN: ${{ secrets.DOMAIN }}
|
||||
DEPLOY_ALL: "1"
|
||||
run: |
|
||||
# Find changed stacks
|
||||
STACKS=$(git diff --name-only HEAD~1 HEAD 2>/dev/null | grep '^stacks/' | cut -d'/' -f2 | sort -u || echo "")
|
||||
if [ "$DEPLOY_ALL" = "1" ]; then
|
||||
STACKS=$(ls stacks/)
|
||||
else
|
||||
# Find changed stacks
|
||||
STACKS=$(git diff --name-only HEAD~1 HEAD 2>/dev/null | grep '^stacks/' | cut -d'/' -f2 | sort -u || echo "")
|
||||
fi
|
||||
|
||||
if [ -z "$STACKS" ]; then
|
||||
echo "No stacks changed, deploying all..."
|
||||
|
||||
8
stacks/meshmon/.env.template
Normal file
8
stacks/meshmon/.env.template
Normal file
@ -0,0 +1,8 @@
|
||||
MESHTASTIC_NODE_IP=192.168.5.242
|
||||
ALLOWED_ORIGINS=http://docker-dev:8383,https://meshmon.ghost.tel
|
||||
RATE_LIMIT_API=20000
|
||||
NODE_ENV=production
|
||||
SESSION_SECRET=change-me
|
||||
TRUST_PROXY=true
|
||||
COOKIE_SECURE=true
|
||||
DISABLE_ANONYMOUS=true
|
||||
14
stacks/meshmon/docker-compose.yml
Normal file
14
stacks/meshmon/docker-compose.yml
Normal file
@ -0,0 +1,14 @@
|
||||
services:
|
||||
meshmonitor:
|
||||
image: ghcr.io/yeraze/meshmonitor:latest
|
||||
container_name: meshmonitor
|
||||
ports:
|
||||
- "8383:3001"
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- meshmonitor-data:/data
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
meshmonitor-data:
|
||||
driver: local
|
||||
7
stacks/meshtastic-web/docker-compose.yml
Normal file
7
stacks/meshtastic-web/docker-compose.yml
Normal file
@ -0,0 +1,7 @@
|
||||
services:
|
||||
meshtastic-web:
|
||||
image: ghcr.io/meshtastic/web:latest
|
||||
container_name: meshtastic-web
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8585:8080"
|
||||
7
stacks/mllogwatcher/.dockerignore
Normal file
7
stacks/mllogwatcher/.dockerignore
Normal file
@ -0,0 +1,7 @@
|
||||
.venv
|
||||
__pycache__/
|
||||
*.pyc
|
||||
.git
|
||||
.gitignore
|
||||
.env
|
||||
tmp/
|
||||
14
stacks/mllogwatcher/.env.example
Normal file
14
stacks/mllogwatcher/.env.example
Normal file
@ -0,0 +1,14 @@
|
||||
OPENROUTER_API_KEY=
|
||||
OPENROUTER_MODEL=openai/gpt-5.2-codex-max
|
||||
TRIAGE_ENABLE_COMMANDS=1
|
||||
TRIAGE_COMMAND_RUNNER=local
|
||||
TRIAGE_VERBOSE_LOGS=1
|
||||
TRIAGE_EMAIL_ENABLED=1
|
||||
TRIAGE_EMAIL_FROM=alertai@example.com
|
||||
TRIAGE_EMAIL_TO=admin@example.com
|
||||
TRIAGE_SMTP_HOST=smtp.example.com
|
||||
TRIAGE_SMTP_PORT=465
|
||||
TRIAGE_SMTP_USER=alertai@example.com
|
||||
TRIAGE_SMTP_PASSWORD=
|
||||
TRIAGE_SMTP_SSL=1
|
||||
TRIAGE_SMTP_STARTTLS=0
|
||||
14
stacks/mllogwatcher/.env.template
Normal file
14
stacks/mllogwatcher/.env.template
Normal file
@ -0,0 +1,14 @@
|
||||
OPENROUTER_API_KEY=
|
||||
OPENROUTER_MODEL=openai/gpt-5.2-codex-max
|
||||
TRIAGE_ENABLE_COMMANDS=1
|
||||
TRIAGE_COMMAND_RUNNER=local
|
||||
TRIAGE_VERBOSE_LOGS=1
|
||||
TRIAGE_EMAIL_ENABLED=1
|
||||
TRIAGE_EMAIL_FROM=alertai@example.com
|
||||
TRIAGE_EMAIL_TO=admin@example.com
|
||||
TRIAGE_SMTP_HOST=smtp.example.com
|
||||
TRIAGE_SMTP_PORT=465
|
||||
TRIAGE_SMTP_USER=alertai@example.com
|
||||
TRIAGE_SMTP_PASSWORD=
|
||||
TRIAGE_SMTP_SSL=1
|
||||
TRIAGE_SMTP_STARTTLS=0
|
||||
20
stacks/mllogwatcher/Dockerfile
Normal file
20
stacks/mllogwatcher/Dockerfile
Normal file
@ -0,0 +1,20 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1
|
||||
|
||||
WORKDIR /var/core/mlLogWatcher
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends openssh-client && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY alert_runbook.yaml ./alert_runbook.yaml
|
||||
COPY scripts ./scripts
|
||||
|
||||
EXPOSE 8081
|
||||
|
||||
CMD ["uvicorn", "scripts.grafana_alert_webhook:app", "--host", "0.0.0.0", "--port", "8081"]
|
||||
120
stacks/mllogwatcher/README.md
Executable file
120
stacks/mllogwatcher/README.md
Executable file
@ -0,0 +1,120 @@
|
||||
# ML Log Watcher Utilities
|
||||
|
||||
This repository now contains two automation entry points that work together to
|
||||
triage Elasticsearch logs and Grafana alerts with the help of OpenRouter-hosted
|
||||
language models.
|
||||
|
||||
## 1. `scripts/log_monitor.py`
|
||||
|
||||
Existing script that queries Elasticsearch indices, pulls a recent window of
|
||||
logs, and asks an LLM for anomaly highlights. Run it ad-hoc or schedule via
|
||||
cron/systemd.
|
||||
|
||||
```
|
||||
ELASTIC_HOST=https://casper.localdomain:9200 \
|
||||
ELASTIC_API_KEY=... \
|
||||
OPENROUTER_API_KEY=... \
|
||||
python3 scripts/log_monitor.py --index 'log*' --minutes 30
|
||||
```
|
||||
|
||||
## 2. `scripts/grafana_alert_webhook.py`
|
||||
|
||||
A FastAPI web server that accepts Grafana alert webhooks, finds the matching
|
||||
entry in `alert_runbook.yaml`, renders the LLM prompt, and posts it to
|
||||
OpenRouter. The response text is returned to Grafana (or any caller) immediately
|
||||
so automation can fan out to chat, ticketing, etc.
|
||||
|
||||
### Dependencies
|
||||
|
||||
```
|
||||
python3 -m venv .venv
|
||||
.venv/bin/pip install fastapi uvicorn pyyaml requests langchain
|
||||
```
|
||||
|
||||
### Environment
|
||||
|
||||
- `OPENROUTER_API_KEY` – required.
|
||||
- `OPENROUTER_MODEL` – optional (default `openai/gpt-4o-mini`).
|
||||
- `RUNBOOK_PATH` – optional (default `alert_runbook.yaml` in repo root).
|
||||
- `ANSIBLE_HOSTS_PATH` – optional (default `/etc/ansible/hosts`). When set, the webhook auto-loads the Ansible inventory so alerts targeting known hosts inherit their SSH user/port/key information.
|
||||
- `OPENROUTER_REFERER` / `OPENROUTER_TITLE` – forwarded headers if needed.
|
||||
- `TRIAGE_ENABLE_COMMANDS` – set to `1` to let the webhook execute runbook commands (default `0` keeps it in read-only mode).
|
||||
- `TRIAGE_COMMAND_RUNNER` – `ssh` (default) or `local`. When using ssh, also set `TRIAGE_SSH_USER` and optional `TRIAGE_SSH_OPTIONS`.
|
||||
- `TRIAGE_COMMAND_TIMEOUT`, `TRIAGE_MAX_COMMANDS`, `TRIAGE_OUTPUT_LIMIT`, `TRIAGE_DEFAULT_OS` – tune execution behavior.
|
||||
- `TRIAGE_VERBOSE_LOGS` – set to `1` to stream the entire LLM dialogue, prompts, and command outputs to the webhook logs for debugging.
|
||||
- `TRIAGE_EMAIL_ENABLED` – when `1`, the webhook emails the final LLM summary per alert. Requires `TRIAGE_EMAIL_FROM`, `TRIAGE_EMAIL_TO` (comma-separated), `TRIAGE_SMTP_HOST`, and optional `TRIAGE_SMTP_PORT`, `TRIAGE_SMTP_USER`, `TRIAGE_SMTP_PASSWORD`, `TRIAGE_SMTP_STARTTLS`, `TRIAGE_SMTP_SSL`.
|
||||
|
||||
### Running
|
||||
|
||||
```
|
||||
source .venv/bin/activate
|
||||
export OPENROUTER_API_KEY=...
|
||||
uvicorn scripts.grafana_alert_webhook:app --host 0.0.0.0 --port 8081
|
||||
```
|
||||
|
||||
The server loads the runbook at startup and exposes:
|
||||
|
||||
- `POST /alerts` – Grafana webhook target.
|
||||
- `POST /reload-runbook` – force runbook reload without restarting.
|
||||
|
||||
When `TRIAGE_ENABLE_COMMANDS=1`, the server executes the relevant triage commands
|
||||
for each alert (via SSH or locally), captures stdout/stderr, and appends the
|
||||
results to both the OpenRouter prompt and the HTTP response JSON. This lets you
|
||||
automate evidence gathering directly from the runbook instructions. Use
|
||||
environment variables to control which user/host the commands target and to
|
||||
limit timeouts/output size. LangChain powers the multi-turn investigation flow:
|
||||
the LLM can call the provided tools (`run_local_command`, `run_ssh_command`) to
|
||||
gather additional evidence until it’s ready to deliver a final summary.
|
||||
When `/etc/ansible/hosts` (or `ANSIBLE_HOSTS_PATH`) is available the server
|
||||
automatically enriches the alert context with SSH metadata (user, host, port,
|
||||
identity file, and common args) so runbook commands default to using SSH against
|
||||
the alerting host instead of the webhook server.
|
||||
|
||||
### Running with Docker Compose
|
||||
|
||||
1. Copy `.env.example` to `.env` and fill in your OpenRouter key, email SMTP
|
||||
settings, and other toggles.
|
||||
2. Place any SSH keys the webhook needs inside `./.ssh/` (the compose file
|
||||
mounts this directory read-only inside the container).
|
||||
3. Run `docker compose up -d` to build and launch the webhook. It listens on
|
||||
port `8081` by default and uses the mounted `alert_runbook.yaml` plus the
|
||||
host `/etc/ansible/hosts`.
|
||||
4. Use `docker compose logs -f` to watch verbose LangChain output or restart
|
||||
with `docker compose restart` when updating the code/runbook.
|
||||
|
||||
### Sample payload
|
||||
|
||||
```
|
||||
curl -X POST http://localhost:8081/alerts \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"status":"firing",
|
||||
"ruleUid":"edkmsdmlay2o0c",
|
||||
"ruleUrl":"http://casper:3000/alerting/grafana/edkmsdmlay2o0c/view",
|
||||
"alerts":[
|
||||
{
|
||||
"status":"firing",
|
||||
"labels":{
|
||||
"alertname":"High Mem.",
|
||||
"host":"unit-02",
|
||||
"rule_uid":"edkmsdmlay2o0c"
|
||||
},
|
||||
"annotations":{
|
||||
"summary":"Memory usage above 95% for 10m",
|
||||
"value":"96.2%"
|
||||
},
|
||||
"startsAt":"2025-09-22T17:20:00Z",
|
||||
"endsAt":"0001-01-01T00:00:00Z"
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
With a valid OpenRouter key this returns a JSON body containing the LLM summary
|
||||
per alert plus any unmatched alerts (missing runbook entries or rule UIDs).
|
||||
|
||||
### Testing without OpenRouter
|
||||
|
||||
Set `OPENROUTER_API_KEY=dummy` and point the DNS entry to a mock (e.g. mitmproxy)
|
||||
if you need to capture outbound requests. Otherwise, hits will fail fast with
|
||||
HTTP 502 so Grafana knows the automation need to be retried.
|
||||
254
stacks/mllogwatcher/alert_runbook.yaml
Executable file
254
stacks/mllogwatcher/alert_runbook.yaml
Executable file
@ -0,0 +1,254 @@
|
||||
# Grafana alert triage playbook for the HomeLab telemetry stack.
|
||||
# Each entry contains the alert metadata, what the signal means,
|
||||
# the evidence to capture automatically, and the manual / scripted steps.
|
||||
metadata:
|
||||
generated: "2025-09-22T00:00:00Z"
|
||||
grafana_url: "http://casper:3000"
|
||||
datasource: "InfluxDB telegraf (uid=P951FEA4DE68E13C5)"
|
||||
llm_provider: "OpenRouter"
|
||||
alerts:
|
||||
- name: "Data Stale"
|
||||
rule_uid: "fdk9orif6fytcf"
|
||||
description: "No CPU usage_user metrics have arrived for non-unit hosts within 5 minutes."
|
||||
signal:
|
||||
metric: "cpu.usage_user"
|
||||
condition: "count(host samples over 5m) < 1"
|
||||
impact: "Host is no longer reporting to Telegraf/Influx -> monitoring blind spot."
|
||||
evidence_to_collect:
|
||||
- "Influx: `from(bucket:\"telegraf\") |> range(start:-10m) |> filter(fn:(r)=>r._measurement==\"cpu\" and r.host==\"{{ host }}\") |> count()`"
|
||||
- "Telegraf log tail"
|
||||
- "System journal for network/auth errors"
|
||||
triage:
|
||||
- summary: "Verify Telegraf agent health"
|
||||
linux: "sudo systemctl status telegraf && sudo journalctl -u telegraf -n 100"
|
||||
windows: "Get-Service telegraf; Get-Content 'C:\\Program Files\\telegraf\\telegraf.log' -Tail 100"
|
||||
- summary: "Check connectivity from host to Influx (`casper:8086`)"
|
||||
linux: "curl -sSf http://casper:8086/ping"
|
||||
windows: "Invoke-WebRequest -UseBasicParsing http://casper:8086/ping"
|
||||
- summary: "Confirm host clock drift <5s (important for Influx line protocol timestamps)"
|
||||
linux: "chronyc tracking"
|
||||
windows: "w32tm /query /status"
|
||||
remediation:
|
||||
- "Restart Telegraf after config validation: `sudo telegraf --test --config /etc/telegraf/telegraf.conf` then `sudo systemctl restart telegraf`."
|
||||
- "Re-apply Ansible telemetry playbook if multiple hosts fail."
|
||||
llm_prompt: >
|
||||
Alert {{ alertname }} fired for {{ host }}. Telegraf stopped sending cpu.usage_user metrics. Given the collected logs and command output, identify root causes (agent down, auth failures, firewall, time skew) and list the next action.
|
||||
|
||||
- name: "High CPU"
|
||||
rule_uid: "fdkms407ubmdcc"
|
||||
description: "Mean CPU usage_system over the last 10 minutes exceeds 85%."
|
||||
signal:
|
||||
metric: "cpu.usage_system"
|
||||
condition: "mean over 10m > 85%"
|
||||
impact: "Host is near saturation; scheduler latency and queueing likely."
|
||||
evidence_to_collect:
|
||||
- "Top CPU processes snapshot (Linux: `ps -eo pid,cmd,%cpu --sort=-%cpu | head -n 15`; Windows: `Get-Process | Sort-Object CPU -Descending | Select -First 15`)"
|
||||
- "Load vs CPU core count"
|
||||
- "Recent deploys / cron jobs metadata"
|
||||
triage:
|
||||
- summary: "Confirm sustained CPU pressure"
|
||||
linux: "uptime && mpstat 1 5"
|
||||
windows: "typeperf \"\\Processor(_Total)\\% Processor Time\" -sc 15"
|
||||
- summary: "Check offending processes/services"
|
||||
linux: "sudo ps -eo pid,user,comm,%cpu,%mem --sort=-%cpu | head"
|
||||
windows: "Get-Process | Sort-Object CPU -Descending | Select -First 10 Name,CPU"
|
||||
- summary: "Inspect cgroup / VM constraints if on Proxmox"
|
||||
linux: "sudo pct status {{ vmid }} && sudo pct config {{ vmid }}"
|
||||
remediation:
|
||||
- "Throttle or restart runaway service; scale workload or tune limits."
|
||||
- "Consider moving noisy neighbors off shared hypervisor."
|
||||
llm_prompt: >
|
||||
High CPU alert for {{ host }}. Review process table, recent deploys, and virtualization context; determine why cpu.usage_system stayed above 85% and recommend mitigation.
|
||||
|
||||
- name: "High Mem."
|
||||
rule_uid: "edkmsdmlay2o0c"
|
||||
description: "Mean memory used_percent over 10 minutes > 95% (excluding hosts jhci/nerv*/magi*)."
|
||||
signal:
|
||||
metric: "mem.used_percent"
|
||||
condition: "mean over 10m > 95%"
|
||||
impact: "OOM risk and swap thrash."
|
||||
evidence_to_collect:
|
||||
- "Free/available memory snapshot"
|
||||
- "Top consumers (Linux: `sudo smem -rt rss | head`; Windows: `Get-Process | Sort-Object WorkingSet -Descending`)"
|
||||
- "Swap in/out metrics"
|
||||
triage:
|
||||
- summary: "Validate actual memory pressure"
|
||||
linux: "free -m && vmstat -SM 5 5"
|
||||
windows: "Get-Counter '\\Memory\\Available MBytes'"
|
||||
- summary: "Identify leaking services"
|
||||
linux: "sudo ps -eo pid,user,comm,%mem,rss --sort=-%mem | head"
|
||||
windows: "Get-Process | Sort-Object WS -Descending | Select -First 10 ProcessName,WS"
|
||||
- summary: "Check recent kernel/OOM logs"
|
||||
linux: "sudo dmesg | tail -n 50"
|
||||
windows: "Get-WinEvent -LogName System -MaxEvents 50 | ? { $_.Message -match 'memory' }"
|
||||
remediation:
|
||||
- "Restart or reconfigure offender; add swap as stop-gap; increase VM memory allocation."
|
||||
llm_prompt: >
|
||||
High Mem alert for {{ host }}. After reviewing free memory, swap activity, and top processes, explain the likely cause and propose remediation steps with priority.
|
||||
|
||||
- name: "High Disk IO"
|
||||
rule_uid: "bdkmtaru7ru2od"
|
||||
description: "Mean merged_reads/writes per second converted to GB/s exceeds 10."
|
||||
signal:
|
||||
metric: "diskio.merged_reads + merged_writes"
|
||||
condition: "mean over 10m > 10 GB/s"
|
||||
impact: "Storage controller saturated; latency spikes, possible backlog."
|
||||
evidence_to_collect:
|
||||
- "iostat extended output"
|
||||
- "Process level IO (pidstat/nethogs equivalent)"
|
||||
- "ZFS/MDADM status for relevant pools"
|
||||
triage:
|
||||
- summary: "Inspect device queues"
|
||||
linux: "iostat -xzd 5 3"
|
||||
windows: "Get-WmiObject -Class Win32_PerfFormattedData_PerfDisk_LogicalDisk | Format-Table Name,DiskWritesPersec,DiskReadsPersec,AvgDisksecPerTransfer"
|
||||
- summary: "Correlate to filesystem / VM"
|
||||
linux: "sudo lsof +D /mnt/critical -u {{ user }}"
|
||||
- summary: "Check backup or replication windows"
|
||||
linux: "journalctl -u pvebackup -n 50"
|
||||
remediation:
|
||||
- "Pause heavy jobs, move backups off-peak, evaluate faster storage tiers."
|
||||
llm_prompt: >
|
||||
High Disk IO on {{ host }}. With iostat/pidstat output provided, decide whether activity is expected (backup, scrub) or abnormal and list mitigations.
|
||||
|
||||
- name: "Low Uptime"
|
||||
rule_uid: "ddkmuadxvkm4ge"
|
||||
description: "System uptime converted to minutes is below 10 -> host rebooted recently."
|
||||
signal:
|
||||
metric: "system.uptime"
|
||||
condition: "last uptime_minutes < 10"
|
||||
impact: "Unexpected reboot or crash; may need RCA."
|
||||
evidence_to_collect:
|
||||
- "Boot reason logs"
|
||||
- "Last patch/maintenance window from Ansible inventory"
|
||||
- "Smart log excerpt for power events"
|
||||
triage:
|
||||
- summary: "Confirm uptime and reason"
|
||||
linux: "uptime && last -x | head"
|
||||
windows: "Get-WinEvent -LogName System -MaxEvents 50 | ? { $_.Id -in 41,6006,6008 }"
|
||||
- summary: "Check kernel panic or watchdog traces"
|
||||
linux: "sudo journalctl -k -b -1 | tail -n 200"
|
||||
- summary: "Validate patch automation logs"
|
||||
linux: "sudo tail -n 100 /var/log/ansible-pull.log"
|
||||
remediation:
|
||||
- "Schedule deeper diagnostics if crash; reschedule workloads once stable."
|
||||
llm_prompt: >
|
||||
Low Uptime alert: host restarted within 10 minutes. Inspect boot reason logs and recommend whether this is maintenance or a fault needing follow-up.
|
||||
|
||||
- name: "High Load"
|
||||
rule_uid: "ddkmul9x8gcn4d"
|
||||
description: "system.load5 > 6 for 5 minutes."
|
||||
signal:
|
||||
metric: "system.load5"
|
||||
condition: "last value > 6"
|
||||
impact: "Runnable queue more than CPU threads -> latency growth."
|
||||
evidence_to_collect:
|
||||
- "Load vs CPU count (`nproc`)"
|
||||
- "Process states (D/R blocked tasks)"
|
||||
- "IO wait percentage"
|
||||
triage:
|
||||
- summary: "Correlate load to CPU and IO"
|
||||
linux: "uptime && vmstat 1 5"
|
||||
- summary: "Identify stuck IO"
|
||||
linux: "sudo pidstat -d 1 5"
|
||||
- summary: "Check Proxmox scheduler for resource contention"
|
||||
linux: "pveperf && qm list"
|
||||
remediation:
|
||||
- "Reduce cron concurrency, add CPU, or fix IO bottleneck causing runnable queue growth."
|
||||
llm_prompt: >
|
||||
High Load alert on {{ host }}. Based on vmstat/pidstat output, explain whether CPU saturation, IO wait, or runnable pile-up is at fault and propose actions.
|
||||
|
||||
- name: "High Network Traffic (Download)"
|
||||
rule_uid: "cdkpct82a7g8wd"
|
||||
description: "Derivative of bytes_recv > 50 MB/s on any interface over last hour."
|
||||
signal:
|
||||
metric: "net.bytes_recv"
|
||||
condition: "mean download throughput > 50 MB/s"
|
||||
impact: "Link saturation, potential DDoS or backup window."
|
||||
evidence_to_collect:
|
||||
- "Interface counters (Linux: `ip -s link show {{ iface }}`; Windows: `Get-NetAdapterStatistics`)"
|
||||
- "Top talkers (Linux: `sudo nethogs {{ iface }}` or `iftop -i {{ iface }}`)"
|
||||
- "Firewall/IDS logs"
|
||||
triage:
|
||||
- summary: "Confirm interface experiencing spike"
|
||||
linux: "sar -n DEV 1 5 | grep {{ iface }}"
|
||||
windows: "Get-Counter -Counter '\\Network Interface({{ iface }})\\Bytes Received/sec' -Continuous -SampleInterval 1 -MaxSamples 5"
|
||||
- summary: "Identify process or remote peer"
|
||||
linux: "sudo ss -ntu state established | sort -k4"
|
||||
windows: "Get-NetTCPConnection | Sort-Object -Property LocalPort"
|
||||
remediation:
|
||||
- "Throttle offending transfers, move backup replication, verify no compromised service."
|
||||
llm_prompt: >
|
||||
High download throughput on {{ host }} interface {{ iface }}. Review interface counters and connection list to determine if traffic is expected and advise throttling or blocking steps.
|
||||
|
||||
- name: "High Network Traffic (Upload)"
|
||||
rule_uid: "aec650pbtvzswa"
|
||||
description: "Derivative of bytes_sent > 30 MB/s for an interface."
|
||||
signal:
|
||||
metric: "net.bytes_sent"
|
||||
condition: "mean upload throughput > 30 MB/s"
|
||||
impact: "Excess upstream usage; may saturate ISP uplink."
|
||||
evidence_to_collect:
|
||||
- "Interface statistics"
|
||||
- "NetFlow sample if available (`/var/log/telegraf/netflow.log`)"
|
||||
- "List of active transfers"
|
||||
triage:
|
||||
- summary: "Measure upload curve"
|
||||
linux: "bmon -p {{ iface }} -o ascii"
|
||||
windows: "Get-Counter '\\Network Interface({{ iface }})\\Bytes Sent/sec' -Continuous -SampleInterval 1 -MaxSamples 5"
|
||||
- summary: "Find process generating traffic"
|
||||
linux: "sudo iftop -i {{ iface }} -t -s 30"
|
||||
windows: "Get-NetAdapterStatistics -Name {{ iface }}"
|
||||
remediation:
|
||||
- "Pause replication jobs, confirm backups not stuck, search for data exfiltration."
|
||||
llm_prompt: >
|
||||
High upload alert for {{ host }} interface {{ iface }}. Using captured traffic samples, determine whether replication/backup explains the pattern or if anomalous traffic needs blocking.
|
||||
|
||||
- name: "High Disk Usage"
|
||||
rule_uid: "cdma6i5k2gem8d"
|
||||
description: "Disk used_percent >= 95% for Linux devices (filters out unwanted devices)."
|
||||
signal:
|
||||
metric: "disk.used_percent"
|
||||
condition: "last value > 95%"
|
||||
impact: "Filesystem full -> service crashes or write failures."
|
||||
evidence_to_collect:
|
||||
- "`df -h` or `Get-Volume` output for device"
|
||||
- "Largest directories snapshot (Linux: `sudo du -xhd1 /path`; Windows: `Get-ChildItem | Sort Length`)"
|
||||
- "Recent deploy or backup expansion logs"
|
||||
triage:
|
||||
- summary: "Validate usage"
|
||||
linux: "df -h {{ mountpoint }}"
|
||||
windows: "Get-Volume -FileSystemLabel {{ volume }}"
|
||||
- summary: "Identify growth trend"
|
||||
linux: "sudo journalctl -u telegraf -g 'disk usage' -n 20"
|
||||
- summary: "Check for stale docker volumes"
|
||||
linux: "docker system df && docker volume ls"
|
||||
remediation:
|
||||
- "Prune temp artifacts, expand disk/VM, move logs to remote storage."
|
||||
llm_prompt: >
|
||||
High Disk Usage alert on {{ host }} device {{ device }}. Summarize what consumed the space and recommend reclaim or expansion actions with priority.
|
||||
|
||||
- name: "CPU Heartbeat"
|
||||
rule_uid: "eec62gqn3oetcf"
|
||||
description: "Counts cpu.usage_system samples per host; fires if <1 sample arrives within window."
|
||||
signal:
|
||||
metric: "cpu.usage_system"
|
||||
condition: "sample count within 10m < 1"
|
||||
impact: "Indicates host stopped reporting metrics entirely (telemetry silent)."
|
||||
evidence_to_collect:
|
||||
- "Influx query for recent cpu samples"
|
||||
- "Telegraf service and logs"
|
||||
- "Network reachability from host to casper"
|
||||
triage:
|
||||
- summary: "Check host alive and reachable"
|
||||
linux: "ping -c 3 {{ host }} && ssh {{ host }} uptime"
|
||||
windows: "Test-Connection {{ host }} -Count 3"
|
||||
- summary: "Inspect Telegraf state"
|
||||
linux: "sudo systemctl status telegraf && sudo tail -n 100 /var/log/telegraf/telegraf.log"
|
||||
windows: "Get-Service telegraf; Get-EventLog -LogName Application -Newest 50 | ? { $_.Source -match 'Telegraf' }"
|
||||
- summary: "Validate API key / Influx auth"
|
||||
linux: "sudo grep -n 'outputs.influxdb' -n /etc/telegraf/telegraf.conf"
|
||||
remediation:
|
||||
- "Re-issue Telegraf credentials, run `ansible-playbook telemetry.yml -l {{ host }}`."
|
||||
- "If host intentionally offline, silence alert via Grafana maintenance window."
|
||||
llm_prompt: >
|
||||
CPU Heartbeat for {{ host }} indicates telemetry silent. Use connectivity tests and Telegraf logs to determine if host is down or just metrics disabled; propose fixes.
|
||||
14
stacks/mllogwatcher/docker-compose.yml
Normal file
14
stacks/mllogwatcher/docker-compose.yml
Normal file
@ -0,0 +1,14 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
grafana-alert-webhook:
|
||||
build: .
|
||||
env_file:
|
||||
- .env
|
||||
ports:
|
||||
- "8081:8081"
|
||||
volumes:
|
||||
- ./alert_runbook.yaml:/var/core/mlLogWatcher/alert_runbook.yaml:ro
|
||||
- /etc/ansible/hosts:/etc/ansible/hosts:ro
|
||||
- ./.ssh:/var/core/mlLogWatcher/.ssh:ro
|
||||
restart: unless-stopped
|
||||
5
stacks/mllogwatcher/requirements.txt
Normal file
5
stacks/mllogwatcher/requirements.txt
Normal file
@ -0,0 +1,5 @@
|
||||
fastapi==0.115.5
|
||||
uvicorn[standard]==0.32.0
|
||||
pyyaml==6.0.2
|
||||
requests==2.32.3
|
||||
langchain==0.2.15
|
||||
988
stacks/mllogwatcher/scripts/grafana_alert_webhook.py
Executable file
988
stacks/mllogwatcher/scripts/grafana_alert_webhook.py
Executable file
@ -0,0 +1,988 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Minimal FastAPI web server that accepts Grafana alert webhooks, looks up the
|
||||
matching runbook entry, builds an LLM prompt, and calls OpenRouter to return a
|
||||
triage summary.
|
||||
|
||||
Run with:
|
||||
uvicorn scripts.grafana_alert_webhook:app --host 0.0.0.0 --port 8081
|
||||
|
||||
Environment variables:
|
||||
RUNBOOK_PATH Path to alert_runbook.yaml (default: ./alert_runbook.yaml)
|
||||
OPENROUTER_API_KEY Required; API token for https://openrouter.ai
|
||||
OPENROUTER_MODEL Optional; default openai/gpt-4o-mini
|
||||
OPENROUTER_REFERER Optional referer header
|
||||
OPENROUTER_TITLE Optional title header (default: Grafana Alert Webhook)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
import json
|
||||
import shlex
|
||||
import subprocess
|
||||
from textwrap import indent
|
||||
import smtplib
|
||||
from email.message import EmailMessage
|
||||
|
||||
import requests
|
||||
import yaml
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from langchain.llms.base import LLM
|
||||
|
||||
LOGGER = logging.getLogger("grafana_webhook")
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
||||
|
||||
RUNBOOK_PATH = Path(os.environ.get("RUNBOOK_PATH", "alert_runbook.yaml"))
|
||||
ANSIBLE_HOSTS_PATH = Path(os.environ.get("ANSIBLE_HOSTS_PATH", "/etc/ansible/hosts"))
|
||||
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
|
||||
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "openai/gpt-4o-mini")
|
||||
OPENROUTER_REFERER = os.environ.get("OPENROUTER_REFERER")
|
||||
OPENROUTER_TITLE = os.environ.get("OPENROUTER_TITLE", "Grafana Alert Webhook")
|
||||
|
||||
TRIAGE_ENABLE_COMMANDS = os.environ.get("TRIAGE_ENABLE_COMMANDS", "0").lower() in {"1", "true", "yes", "on"}
|
||||
TRIAGE_COMMAND_RUNNER = os.environ.get("TRIAGE_COMMAND_RUNNER", "ssh").lower()
|
||||
TRIAGE_SSH_USER = os.environ.get("TRIAGE_SSH_USER", "root")
|
||||
TRIAGE_SSH_OPTIONS = shlex.split(
|
||||
os.environ.get("TRIAGE_SSH_OPTIONS", "-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=5")
|
||||
)
|
||||
TRIAGE_COMMAND_TIMEOUT = int(os.environ.get("TRIAGE_COMMAND_TIMEOUT", "60"))
|
||||
TRIAGE_DEFAULT_OS = os.environ.get("TRIAGE_DEFAULT_OS", "linux").lower()
|
||||
TRIAGE_MAX_COMMANDS = int(os.environ.get("TRIAGE_MAX_COMMANDS", "3"))
|
||||
TRIAGE_OUTPUT_LIMIT = int(os.environ.get("TRIAGE_OUTPUT_LIMIT", "1200"))
|
||||
# LangChain-driven investigation loop
|
||||
TRIAGE_MAX_ITERATIONS = int(os.environ.get("TRIAGE_MAX_ITERATIONS", "3"))
|
||||
TRIAGE_FOLLOWUP_MAX_COMMANDS = int(os.environ.get("TRIAGE_FOLLOWUP_MAX_COMMANDS", "4"))
|
||||
TRIAGE_SYSTEM_PROMPT = os.environ.get(
|
||||
"TRIAGE_SYSTEM_PROMPT",
|
||||
(
|
||||
"You are assisting with on-call investigations. Always reply with JSON containing:\n"
|
||||
"analysis: your findings and next steps.\n"
|
||||
"followup_commands: list of command specs (summary, command, optional runner/os) to gather more data.\n"
|
||||
"complete: true when sufficient information is gathered.\n"
|
||||
"Request commands only when more evidence is required."
|
||||
),
|
||||
)
|
||||
TRIAGE_VERBOSE_LOGS = os.environ.get("TRIAGE_VERBOSE_LOGS", "0").lower() in {"1", "true", "yes", "on"}
|
||||
TRIAGE_EMAIL_ENABLED = os.environ.get("TRIAGE_EMAIL_ENABLED", "0").lower() in {"1", "true", "yes", "on"}
|
||||
TRIAGE_EMAIL_FROM = os.environ.get("TRIAGE_EMAIL_FROM")
|
||||
TRIAGE_EMAIL_TO = [addr.strip() for addr in os.environ.get("TRIAGE_EMAIL_TO", "").split(",") if addr.strip()]
|
||||
TRIAGE_SMTP_HOST = os.environ.get("TRIAGE_SMTP_HOST")
|
||||
TRIAGE_SMTP_PORT = int(os.environ.get("TRIAGE_SMTP_PORT", "587"))
|
||||
TRIAGE_SMTP_USER = os.environ.get("TRIAGE_SMTP_USER")
|
||||
TRIAGE_SMTP_PASSWORD = os.environ.get("TRIAGE_SMTP_PASSWORD")
|
||||
TRIAGE_SMTP_STARTTLS = os.environ.get("TRIAGE_SMTP_STARTTLS", "1").lower() in {"1", "true", "yes", "on"}
|
||||
TRIAGE_SMTP_SSL = os.environ.get("TRIAGE_SMTP_SSL", "0").lower() in {"1", "true", "yes", "on"}
|
||||
TRIAGE_SMTP_TIMEOUT = int(os.environ.get("TRIAGE_SMTP_TIMEOUT", "20"))
|
||||
|
||||
|
||||
def log_verbose(title: str, content: Any) -> None:
|
||||
"""Emit structured verbose logs when TRIAGE_VERBOSE_LOGS is enabled."""
|
||||
if not TRIAGE_VERBOSE_LOGS:
|
||||
return
|
||||
if isinstance(content, (dict, list)):
|
||||
text = json.dumps(content, indent=2, sort_keys=True)
|
||||
else:
|
||||
text = str(content)
|
||||
LOGGER.info("%s:\n%s", title, text)
|
||||
|
||||
|
||||
def email_notifications_configured() -> bool:
|
||||
if not TRIAGE_EMAIL_ENABLED:
|
||||
return False
|
||||
if not (TRIAGE_SMTP_HOST and TRIAGE_EMAIL_FROM and TRIAGE_EMAIL_TO):
|
||||
LOGGER.warning(
|
||||
"Email notifications requested but TRIAGE_SMTP_HOST/TRIAGE_EMAIL_FROM/TRIAGE_EMAIL_TO are incomplete."
|
||||
)
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def format_command_results_for_email(results: List[Dict[str, Any]]) -> str:
|
||||
if not results:
|
||||
return "No automation commands were executed."
|
||||
lines: List[str] = []
|
||||
for result in results:
|
||||
lines.append(f"- {result.get('summary')} [{result.get('status')}] {result.get('command')}")
|
||||
stdout = result.get("stdout")
|
||||
stderr = result.get("stderr")
|
||||
error = result.get("error")
|
||||
if stdout:
|
||||
lines.append(indent(truncate_text(stdout, 800), " stdout: "))
|
||||
if stderr:
|
||||
lines.append(indent(truncate_text(stderr, 800), " stderr: "))
|
||||
if error and result.get("status") != "ok":
|
||||
lines.append(f" error: {error}")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def build_email_body(alert: Dict[str, Any], result: Dict[str, Any], context: Dict[str, Any]) -> str:
|
||||
lines = [
|
||||
f"Alert: {result.get('alertname')} ({result.get('rule_uid')})",
|
||||
f"Host: {result.get('host') or context.get('host')}",
|
||||
f"Status: {alert.get('status')}",
|
||||
f"Value: {alert.get('value') or alert.get('annotations', {}).get('value')}",
|
||||
f"Grafana Rule: {context.get('rule_url')}",
|
||||
"",
|
||||
"LLM Summary:",
|
||||
result.get("llm_summary") or "(no summary returned)",
|
||||
"",
|
||||
"Command Results:",
|
||||
format_command_results_for_email(result.get("command_results") or []),
|
||||
]
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def send_summary_email(alert: Dict[str, Any], result: Dict[str, Any], context: Dict[str, Any]) -> None:
|
||||
if not email_notifications_configured():
|
||||
return
|
||||
subject_host = result.get("host") or context.get("host") or "(unknown host)"
|
||||
subject = f"[Grafana] {result.get('alertname')} - {subject_host}"
|
||||
body = build_email_body(alert, result, context)
|
||||
message = EmailMessage()
|
||||
message["Subject"] = subject
|
||||
message["From"] = TRIAGE_EMAIL_FROM
|
||||
message["To"] = ", ".join(TRIAGE_EMAIL_TO)
|
||||
message.set_content(body)
|
||||
try:
|
||||
smtp_class = smtplib.SMTP_SSL if TRIAGE_SMTP_SSL else smtplib.SMTP
|
||||
with smtp_class(TRIAGE_SMTP_HOST, TRIAGE_SMTP_PORT, timeout=TRIAGE_SMTP_TIMEOUT) as client:
|
||||
if TRIAGE_SMTP_STARTTLS and not TRIAGE_SMTP_SSL:
|
||||
client.starttls()
|
||||
if TRIAGE_SMTP_USER:
|
||||
client.login(TRIAGE_SMTP_USER, TRIAGE_SMTP_PASSWORD or "")
|
||||
client.send_message(message)
|
||||
LOGGER.info("Sent summary email to %s for host %s", ", ".join(TRIAGE_EMAIL_TO), subject_host)
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
LOGGER.exception("Failed to send summary email: %s", exc)
|
||||
|
||||
app = FastAPI(title="Grafana Alert Webhook", version="1.0.0")
|
||||
|
||||
_RUNBOOK_INDEX: Dict[str, Dict[str, Any]] = {}
|
||||
_INVENTORY_INDEX: Dict[str, Dict[str, Any]] = {}
|
||||
_INVENTORY_GROUP_VARS: Dict[str, Dict[str, str]] = {}
|
||||
_TEMPLATE_PATTERN = re.compile(r"{{\s*([a-zA-Z0-9_]+)\s*}}")
|
||||
|
||||
|
||||
DEFAULT_SYSTEM_PROMPT = TRIAGE_SYSTEM_PROMPT
|
||||
|
||||
|
||||
class OpenRouterLLM(LLM):
|
||||
"""LangChain-compatible LLM that calls OpenRouter chat completions."""
|
||||
|
||||
api_key: str
|
||||
model_name: str
|
||||
|
||||
def __init__(self, api_key: str, model_name: str, **kwargs: Any) -> None:
|
||||
super().__init__(api_key=api_key, model_name=model_name, **kwargs)
|
||||
|
||||
@property
|
||||
def _llm_type(self) -> str:
|
||||
return "openrouter"
|
||||
|
||||
def __call__(self, prompt: str, stop: Optional[List[str]] = None) -> str:
|
||||
return self._call(prompt, stop=stop)
|
||||
|
||||
def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
|
||||
payload = {
|
||||
"model": self.model_name,
|
||||
"messages": [
|
||||
{"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
}
|
||||
log_verbose("OpenRouter request payload", payload)
|
||||
if stop:
|
||||
payload["stop"] = stop
|
||||
LOGGER.info("Posting to OpenRouter model=%s via LangChain", self.model_name)
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
if OPENROUTER_REFERER:
|
||||
headers["HTTP-Referer"] = OPENROUTER_REFERER
|
||||
if OPENROUTER_TITLE:
|
||||
headers["X-Title"] = OPENROUTER_TITLE
|
||||
response = requests.post("https://openrouter.ai/api/v1/chat/completions", json=payload, headers=headers, timeout=90)
|
||||
if response.status_code >= 400:
|
||||
try:
|
||||
detail = response.json()
|
||||
except ValueError:
|
||||
detail = response.text
|
||||
raise RuntimeError(f"OpenRouter error {response.status_code}: {detail}")
|
||||
data = response.json()
|
||||
log_verbose("OpenRouter raw response", data)
|
||||
choices = data.get("choices")
|
||||
if not choices:
|
||||
raise RuntimeError("OpenRouter returned no choices")
|
||||
return choices[0]["message"]["content"].strip()
|
||||
|
||||
|
||||
def load_runbook() -> Dict[str, Dict[str, Any]]:
|
||||
"""Load runbook YAML into a dict keyed by rule_uid."""
|
||||
if not RUNBOOK_PATH.exists():
|
||||
raise FileNotFoundError(f"Runbook file not found: {RUNBOOK_PATH}")
|
||||
with RUNBOOK_PATH.open("r", encoding="utf-8") as handle:
|
||||
data = yaml.safe_load(handle) or {}
|
||||
alerts = data.get("alerts", [])
|
||||
index: Dict[str, Dict[str, Any]] = {}
|
||||
for entry in alerts:
|
||||
uid = entry.get("rule_uid")
|
||||
if uid:
|
||||
index[str(uid)] = entry
|
||||
LOGGER.info("Loaded %d runbook entries from %s", len(index), RUNBOOK_PATH)
|
||||
return index
|
||||
|
||||
|
||||
def _normalize_host_key(host: str) -> str:
|
||||
return host.strip().lower()
|
||||
|
||||
|
||||
def _parse_key_value_tokens(tokens: List[str]) -> Dict[str, str]:
|
||||
data: Dict[str, str] = {}
|
||||
for token in tokens:
|
||||
if "=" not in token:
|
||||
continue
|
||||
key, value = token.split("=", 1)
|
||||
data[key] = value
|
||||
return data
|
||||
|
||||
|
||||
def load_ansible_inventory() -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, str]]]:
|
||||
"""Parse a simple INI-style Ansible hosts file into host/group maps."""
|
||||
if not ANSIBLE_HOSTS_PATH.exists():
|
||||
LOGGER.warning("Ansible inventory not found at %s", ANSIBLE_HOSTS_PATH)
|
||||
return {}, {}
|
||||
hosts: Dict[str, Dict[str, Any]] = {}
|
||||
group_vars: Dict[str, Dict[str, str]] = {}
|
||||
current_group: Optional[str] = None
|
||||
current_section: str = "hosts"
|
||||
|
||||
with ANSIBLE_HOSTS_PATH.open("r", encoding="utf-8") as handle:
|
||||
for raw_line in handle:
|
||||
line = raw_line.strip()
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
if line.startswith("[") and line.endswith("]"):
|
||||
header = line[1:-1].strip()
|
||||
if ":" in header:
|
||||
group_name, suffix = header.split(":", 1)
|
||||
current_group = group_name
|
||||
current_section = suffix
|
||||
else:
|
||||
current_group = header
|
||||
current_section = "hosts"
|
||||
group_vars.setdefault(current_group, {})
|
||||
continue
|
||||
cleaned = line.split("#", 1)[0].strip()
|
||||
if not cleaned:
|
||||
continue
|
||||
tokens = shlex.split(cleaned)
|
||||
if not tokens:
|
||||
continue
|
||||
if current_section == "vars":
|
||||
vars_dict = _parse_key_value_tokens(tokens)
|
||||
group_vars.setdefault(current_group or "all", {}).update(vars_dict)
|
||||
continue
|
||||
host_token = tokens[0]
|
||||
host_key = _normalize_host_key(host_token)
|
||||
entry = hosts.setdefault(host_key, {"name": host_token, "definitions": [], "groups": set()})
|
||||
vars_dict = _parse_key_value_tokens(tokens[1:])
|
||||
entry["definitions"].append({"group": current_group, "vars": vars_dict})
|
||||
if current_group:
|
||||
entry["groups"].add(current_group)
|
||||
|
||||
LOGGER.info("Loaded %d Ansible inventory hosts from %s", len(hosts), ANSIBLE_HOSTS_PATH)
|
||||
return hosts, group_vars
|
||||
|
||||
|
||||
def _lookup_inventory(host: Optional[str]) -> Optional[Dict[str, Any]]:
|
||||
if not host:
|
||||
return None
|
||||
key = _normalize_host_key(host)
|
||||
entry = _INVENTORY_INDEX.get(key)
|
||||
if entry:
|
||||
return entry
|
||||
# try stripping domain suffix
|
||||
short = key.split(".", 1)[0]
|
||||
if short != key:
|
||||
return _INVENTORY_INDEX.get(short)
|
||||
return None
|
||||
|
||||
|
||||
def _merge_group_vars(groups: List[str], host_os: Optional[str]) -> Dict[str, str]:
|
||||
merged: Dict[str, str] = {}
|
||||
global_vars = _INVENTORY_GROUP_VARS.get("all")
|
||||
if global_vars:
|
||||
merged.update(global_vars)
|
||||
normalized_os = (host_os or "").lower()
|
||||
for group in groups:
|
||||
vars_dict = _INVENTORY_GROUP_VARS.get(group)
|
||||
if not vars_dict:
|
||||
continue
|
||||
connection = (vars_dict.get("ansible_connection") or "").lower()
|
||||
if connection == "winrm" and normalized_os == "linux":
|
||||
continue
|
||||
merged.update(vars_dict)
|
||||
return merged
|
||||
|
||||
|
||||
def _should_include_definition(group: Optional[str], vars_dict: Dict[str, str], host_os: Optional[str]) -> bool:
|
||||
if not vars_dict:
|
||||
return False
|
||||
normalized_os = (host_os or "").lower()
|
||||
connection = (vars_dict.get("ansible_connection") or "").lower()
|
||||
if connection == "winrm" and normalized_os != "windows":
|
||||
return False
|
||||
if connection == "local":
|
||||
return True
|
||||
if group and "windows" in group.lower() and normalized_os == "linux" and not connection:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def apply_inventory_context(context: Dict[str, Any]) -> None:
|
||||
"""Augment the alert context with SSH metadata from the Ansible inventory."""
|
||||
host = context.get("host")
|
||||
entry = _lookup_inventory(host)
|
||||
if not entry:
|
||||
return
|
||||
merged_vars = _merge_group_vars(list(entry.get("groups", [])), context.get("host_os"))
|
||||
for definition in entry.get("definitions", []):
|
||||
group_name = definition.get("group")
|
||||
vars_dict = definition.get("vars", {})
|
||||
if _should_include_definition(group_name, vars_dict, context.get("host_os")):
|
||||
merged_vars.update(vars_dict)
|
||||
ansible_host = merged_vars.get("ansible_host") or entry.get("name")
|
||||
ansible_user = merged_vars.get("ansible_user")
|
||||
ansible_port = merged_vars.get("ansible_port")
|
||||
ssh_common_args = merged_vars.get("ansible_ssh_common_args")
|
||||
ssh_key = merged_vars.get("ansible_ssh_private_key_file")
|
||||
connection = (merged_vars.get("ansible_connection") or "").lower()
|
||||
host_os = (context.get("host_os") or "").lower()
|
||||
if connection == "winrm" and host_os != "windows":
|
||||
for key in (
|
||||
"ansible_connection",
|
||||
"ansible_port",
|
||||
"ansible_password",
|
||||
"ansible_winrm_server_cert_validation",
|
||||
"ansible_winrm_scheme",
|
||||
):
|
||||
merged_vars.pop(key, None)
|
||||
connection = ""
|
||||
|
||||
context.setdefault("ssh_host", ansible_host or host)
|
||||
if ansible_user:
|
||||
context["ssh_user"] = ansible_user
|
||||
if ansible_port:
|
||||
context["ssh_port"] = ansible_port
|
||||
if ssh_common_args:
|
||||
context["ssh_common_args"] = ssh_common_args
|
||||
if ssh_key:
|
||||
context["ssh_identity_file"] = ssh_key
|
||||
context.setdefault("inventory_groups", list(entry.get("groups", [])))
|
||||
if connection == "local":
|
||||
context.setdefault("preferred_runner", "local")
|
||||
elif connection in {"", "ssh", "smart"}:
|
||||
context.setdefault("preferred_runner", "ssh")
|
||||
context.setdefault("inventory_groups", list(entry.get("groups", [])))
|
||||
|
||||
|
||||
def render_template(template: str, context: Dict[str, Any]) -> str:
|
||||
"""Very small mustache-style renderer for {{ var }} placeholders."""
|
||||
def replace(match: re.Match[str]) -> str:
|
||||
key = match.group(1)
|
||||
return str(context.get(key, match.group(0)))
|
||||
|
||||
return _TEMPLATE_PATTERN.sub(replace, template)
|
||||
|
||||
|
||||
def extract_rule_uid(alert: Dict[str, Any], parent_payload: Dict[str, Any]) -> Optional[str]:
|
||||
"""Grafana webhooks may include rule UID in different fields."""
|
||||
candidates: List[Any] = [
|
||||
alert.get("ruleUid"),
|
||||
alert.get("rule_uid"),
|
||||
alert.get("ruleId"),
|
||||
alert.get("uid"),
|
||||
alert.get("labels", {}).get("rule_uid"),
|
||||
alert.get("labels", {}).get("ruleUid"),
|
||||
parent_payload.get("ruleUid"),
|
||||
parent_payload.get("rule_uid"),
|
||||
parent_payload.get("ruleId"),
|
||||
]
|
||||
for candidate in candidates:
|
||||
if candidate:
|
||||
return str(candidate)
|
||||
# Fall back to Grafana URL parsing if present
|
||||
url = (
|
||||
alert.get("ruleUrl")
|
||||
or parent_payload.get("ruleUrl")
|
||||
or alert.get("generatorURL")
|
||||
or parent_payload.get("generatorURL")
|
||||
)
|
||||
if url and "/alerting/" in url:
|
||||
return url.rstrip("/").split("/")[-2]
|
||||
return None
|
||||
|
||||
|
||||
def derive_fallback_rule_uid(alert: Dict[str, Any], parent_payload: Dict[str, Any]) -> str:
|
||||
"""Construct a deterministic identifier when Grafana omits rule UIDs."""
|
||||
labels = alert.get("labels", {})
|
||||
candidates = [
|
||||
alert.get("fingerprint"),
|
||||
labels.get("alertname"),
|
||||
labels.get("host"),
|
||||
labels.get("instance"),
|
||||
parent_payload.get("groupKey"),
|
||||
parent_payload.get("title"),
|
||||
]
|
||||
for candidate in candidates:
|
||||
if candidate:
|
||||
return str(candidate)
|
||||
return "unknown-alert"
|
||||
|
||||
|
||||
def build_fallback_runbook_entry(alert: Dict[str, Any], parent_payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Return a generic runbook entry so every alert can be processed."""
|
||||
labels = alert.get("labels", {})
|
||||
alertname = labels.get("alertname") or parent_payload.get("title") or "Grafana Alert"
|
||||
host = labels.get("host") or labels.get("instance") or "(unknown host)"
|
||||
return {
|
||||
"name": f"{alertname} (auto)",
|
||||
"llm_prompt": (
|
||||
"Grafana alert {{ alertname }} fired for {{ host }}.\n"
|
||||
"No dedicated runbook entry exists. Use the payload details, command outputs, "
|
||||
"and your own reasoning to propose likely causes, evidence to gather, and remediation steps."
|
||||
),
|
||||
"triage": [],
|
||||
"evidence_to_collect": [],
|
||||
"remediation": [],
|
||||
"metadata": {"host": host},
|
||||
}
|
||||
|
||||
|
||||
def summarize_dict(prefix: str, data: Optional[Dict[str, Any]]) -> str:
|
||||
if not data:
|
||||
return f"{prefix}: (none)"
|
||||
parts = ", ".join(f"{key}={value}" for key, value in sorted(data.items()))
|
||||
return f"{prefix}: {parts}"
|
||||
|
||||
|
||||
def determine_host_os(alert: Dict[str, Any]) -> str:
|
||||
"""Infer host operating system from labels or defaults."""
|
||||
labels = alert.get("labels", {})
|
||||
candidates = [
|
||||
labels.get("os"),
|
||||
labels.get("platform"),
|
||||
labels.get("system"),
|
||||
alert.get("os"),
|
||||
]
|
||||
for candidate in candidates:
|
||||
if candidate:
|
||||
value = str(candidate).lower()
|
||||
if "win" in value:
|
||||
return "windows"
|
||||
if any(token in value for token in ("linux", "unix", "darwin")):
|
||||
return "linux"
|
||||
host = (labels.get("host") or labels.get("instance") or "").lower()
|
||||
if host.startswith("win") or host.endswith(".localdomain") and "win" in host:
|
||||
return "windows"
|
||||
inventory_os = infer_os_from_inventory(labels.get("host") or labels.get("instance"))
|
||||
if inventory_os:
|
||||
return inventory_os
|
||||
return TRIAGE_DEFAULT_OS
|
||||
|
||||
|
||||
def infer_os_from_inventory(host: Optional[str]) -> Optional[str]:
|
||||
if not host:
|
||||
return None
|
||||
entry = _lookup_inventory(host)
|
||||
if not entry:
|
||||
return None
|
||||
for definition in entry.get("definitions", []):
|
||||
vars_dict = definition.get("vars", {}) or {}
|
||||
connection = (vars_dict.get("ansible_connection") or "").lower()
|
||||
if connection == "winrm":
|
||||
return "windows"
|
||||
for group in entry.get("groups", []):
|
||||
if "windows" in (group or "").lower():
|
||||
return "windows"
|
||||
return None
|
||||
|
||||
|
||||
def truncate_text(text: str, limit: int = TRIAGE_OUTPUT_LIMIT) -> str:
|
||||
"""Trim long outputs to keep prompts manageable."""
|
||||
if not text:
|
||||
return ""
|
||||
cleaned = text.strip()
|
||||
if len(cleaned) <= limit:
|
||||
return cleaned
|
||||
return cleaned[:limit] + "... [truncated]"
|
||||
|
||||
|
||||
def gather_command_specs(entry: Dict[str, Any], host_os: str) -> List[Dict[str, Any]]:
|
||||
"""Collect command specs from triage steps and optional automation sections."""
|
||||
specs: List[Dict[str, Any]] = []
|
||||
for step in entry.get("triage", []):
|
||||
cmd = step.get(host_os)
|
||||
if not cmd:
|
||||
continue
|
||||
specs.append(
|
||||
{
|
||||
"summary": step.get("summary") or entry.get("name") or "triage",
|
||||
"shell": cmd,
|
||||
"runner": step.get("runner"),
|
||||
"os": host_os,
|
||||
}
|
||||
)
|
||||
for item in entry.get("automation_commands", []):
|
||||
target_os = item.get("os", host_os)
|
||||
if target_os and target_os.lower() != host_os:
|
||||
continue
|
||||
specs.append(item)
|
||||
if TRIAGE_MAX_COMMANDS > 0:
|
||||
return specs[:TRIAGE_MAX_COMMANDS]
|
||||
return specs
|
||||
|
||||
|
||||
def build_runner_command(
|
||||
rendered_command: str,
|
||||
runner: str,
|
||||
context: Dict[str, Any],
|
||||
spec: Dict[str, Any],
|
||||
) -> Tuple[Any, str, bool, str]:
|
||||
"""Return the subprocess args, display string, shell flag, and runner label."""
|
||||
runner = runner or TRIAGE_COMMAND_RUNNER
|
||||
runner = runner.lower()
|
||||
if runner == "ssh":
|
||||
host = spec.get("host") or context.get("ssh_host") or context.get("host")
|
||||
if not host:
|
||||
raise RuntimeError("Host not provided for ssh runner.")
|
||||
ssh_user = spec.get("ssh_user") or context.get("ssh_user") or TRIAGE_SSH_USER
|
||||
ssh_target = spec.get("ssh_target") or f"{ssh_user}@{host}"
|
||||
ssh_options = list(TRIAGE_SSH_OPTIONS)
|
||||
common_args = spec.get("ssh_common_args") or context.get("ssh_common_args")
|
||||
if common_args:
|
||||
ssh_options.extend(shlex.split(common_args))
|
||||
ssh_port = spec.get("ssh_port") or context.get("ssh_port")
|
||||
if ssh_port:
|
||||
ssh_options.extend(["-p", str(ssh_port)])
|
||||
identity_file = spec.get("ssh_identity_file") or context.get("ssh_identity_file")
|
||||
if identity_file:
|
||||
ssh_options.extend(["-i", identity_file])
|
||||
command_list = ["ssh", *ssh_options, ssh_target, rendered_command]
|
||||
display = " ".join(shlex.quote(part) for part in command_list)
|
||||
return command_list, display, False, "ssh"
|
||||
# default to local shell execution
|
||||
display = rendered_command
|
||||
return rendered_command, display, True, "local"
|
||||
|
||||
|
||||
def run_subprocess_command(
|
||||
command: Any,
|
||||
display: str,
|
||||
summary: str,
|
||||
use_shell: bool,
|
||||
runner_label: str,
|
||||
) -> Dict[str, Any]:
|
||||
"""Execute subprocess command and capture results."""
|
||||
LOGGER.info("Executing command (%s) via %s: %s", summary, runner_label, display)
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
command,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=TRIAGE_COMMAND_TIMEOUT,
|
||||
shell=use_shell,
|
||||
check=False,
|
||||
)
|
||||
result = {
|
||||
"summary": summary,
|
||||
"command": display,
|
||||
"runner": runner_label,
|
||||
"exit_code": completed.returncode,
|
||||
"stdout": (completed.stdout or "").strip(),
|
||||
"stderr": (completed.stderr or "").strip(),
|
||||
"status": "ok" if completed.returncode == 0 else "failed",
|
||||
}
|
||||
log_verbose(f"Command result ({summary})", result)
|
||||
return result
|
||||
except subprocess.TimeoutExpired as exc:
|
||||
result = {
|
||||
"summary": summary,
|
||||
"command": display,
|
||||
"runner": runner_label,
|
||||
"exit_code": None,
|
||||
"stdout": truncate_text((exc.stdout or "").strip()),
|
||||
"stderr": truncate_text((exc.stderr or "").strip()),
|
||||
"status": "timeout",
|
||||
"error": f"Command timed out after {TRIAGE_COMMAND_TIMEOUT}s",
|
||||
}
|
||||
log_verbose(f"Command timeout ({summary})", result)
|
||||
return result
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
LOGGER.exception("Command execution failed (%s): %s", summary, exc)
|
||||
result = {
|
||||
"summary": summary,
|
||||
"command": display,
|
||||
"runner": runner_label,
|
||||
"exit_code": None,
|
||||
"stdout": "",
|
||||
"stderr": "",
|
||||
"status": "error",
|
||||
"error": str(exc),
|
||||
}
|
||||
log_verbose(f"Command error ({summary})", result)
|
||||
return result
|
||||
|
||||
|
||||
def run_command_spec(spec: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
summary = spec.get("summary") or spec.get("name") or "command"
|
||||
shell_cmd = spec.get("shell")
|
||||
if not shell_cmd:
|
||||
return {"summary": summary, "status": "skipped", "error": "No shell command provided."}
|
||||
rendered = render_template(shell_cmd, context)
|
||||
preferred_runner = context.get("preferred_runner")
|
||||
runner_choice = (spec.get("runner") or preferred_runner or TRIAGE_COMMAND_RUNNER).lower()
|
||||
try:
|
||||
command, display, use_shell, runner_label = build_runner_command(rendered, runner_choice, context, spec)
|
||||
except RuntimeError as exc:
|
||||
LOGGER.warning("Skipping command '%s': %s", summary, exc)
|
||||
return {"summary": summary, "status": "skipped", "error": str(exc), "command": rendered}
|
||||
return run_subprocess_command(command, display, summary, use_shell, runner_label)
|
||||
|
||||
|
||||
def execute_triage_commands(entry: Dict[str, Any], alert: Dict[str, Any], context: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
host_os = context.get("host_os") or determine_host_os(alert)
|
||||
context["host_os"] = host_os
|
||||
specs = gather_command_specs(entry, host_os)
|
||||
if not specs:
|
||||
LOGGER.info("No triage commands defined for host_os=%s", host_os)
|
||||
return []
|
||||
if not TRIAGE_ENABLE_COMMANDS:
|
||||
LOGGER.info("Command execution disabled; %d commands queued but skipped.", len(specs))
|
||||
return []
|
||||
LOGGER.info("Executing up to %d triage commands for host_os=%s", len(specs), host_os)
|
||||
results = []
|
||||
for spec in specs:
|
||||
results.append(run_command_spec(spec, context))
|
||||
return results
|
||||
|
||||
|
||||
def format_command_results_for_llm(results: List[Dict[str, Any]]) -> str:
|
||||
lines: List[str] = []
|
||||
for idx, result in enumerate(results, start=1):
|
||||
lines.append(f"{idx}. {result.get('summary')} [{result.get('status')}] {result.get('command')}")
|
||||
stdout = result.get("stdout")
|
||||
stderr = result.get("stderr")
|
||||
error = result.get("error")
|
||||
if stdout:
|
||||
lines.append(" stdout:")
|
||||
lines.append(indent(truncate_text(stdout), " "))
|
||||
if stderr:
|
||||
lines.append(" stderr:")
|
||||
lines.append(indent(truncate_text(stderr), " "))
|
||||
if error and result.get("status") != "ok":
|
||||
lines.append(f" error: {error}")
|
||||
if not lines:
|
||||
return "No command results were available."
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def parse_structured_response(text: str) -> Optional[Dict[str, Any]]:
|
||||
cleaned = text.strip()
|
||||
try:
|
||||
return json.loads(cleaned)
|
||||
except json.JSONDecodeError:
|
||||
start = cleaned.find("{")
|
||||
end = cleaned.rfind("}")
|
||||
if start != -1 and end != -1 and end > start:
|
||||
snippet = cleaned[start : end + 1]
|
||||
try:
|
||||
return json.loads(snippet)
|
||||
except json.JSONDecodeError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def normalize_followup_command(item: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return {
|
||||
"summary": item.get("summary") or item.get("name") or "Follow-up command",
|
||||
"shell": item.get("command") or item.get("shell"),
|
||||
"runner": item.get("runner"),
|
||||
"host": item.get("host") or item.get("target"),
|
||||
"ssh_user": item.get("ssh_user"),
|
||||
"os": (item.get("os") or item.get("platform") or "").lower() or None,
|
||||
}
|
||||
|
||||
|
||||
def investigate_with_langchain(
|
||||
entry: Dict[str, Any],
|
||||
alert: Dict[str, Any],
|
||||
parent_payload: Dict[str, Any],
|
||||
context: Dict[str, Any],
|
||||
initial_outputs: List[Dict[str, Any]],
|
||||
) -> Tuple[str, List[Dict[str, Any]]]:
|
||||
command_outputs = list(initial_outputs)
|
||||
prompt = build_prompt(entry, alert, parent_payload, context, command_outputs)
|
||||
log_verbose("Initial investigation prompt", prompt)
|
||||
if not OPENROUTER_API_KEY:
|
||||
return "OPENROUTER_API_KEY is not configured; unable to analyze alert.", command_outputs
|
||||
|
||||
llm = OpenRouterLLM(api_key=OPENROUTER_API_KEY, model_name=OPENROUTER_MODEL)
|
||||
dialogue = (
|
||||
prompt
|
||||
+ "\n\nRespond with JSON containing fields analysis, followup_commands, and complete. "
|
||||
"Request commands only when more evidence is required."
|
||||
)
|
||||
total_followup = 0
|
||||
final_summary = ""
|
||||
for iteration in range(TRIAGE_MAX_ITERATIONS):
|
||||
log_verbose(f"LLM dialogue iteration {iteration + 1}", dialogue)
|
||||
llm_text = llm(dialogue)
|
||||
log_verbose(f"LLM iteration {iteration + 1} output", llm_text)
|
||||
dialogue += f"\nAssistant:\n{llm_text}\n"
|
||||
parsed = parse_structured_response(llm_text)
|
||||
if parsed:
|
||||
log_verbose(f"LLM iteration {iteration + 1} parsed response", parsed)
|
||||
if not parsed:
|
||||
final_summary = llm_text
|
||||
break
|
||||
|
||||
analysis = parsed.get("analysis") or ""
|
||||
followups = parsed.get("followup_commands") or parsed.get("commands") or []
|
||||
final_summary = analysis
|
||||
complete_flag = bool(parsed.get("complete"))
|
||||
|
||||
if complete_flag or not followups:
|
||||
break
|
||||
|
||||
log_verbose(f"LLM iteration {iteration + 1} requested follow-ups", followups)
|
||||
allowed = max(0, TRIAGE_FOLLOWUP_MAX_COMMANDS - total_followup)
|
||||
if not TRIAGE_ENABLE_COMMANDS or allowed <= 0:
|
||||
dialogue += (
|
||||
"\nUser:\nCommand execution is disabled or budget exhausted. Provide final analysis with JSON format.\n"
|
||||
)
|
||||
continue
|
||||
|
||||
normalized_cmds: List[Dict[str, Any]] = []
|
||||
for raw in followups:
|
||||
if not isinstance(raw, dict):
|
||||
continue
|
||||
normalized = normalize_followup_command(raw)
|
||||
if not normalized.get("shell"):
|
||||
continue
|
||||
cmd_os = normalized.get("os")
|
||||
if cmd_os and cmd_os != context.get("host_os"):
|
||||
continue
|
||||
normalized_cmds.append(normalized)
|
||||
|
||||
log_verbose(f"Normalized follow-up commands (iteration {iteration + 1})", normalized_cmds)
|
||||
if not normalized_cmds:
|
||||
dialogue += "\nUser:\nNo valid commands to run. Finalize analysis in JSON format.\n"
|
||||
continue
|
||||
|
||||
normalized_cmds = normalized_cmds[:allowed]
|
||||
executed_batch: List[Dict[str, Any]] = []
|
||||
for spec in normalized_cmds:
|
||||
executed = run_command_spec(spec, context)
|
||||
command_outputs.append(executed)
|
||||
executed_batch.append(executed)
|
||||
total_followup += 1
|
||||
|
||||
result_text = "Follow-up command results:\n" + format_command_results_for_llm(executed_batch)
|
||||
dialogue += (
|
||||
"\nUser:\n"
|
||||
+ result_text
|
||||
+ "\nUpdate your analysis and respond with JSON (analysis, followup_commands, complete).\n"
|
||||
)
|
||||
log_verbose("Executed follow-up commands", result_text)
|
||||
else:
|
||||
final_summary = final_summary or "Reached maximum iterations without a conclusive response."
|
||||
|
||||
if not final_summary:
|
||||
final_summary = "LLM did not return a valid analysis."
|
||||
|
||||
log_verbose("Final LLM summary", final_summary)
|
||||
return final_summary, command_outputs
|
||||
|
||||
|
||||
def build_context(alert: Dict[str, Any], parent_payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
labels = alert.get("labels", {})
|
||||
annotations = alert.get("annotations", {})
|
||||
context = {
|
||||
"alertname": labels.get("alertname") or alert.get("title") or parent_payload.get("title") or parent_payload.get("ruleName"),
|
||||
"host": labels.get("host") or labels.get("instance"),
|
||||
"iface": labels.get("interface"),
|
||||
"device": labels.get("device"),
|
||||
"vmid": labels.get("vmid"),
|
||||
"status": alert.get("status") or parent_payload.get("status"),
|
||||
"value": alert.get("value") or annotations.get("value"),
|
||||
"rule_url": alert.get("ruleUrl") or parent_payload.get("ruleUrl"),
|
||||
}
|
||||
context.setdefault("ssh_user", TRIAGE_SSH_USER)
|
||||
return context
|
||||
|
||||
|
||||
def build_prompt(
|
||||
entry: Dict[str, Any],
|
||||
alert: Dict[str, Any],
|
||||
parent_payload: Dict[str, Any],
|
||||
context: Dict[str, Any],
|
||||
command_outputs: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> str:
|
||||
template = entry.get("llm_prompt", "Alert {{ alertname }} fired for {{ host }}.")
|
||||
rendered_template = render_template(template, {k: v or "" for k, v in context.items()})
|
||||
|
||||
evidence = entry.get("evidence_to_collect", [])
|
||||
triage_steps = entry.get("triage", [])
|
||||
remediation = entry.get("remediation", [])
|
||||
|
||||
lines = [
|
||||
rendered_template.strip(),
|
||||
"",
|
||||
"Alert payload summary:",
|
||||
f"- Status: {context.get('status') or alert.get('status')}",
|
||||
f"- Host: {context.get('host')}",
|
||||
f"- Value: {context.get('value')}",
|
||||
f"- StartsAt: {alert.get('startsAt')}",
|
||||
f"- EndsAt: {alert.get('endsAt')}",
|
||||
f"- RuleURL: {context.get('rule_url')}",
|
||||
f"- Host OS (inferred): {context.get('host_os')}",
|
||||
"- Note: All timestamps are UTC/RFC3339 as provided by Grafana.",
|
||||
summarize_dict("- Labels", alert.get("labels")),
|
||||
summarize_dict("- Annotations", alert.get("annotations")),
|
||||
]
|
||||
|
||||
if evidence:
|
||||
lines.append("")
|
||||
lines.append("Evidence to gather (for automation reference):")
|
||||
for item in evidence:
|
||||
lines.append(f"- {item}")
|
||||
|
||||
if triage_steps:
|
||||
lines.append("")
|
||||
lines.append("Suggested manual checks:")
|
||||
for step in triage_steps:
|
||||
summary = step.get("summary")
|
||||
linux = step.get("linux")
|
||||
windows = step.get("windows")
|
||||
lines.append(f"- {summary}")
|
||||
if linux:
|
||||
lines.append(f" Linux: {linux}")
|
||||
if windows:
|
||||
lines.append(f" Windows: {windows}")
|
||||
|
||||
if remediation:
|
||||
lines.append("")
|
||||
lines.append("Remediation ideas:")
|
||||
for item in remediation:
|
||||
lines.append(f"- {item}")
|
||||
|
||||
if command_outputs:
|
||||
lines.append("")
|
||||
lines.append("Command execution results:")
|
||||
for result in command_outputs:
|
||||
status = result.get("status", "unknown")
|
||||
cmd_display = result.get("command", "")
|
||||
lines.append(f"- {result.get('summary')} [{status}] {cmd_display}")
|
||||
stdout = result.get("stdout")
|
||||
stderr = result.get("stderr")
|
||||
error = result.get("error")
|
||||
if stdout:
|
||||
lines.append(" stdout:")
|
||||
lines.append(indent(truncate_text(stdout), " "))
|
||||
if stderr:
|
||||
lines.append(" stderr:")
|
||||
lines.append(indent(truncate_text(stderr), " "))
|
||||
if error and status != "ok":
|
||||
lines.append(f" error: {error}")
|
||||
|
||||
return "\n".join(lines).strip()
|
||||
|
||||
|
||||
def get_alerts(payload: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
alerts = payload.get("alerts")
|
||||
if isinstance(alerts, list) and alerts:
|
||||
return alerts
|
||||
return [payload]
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
def startup_event() -> None:
|
||||
global _RUNBOOK_INDEX, _INVENTORY_INDEX, _INVENTORY_GROUP_VARS
|
||||
_RUNBOOK_INDEX = load_runbook()
|
||||
_INVENTORY_INDEX, _INVENTORY_GROUP_VARS = load_ansible_inventory()
|
||||
LOGGER.info(
|
||||
"Alert webhook server ready with %d runbook entries and %d inventory hosts.",
|
||||
len(_RUNBOOK_INDEX),
|
||||
len(_INVENTORY_INDEX),
|
||||
)
|
||||
|
||||
|
||||
@app.post("/alerts")
|
||||
async def handle_alert(request: Request) -> Dict[str, Any]:
|
||||
payload = await request.json()
|
||||
LOGGER.info("Received Grafana payload: %s", json.dumps(payload, indent=2, sort_keys=True))
|
||||
results = []
|
||||
unmatched = []
|
||||
for alert in get_alerts(payload):
|
||||
LOGGER.info("Processing alert: %s", json.dumps(alert, indent=2, sort_keys=True))
|
||||
unmatched_reason: Optional[str] = None
|
||||
alert_status = str(alert.get("status") or payload.get("status") or "").lower()
|
||||
if alert_status and alert_status != "firing":
|
||||
details = {"reason": "non_firing_status", "status": alert_status, "alert": alert}
|
||||
unmatched.append(details)
|
||||
LOGGER.info("Skipping alert with status=%s (only 'firing' alerts are processed).", alert_status)
|
||||
continue
|
||||
rule_uid = extract_rule_uid(alert, payload)
|
||||
if not rule_uid:
|
||||
unmatched_reason = "missing_rule_uid"
|
||||
derived_uid = derive_fallback_rule_uid(alert, payload)
|
||||
details = {"reason": unmatched_reason, "derived_rule_uid": derived_uid, "alert": alert}
|
||||
unmatched.append(details)
|
||||
LOGGER.warning("Alert missing rule UID, using fallback identifier %s", derived_uid)
|
||||
rule_uid = derived_uid
|
||||
entry = _RUNBOOK_INDEX.get(rule_uid)
|
||||
runbook_matched = entry is not None
|
||||
if not entry:
|
||||
unmatched_reason = unmatched_reason or "no_runbook_entry"
|
||||
details = {"reason": unmatched_reason, "rule_uid": rule_uid, "alert": alert}
|
||||
unmatched.append(details)
|
||||
LOGGER.warning("No runbook entry for rule_uid=%s, using generic fallback.", rule_uid)
|
||||
entry = build_fallback_runbook_entry(alert, payload)
|
||||
context = build_context(alert, payload)
|
||||
context["host_os"] = determine_host_os(alert)
|
||||
context["rule_uid"] = rule_uid
|
||||
apply_inventory_context(context)
|
||||
initial_outputs = execute_triage_commands(entry, alert, context)
|
||||
try:
|
||||
llm_text, command_outputs = investigate_with_langchain(entry, alert, payload, context, initial_outputs)
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
LOGGER.exception("Investigation failed for rule_uid=%s: %s", rule_uid, exc)
|
||||
raise HTTPException(status_code=502, detail=f"LLM investigation error: {exc}") from exc
|
||||
result = {
|
||||
"rule_uid": rule_uid,
|
||||
"alertname": entry.get("name"),
|
||||
"host": alert.get("labels", {}).get("host"),
|
||||
"llm_summary": llm_text,
|
||||
"command_results": command_outputs,
|
||||
"runbook_matched": runbook_matched,
|
||||
}
|
||||
if not runbook_matched and unmatched_reason:
|
||||
result["fallback_reason"] = unmatched_reason
|
||||
results.append(result)
|
||||
send_summary_email(alert, result, context)
|
||||
return {"processed": len(results), "results": results, "unmatched": unmatched}
|
||||
|
||||
|
||||
@app.post("/reload-runbook")
|
||||
def reload_runbook() -> Dict[str, Any]:
|
||||
global _RUNBOOK_INDEX, _INVENTORY_INDEX, _INVENTORY_GROUP_VARS
|
||||
_RUNBOOK_INDEX = load_runbook()
|
||||
_INVENTORY_INDEX, _INVENTORY_GROUP_VARS = load_ansible_inventory()
|
||||
return {"entries": len(_RUNBOOK_INDEX), "inventory_hosts": len(_INVENTORY_INDEX)}
|
||||
178
stacks/mllogwatcher/scripts/log_monitor.py
Executable file
178
stacks/mllogwatcher/scripts/log_monitor.py
Executable file
@ -0,0 +1,178 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Log anomaly checker that queries Elasticsearch and asks an OpenRouter-hosted LLM
|
||||
for a quick triage summary. Intended to be run on a schedule (cron/systemd).
|
||||
|
||||
Required environment variables:
|
||||
ELASTIC_HOST e.g. https://casper.localdomain:9200
|
||||
ELASTIC_API_KEY Base64 ApiKey used for Elasticsearch requests
|
||||
OPENROUTER_API_KEY Token for https://openrouter.ai/
|
||||
|
||||
Optional environment variables:
|
||||
OPENROUTER_MODEL Model identifier (default: openai/gpt-4o-mini)
|
||||
OPENROUTER_REFERER Passed through as HTTP-Referer header
|
||||
OPENROUTER_TITLE Passed through as X-Title header
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import datetime as dt
|
||||
import os
|
||||
import sys
|
||||
from typing import Any, Iterable
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def utc_iso(ts: dt.datetime) -> str:
|
||||
"""Return an ISO8601 string with Z suffix."""
|
||||
return ts.replace(microsecond=0).isoformat() + "Z"
|
||||
|
||||
|
||||
def query_elasticsearch(
|
||||
host: str,
|
||||
api_key: str,
|
||||
index_pattern: str,
|
||||
minutes: int,
|
||||
size: int,
|
||||
verify: bool,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Fetch recent logs from Elasticsearch."""
|
||||
end = dt.datetime.utcnow()
|
||||
start = end - dt.timedelta(minutes=minutes)
|
||||
url = f"{host.rstrip('/')}/{index_pattern}/_search"
|
||||
payload = {
|
||||
"size": size,
|
||||
"sort": [{"@timestamp": {"order": "desc"}}],
|
||||
"query": {
|
||||
"range": {
|
||||
"@timestamp": {
|
||||
"gte": utc_iso(start),
|
||||
"lte": utc_iso(end),
|
||||
}
|
||||
}
|
||||
},
|
||||
"_source": ["@timestamp", "message", "host.name", "container.image.name", "log.level"],
|
||||
}
|
||||
headers = {
|
||||
"Authorization": f"ApiKey {api_key}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
response = requests.post(url, json=payload, headers=headers, timeout=30, verify=verify)
|
||||
response.raise_for_status()
|
||||
hits = response.json().get("hits", {}).get("hits", [])
|
||||
return hits
|
||||
|
||||
|
||||
def build_prompt(logs: Iterable[dict[str, Any]], limit_messages: int) -> str:
|
||||
"""Create the prompt that will be sent to the LLM."""
|
||||
selected = []
|
||||
for idx, hit in enumerate(logs):
|
||||
if idx >= limit_messages:
|
||||
break
|
||||
source = hit.get("_source", {})
|
||||
message = source.get("message") or source.get("event", {}).get("original") or ""
|
||||
timestamp = source.get("@timestamp", "unknown time")
|
||||
host = source.get("host", {}).get("name") or source.get("host", {}).get("hostname") or "unknown-host"
|
||||
container = source.get("container", {}).get("image", {}).get("name") or ""
|
||||
level = source.get("log", {}).get("level") or source.get("log.level") or ""
|
||||
selected.append(
|
||||
f"[{timestamp}] host={host} level={level} container={container}\n{message}".strip()
|
||||
)
|
||||
|
||||
if not selected:
|
||||
return "No logs were returned from Elasticsearch in the requested window."
|
||||
|
||||
prompt = (
|
||||
"You are assisting with HomeLab observability. Review the following log entries collected from "
|
||||
"Elasticsearch and highlight any notable anomalies, errors, or emerging issues. "
|
||||
"Explain the impact and suggest next steps when applicable. "
|
||||
"Use concise bullet points. Logs:\n\n"
|
||||
+ "\n\n".join(selected)
|
||||
)
|
||||
return prompt
|
||||
|
||||
|
||||
def call_openrouter(prompt: str, model: str, api_key: str, referer: str | None, title: str | None) -> str:
|
||||
"""Send prompt to OpenRouter and return the model response text."""
|
||||
url = "https://openrouter.ai/api/v1/chat/completions"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
if referer:
|
||||
headers["HTTP-Referer"] = referer
|
||||
if title:
|
||||
headers["X-Title"] = title
|
||||
|
||||
body = {
|
||||
"model": model,
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are a senior SRE helping analyze log anomalies."},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
}
|
||||
|
||||
response = requests.post(url, json=body, headers=headers, timeout=60)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
choices = data.get("choices", [])
|
||||
if not choices:
|
||||
raise RuntimeError("OpenRouter response did not include choices")
|
||||
return choices[0]["message"]["content"]
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description="Query Elasticsearch and summarize logs with OpenRouter.")
|
||||
parser.add_argument("--host", default=os.environ.get("ELASTIC_HOST"), help="Elasticsearch host URL")
|
||||
parser.add_argument("--api-key", default=os.environ.get("ELASTIC_API_KEY"), help="Elasticsearch ApiKey")
|
||||
parser.add_argument("--index", default="log*", help="Index pattern (default: log*)")
|
||||
parser.add_argument("--minutes", type=int, default=60, help="Lookback window in minutes (default: 60)")
|
||||
parser.add_argument("--size", type=int, default=200, help="Max number of logs to fetch (default: 200)")
|
||||
parser.add_argument("--message-limit", type=int, default=50, help="Max log lines sent to LLM (default: 50)")
|
||||
parser.add_argument("--openrouter-model", default=os.environ.get("OPENROUTER_MODEL", "openai/gpt-4o-mini"))
|
||||
parser.add_argument("--insecure", action="store_true", help="Disable TLS verification for Elasticsearch")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
if not args.host or not args.api_key:
|
||||
print("ELASTIC_HOST and ELASTIC_API_KEY must be provided via environment or CLI", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
logs = query_elasticsearch(
|
||||
host=args.host,
|
||||
api_key=args.api_key,
|
||||
index_pattern=args.index,
|
||||
minutes=args.minutes,
|
||||
size=args.size,
|
||||
verify=not args.insecure,
|
||||
)
|
||||
|
||||
prompt = build_prompt(logs, limit_messages=args.message_limit)
|
||||
if not prompt.strip() or prompt.startswith("No logs"):
|
||||
print(prompt)
|
||||
return 0
|
||||
|
||||
openrouter_key = os.environ.get("OPENROUTER_API_KEY")
|
||||
if not openrouter_key:
|
||||
print("OPENROUTER_API_KEY is required to summarize logs", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
referer = os.environ.get("OPENROUTER_REFERER")
|
||||
title = os.environ.get("OPENROUTER_TITLE", "Elastic Log Monitor")
|
||||
response_text = call_openrouter(
|
||||
prompt=prompt,
|
||||
model=args.openrouter_model,
|
||||
api_key=openrouter_key,
|
||||
referer=referer,
|
||||
title=title,
|
||||
)
|
||||
print(response_text.strip())
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
17
stacks/mllogwatcher/testing.py
Executable file
17
stacks/mllogwatcher/testing.py
Executable file
@ -0,0 +1,17 @@
|
||||
# pip install -qU langchain "langchain[anthropic]"
|
||||
from langchain.agents import create_agent
|
||||
|
||||
def get_weather(city: str) -> str:
|
||||
"""Get weather for a given city."""
|
||||
return f"It's always sunny in {city}!"
|
||||
|
||||
agent = create_agent(
|
||||
model="claude-sonnet-4-5-20250929",
|
||||
tools=[get_weather],
|
||||
system_prompt="You are a helpful assistant",
|
||||
)
|
||||
|
||||
# Run the agent
|
||||
agent.invoke(
|
||||
{"messages": [{"role": "user", "content": "what is the weather in sf"}]}
|
||||
)
|
||||
10
stacks/mllogwatcher/worklog-2025-12-29.txt
Normal file
10
stacks/mllogwatcher/worklog-2025-12-29.txt
Normal file
@ -0,0 +1,10 @@
|
||||
# Worklog – 2025-12-29
|
||||
|
||||
1. Added containerization assets for grafana_alert_webhook:
|
||||
- `Dockerfile`, `.dockerignore`, `docker-compose.yml`, `.env.example`, and consolidated `requirements.txt`.
|
||||
- Compose mounts the runbook, `/etc/ansible/hosts`, and `.ssh` so SSH automation works inside the container.
|
||||
- README now documents the compose workflow.
|
||||
2. Copied knight’s SSH key to `.ssh/webhook_id_rsa` and updated `jet-alone` inventory entry with `ansible_user` + `ansible_ssh_private_key_file` so remote commands can run non-interactively.
|
||||
3. Updated `OpenRouterLLM` to satisfy Pydantic’s field validation inside the container.
|
||||
4. Brought the webhook up under Docker Compose, tested alerts end-to-end, and reverted `OPENROUTER_MODEL` to the valid `openai/gpt-5.1-codex-max`.
|
||||
5. Created `/var/core/ansible/ops_baseline.yml` to install sysstat/iotop/smartmontools/hdparm and enforce synchronized Bash history (`/etc/profile.d/99-bash-history.sh`). Ran the playbook against the primary LAN hosts; noted remediation items for the few that failed (outdated mirrors, pending grub configuration, missing sudo password).
|
||||
26
stacks/network-mcp/.env.example
Normal file
26
stacks/network-mcp/.env.example
Normal file
@ -0,0 +1,26 @@
|
||||
ES_URL=http://elasticsearch:9200
|
||||
# Elasticsearch API Key authentication (preferred over user/pass)
|
||||
ES_API_ID=
|
||||
ES_API_KEY=
|
||||
|
||||
# Or, Elasticsearch Basic authentication (if no API key)
|
||||
# ES_USER=elastic
|
||||
# ES_PASS=changeme
|
||||
ES_VERIFY_SSL=false
|
||||
|
||||
OPNSENSE_URL=https://192.168.1.1
|
||||
OPNSENSE_API_KEY=your_key
|
||||
OPNSENSE_API_SECRET=your_secret
|
||||
|
||||
COLLECTOR_INTERVAL_SECONDS=60
|
||||
|
||||
NMAP_INTERVAL_SECONDS=300
|
||||
NMAP_PORT_RANGE=1-1024
|
||||
NMAP_BATCH_SIZE=10
|
||||
NMAP_DISCOVERY_ENABLED=true
|
||||
NMAP_DISCOVERY_INTERVAL_SECONDS=3600
|
||||
NMAP_DISCOVERY_VLANS=
|
||||
NMAP_DISCOVERY_EXTRA_ARGS="-sn -n"
|
||||
NMAP_QUICK_BATCH_SIZE=30
|
||||
NMAP_QUICK_EXTRA_ARGS="-sS --top-ports 100 -T4 --open -Pn"
|
||||
NMAP_FULL_INTERVAL_SECONDS=86400
|
||||
26
stacks/network-mcp/.env.template
Normal file
26
stacks/network-mcp/.env.template
Normal file
@ -0,0 +1,26 @@
|
||||
ES_URL=http://elasticsearch:9200
|
||||
# Elasticsearch API Key authentication (preferred over user/pass)
|
||||
ES_API_ID=
|
||||
ES_API_KEY=
|
||||
|
||||
# Or, Elasticsearch Basic authentication (if no API key)
|
||||
# ES_USER=elastic
|
||||
# ES_PASS=changeme
|
||||
ES_VERIFY_SSL=false
|
||||
|
||||
OPNSENSE_URL=https://192.168.1.1
|
||||
OPNSENSE_API_KEY=your_key
|
||||
OPNSENSE_API_SECRET=your_secret
|
||||
|
||||
COLLECTOR_INTERVAL_SECONDS=60
|
||||
|
||||
NMAP_INTERVAL_SECONDS=300
|
||||
NMAP_PORT_RANGE=1-1024
|
||||
NMAP_BATCH_SIZE=10
|
||||
NMAP_DISCOVERY_ENABLED=true
|
||||
NMAP_DISCOVERY_INTERVAL_SECONDS=3600
|
||||
NMAP_DISCOVERY_VLANS=
|
||||
NMAP_DISCOVERY_EXTRA_ARGS="-sn -n"
|
||||
NMAP_QUICK_BATCH_SIZE=30
|
||||
NMAP_QUICK_EXTRA_ARGS="-sS --top-ports 100 -T4 --open -Pn"
|
||||
NMAP_FULL_INTERVAL_SECONDS=86400
|
||||
11
stacks/network-mcp/.gitignore
vendored
Normal file
11
stacks/network-mcp/.gitignore
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
.env
|
||||
.venv/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
.DS_Store
|
||||
|
||||
# Local/infra
|
||||
node_modules/
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
76
stacks/network-mcp/PROJECT_SUMMARY.md
Normal file
76
stacks/network-mcp/PROJECT_SUMMARY.md
Normal file
@ -0,0 +1,76 @@
|
||||
# Network MCP - Project Summary
|
||||
|
||||
## Overview
|
||||
This project is a long-running Network MCP service that merges OPNsense discovery data, Nmap scans, and static inventory into Elasticsearch, then exposes both a minimal web UI and a full MCP JSON-RPC interface for LLM agents. It runs via Docker Compose and is now located at `/var/core/network-mcp`.
|
||||
|
||||
## What We Built
|
||||
- **Collectors**
|
||||
- OPNsense collector ingests DHCP/ARP/DNS and overlays inventory targets.
|
||||
- Nmap collector performs discovery and port scans.
|
||||
- Data lands in Elasticsearch: `network-hosts` (current state) and `network-events-*` (historical events).
|
||||
- **Inventory merge**
|
||||
- Inventory data from `inventory_targets.yml` is merged onto live hosts by IP when a MAC is known (so live MAC-based records carry inventory notes/expected ports).
|
||||
- **Frontend**
|
||||
- Flask UI + JSON API, containerized with Gunicorn and exposed on port `5001` for LAN access.
|
||||
- **MCP server**
|
||||
- JSON-RPC endpoint at `/.well-known/mcp.json` (and `/api/mcp`) supports:
|
||||
- `initialize`, `ping`, `tools/list`, `tools/call`
|
||||
- `resources/list`, `resources/read`, `resources/templates/list`
|
||||
- Tool schemas include titles, descriptions, input/output schemas, and annotations (read-only hints).
|
||||
- Resource templates provide snapshot + query access (e.g. `network://hosts?q=...`).
|
||||
- **Search behavior**
|
||||
- Host search is case-insensitive across name/hostname/IP/MAC.
|
||||
- **Tests**
|
||||
- Unit tests for REST and MCP search by hostname/IP/MAC, MCP resource reads, and MCP notifications.
|
||||
|
||||
## Key Endpoints
|
||||
- UI: `http://<host>:5001/`
|
||||
- REST:
|
||||
- `GET /api/hosts` (supports `q`, `source`, `limit`)
|
||||
- `GET /api/hosts/<host_id>`
|
||||
- `GET /api/events`
|
||||
- `GET /api/hosts/<host_id>/events`
|
||||
- `GET /api/map`
|
||||
- MCP JSON-RPC: `POST /.well-known/mcp.json`
|
||||
|
||||
## MCP Tools (JSON-RPC)
|
||||
- `list_hosts` (search by hostname/IP/MAC; case-insensitive)
|
||||
- `get_host` (optional events)
|
||||
- `list_events`
|
||||
- `host_events`
|
||||
- `network_map`
|
||||
|
||||
## MCP Resources
|
||||
- `resources/list` -> `network://hosts`, `network://map`, `network://events`
|
||||
- `resources/templates/list` -> query templates such as:
|
||||
- `network://hosts{?q,source,limit}`
|
||||
- `network://host/{host_id}{?include_events,events_limit}`
|
||||
- `network://events{?host_id,type,since,limit}`
|
||||
|
||||
## Docker & Repo State
|
||||
- Repo path: `/var/core/network-mcp`
|
||||
- `inventory_targets.yml` lives in the repo and is mounted via compose.
|
||||
- Services run via `docker-compose up -d`.
|
||||
- Git repo initialized and initial commit created.
|
||||
|
||||
## Gotchas / Pitfalls We Hit
|
||||
- **MCP handshake**: Codex sent `notifications/initialized` without `id` (notification). Returning a response caused the transport to close. Fixed by treating notifications as no-response.
|
||||
- **Case-sensitive search**: Elasticsearch wildcard on `.keyword` fields was case-sensitive, so `seele` didn’t match `SEELE`. Fixed via `case_insensitive: true` in wildcard queries (see the query sketch after this list).
|
||||
- **Inventory merge duplication**: Initial inventory-only docs were `ip:*` and live docs were `mac:*`, so both existed. Merge now attaches inventory to live MAC records by IP. Legacy `ip:*` docs may remain stale unless cleaned.
|
||||
- **MCP errors**: Tool errors are now returned as `CallToolResult` with `isError: true` (instead of JSON-RPC errors), so LLMs can see and correct issues.
|
||||
- **Service move**: Repo moved from `/var/core/ansible/network-mcp` to `/var/core/network-mcp`. Compose mount paths updated.
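A minimal sketch of the kind of wildcard clause that fixes the case-sensitivity gotcha, assuming a `host.name.keyword`-style field (the exact field name used by the frontend is not shown here):

```python
# Hypothetical field name; the real query lives in the frontend's search code.
wildcard_clause = {
    "wildcard": {
        "host.name.keyword": {
            "value": "*seele*",
            "case_insensitive": True,  # without this, "*seele*" will not match "SEELE"
        }
    }
}
```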
|
||||
|
||||
## Verification Performed
|
||||
- REST search works for hostname/IP/MAC.
|
||||
- MCP `initialize`, `tools/list`, `tools/call` work.
|
||||
- MCP resource list/templates/read work.
|
||||
- Services verified running via `docker-compose up -d`.
|
||||
|
||||
## Future Work Ideas
|
||||
- **Cleanup**: Add a cleanup job to remove stale `ip:*` docs after successful MAC merge.
|
||||
- **Resource subscriptions**: Implement `resources/subscribe` if clients need push updates.
|
||||
- **Auth**: Optional token on the MCP endpoint for shared LAN exposure.
|
||||
- **More UI**: Add filters/alerts for stale hosts or missing expected ports.
|
||||
- **Metrics**: Export collector stats to detect scan/ingest failures.
|
||||
- **Schema mapping**: Improve Elasticsearch mappings for search (e.g., lowercase normalizers for names/hostnames).
|
||||
|
||||
105
stacks/network-mcp/README.md
Normal file
105
stacks/network-mcp/README.md
Normal file
@ -0,0 +1,105 @@
|
||||
# Network MCP
|
||||
|
||||
A "source of truth" for network devices and ports, backed by Elasticsearch, OPNsense, and Nmap.
|
||||
|
||||
## Architecture
|
||||
|
||||
- **Elasticsearch**: Stores current state (`network-hosts`) and historical events (`network-events-*`).
|
||||
- **OPNsense Collector**: Fetches DHCP/ARP/DNS data to discover hosts.
|
||||
- **Nmap Collector**: Scans discovered hosts for open ports and OS info.
|
||||
|
||||
## Setup
|
||||
|
||||
1. **Environment Config**
|
||||
Copy `.env.example` to `.env` and fill in your details:
|
||||
```bash
|
||||
cp .env.example .env
|
||||
# Edit .env
|
||||
```
|
||||
|
||||
2. **Bootstrap Elastic**
|
||||
Run the bootstrap script (requires `requests` installed locally, or you can run it inside a container):
|
||||
```bash
|
||||
python3 scripts/bootstrap_indices.py
|
||||
```
|
||||
*Note: Ensure you have connectivity to your Elasticsearch instance.*
|
||||
|
||||
3. **Start Services**
|
||||
```bash
|
||||
docker-compose up -d --build
|
||||
```
|
||||
This brings up the collectors and the lightweight frontend (reachable on port `5001`).
|
||||
|
||||
## Configuration
|
||||
|
||||
- **Static Metadata**: Edit `static/host_metadata.json` to add manual notes, roles, or tags to hosts (keyed by `mac:xx:xx...`).
|
||||
- **Intervals**: Adjust polling intervals in `.env`.
|
||||
- **VLAN Discovery (default on)**: Discovery sweeps (`nmap -sn`) run periodically across the OPNsense interfaces listed in `NMAP_DISCOVERY_VLANS`; leaving that list empty sweeps every interface OPNsense reports. Trim the list (or set the flag to `false`) if you only want targeted subnets.
|
||||
- **Quick vs Full Port Scans**: Each collector loop runs a fast, common-port sweep (`NMAP_QUICK_EXTRA_ARGS`, `NMAP_QUICK_BATCH_SIZE`) while a deeper service scan (`NMAP_PORT_RANGE`, `NMAP_BATCH_SIZE`) is triggered once per `NMAP_FULL_INTERVAL_SECONDS` (default daily). Tune these env vars to balance coverage vs. runtime.
|
||||
- **Inventory Overlay**: Entries in `./inventory_targets.yml` are mounted into the OPNsense collector and merged by IP—offline/static hosts from that file (names, notes, expected ports) now appear in `network-hosts` with `source: inventory`.
|
||||
|
||||
## Data Model
|
||||
|
||||
- **`network-hosts`**: Current state of every known host.
|
||||
- **`network-events-YYYY.MM.DD`**: Immutable log of scans and discovery events.
|
||||
|
||||
## Usage
|
||||
|
||||
Query `network-hosts` for the latest view of your network:
|
||||
```json
|
||||
GET network-hosts/_search
|
||||
{
|
||||
"query": {
|
||||
"match_all": {}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Quick Frontend
|
||||
|
||||
A minimal Flask frontend is bundled in docker-compose (service `frontend`) and is exposed on port `5001` so it can be reached from other machines:
|
||||
|
||||
```bash
|
||||
docker-compose up -d frontend
|
||||
```
|
||||
|
||||
Then visit `http://<host-ip>:5001/` to see the merged view (inventory entries are marked with `source: inventory`). If you prefer to run it without Docker for debugging, follow the steps below:
|
||||
|
||||
```bash
|
||||
cd network-mcp
|
||||
python3 -m venv .venv && source .venv/bin/activate
|
||||
pip install -r frontend/requirements.txt
|
||||
python frontend/app.py
|
||||
```
|
||||
|
||||
### MCP / API Endpoints
|
||||
|
||||
The frontend doubles as a Model Context Protocol server. It exposes the manifest at `/.well-known/mcp.json` (or `/api/mcp`) and supports the standard JSON-RPC handshake (`initialize`, `tools/list`, `tools/call`) on the same URL. Agents can either use the RPC tools below or hit the underlying REST endpoints directly.
|
||||
|
||||
- MCP Resources are also available (`resources/list`, `resources/read`, `resources/templates/list`) for clients that prefer resource-style access to snapshots and queries.
|
||||
|
||||
- `GET /api/hosts` – merged host list (supports `limit`, `source`, and repeated `q` params to fuzzy search names, hostnames, IPs, or MACs in a single call).
|
||||
- `GET /api/hosts/<host_id>` – single host document with optional `include_events=true`.
|
||||
- `GET /api/events` – recent scan/discovery events (`limit`, `host_id`, `type`, `since` filters).
|
||||
- `GET /api/hosts/<host_id>/events` – scoped events for a host.
|
||||
- `GET /api/map` – high-level “network map” grouping hosts by detected /24 (IPv4) or /64 (IPv6).
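For example, a plain REST lookup from Python could look like this sketch (the host is a placeholder for wherever the frontend container is published):

```python
import requests

resp = requests.get(
    "http://localhost:5001/api/hosts",  # placeholder host
    params={"q": "seele", "limit": 25},
    timeout=10,
)
resp.raise_for_status()
print(resp.json())
```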
|
||||
|
||||
RPC tool names (mirrored in the manifest) are:
|
||||
|
||||
- `list_hosts` – accepts `{limit, source, terms}` and returns the merged host list.
|
||||
- `network_map` – optional `{limit}` for building /24-/64 summaries.
|
||||
- `get_host` – requires `{host_id}` plus optional `include_events`, `events_limit`.
|
||||
- `list_events` – `{limit, host_id, type, since}`.
|
||||
- `host_events` – requires `{host_id}` plus optional `limit`, `type`, `since`.
|
||||
|
||||
Resource URI examples:
|
||||
|
||||
- `network://hosts?q=seele&limit=50`
|
||||
- `network://host/mac:dc:a6:32:67:55:dc?include_events=true&events_limit=50`
|
||||
- `network://events?type=discovery&limit=100`
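A `resources/read` call for one of these URIs is a thin JSON-RPC wrapper; a sketch in Python (host placeholder again):

```python
import requests

body = {
    "jsonrpc": "2.0",
    "id": 2,
    "method": "resources/read",
    "params": {"uri": "network://hosts?q=seele&limit=50"},
}
print(requests.post("http://localhost:5001/api/mcp", json=body, timeout=10).json())
```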
|
||||
|
||||
All RPC and REST calls share the Elasticsearch credentials from `.env`, so an agent only needs HTTP access to port `5001` to query hosts, notes, and event timelines. Registering the MCP with Codex looks like:
|
||||
|
||||
```bash
|
||||
codex mcp install network-mcp http://<host>:5001/.well-known/mcp.json
|
||||
```
|
||||
0
stacks/network-mcp/collectors/common/__init__.py
Normal file
0
stacks/network-mcp/collectors/common/__init__.py
Normal file
55
stacks/network-mcp/collectors/common/es_auth.py
Normal file
55
stacks/network-mcp/collectors/common/es_auth.py
Normal file
@ -0,0 +1,55 @@
|
||||
import base64
|
||||
from typing import Optional, Tuple
|
||||
|
||||
|
||||
def _clean(value: Optional[str]) -> str:
|
||||
"""
|
||||
Normalize values coming from env files where quotes might be preserved.
|
||||
"""
|
||||
if not value:
|
||||
return ""
|
||||
return value.strip().strip('"').strip()
|
||||
|
||||
|
||||
def resolve_api_key(api_id: Optional[str], api_key: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
|
||||
"""
|
||||
Accept various API key formats and return (api_id, api_key).
|
||||
Supported formats:
|
||||
- Explicit ES_API_ID and ES_API_KEY values.
|
||||
- ES_API_KEY that already contains \"id:key\".
|
||||
- ES_API_KEY that is the base64 encoding of \"id:key\".
|
||||
"""
|
||||
cleaned_id = _clean(api_id)
|
||||
cleaned_key = _clean(api_key)
|
||||
|
||||
if cleaned_id and cleaned_key:
|
||||
return cleaned_id, cleaned_key
|
||||
|
||||
if not cleaned_key:
|
||||
return None, None
|
||||
|
||||
# Raw "id:key" format
|
||||
if ":" in cleaned_key:
|
||||
potential_id, potential_key = cleaned_key.split(":", 1)
|
||||
if potential_id and potential_key:
|
||||
return potential_id, potential_key
|
||||
|
||||
# Base64 encoded "id:key" format
|
||||
try:
|
||||
decoded = base64.b64decode(cleaned_key, validate=True).decode()
|
||||
if ":" in decoded:
|
||||
potential_id, potential_key = decoded.split(":", 1)
|
||||
if potential_id and potential_key:
|
||||
return potential_id, potential_key
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None, None
|
||||
|
||||
|
||||
def build_api_key_header(api_id: str, api_key: str) -> str:
|
||||
"""
|
||||
Return the value for the Authorization header using ApiKey auth.
|
||||
"""
|
||||
token = base64.b64encode(f"{api_id}:{api_key}".encode()).decode()
|
||||
return f"ApiKey {token}"
|
||||
85
stacks/network-mcp/collectors/common/es_client.py
Normal file
85
stacks/network-mcp/collectors/common/es_client.py
Normal file
@ -0,0 +1,85 @@
|
||||
import os
|
||||
import time
|
||||
import urllib3
|
||||
from elasticsearch import Elasticsearch, helpers
|
||||
from .es_auth import resolve_api_key
|
||||
from .logging_config import setup_logging
|
||||
|
||||
# Suppress insecure request warnings if SSL verification is disabled
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
logger = setup_logging("es_client")
|
||||
|
||||
class ESClient:
|
||||
def __init__(self):
|
||||
self.url = os.getenv("ES_URL", "http://localhost:9200")
|
||||
env_api_id = os.getenv("ES_API_ID")
|
||||
env_api_key = os.getenv("ES_API_KEY")
|
||||
self.api_id, self.api_key = resolve_api_key(env_api_id, env_api_key)
|
||||
self.user = os.getenv("ES_USER", "elastic")
|
||||
self.password = os.getenv("ES_PASS", "changeme")
|
||||
self.verify_ssl = os.getenv("ES_VERIFY_SSL", "true").lower() == "true"
|
||||
|
||||
if self.api_id and self.api_key:
|
||||
# Use API key authentication
|
||||
self.client = Elasticsearch(
|
||||
self.url,
|
||||
api_key=(self.api_id, self.api_key),
|
||||
verify_certs=self.verify_ssl,
|
||||
ssl_show_warn=False
|
||||
)
|
||||
logger.info("Using Elasticsearch API key authentication.")
|
||||
else:
|
||||
# Fallback to basic auth
|
||||
self.client = Elasticsearch(
|
||||
self.url,
|
||||
basic_auth=(self.user, self.password),
|
||||
verify_certs=self.verify_ssl,
|
||||
ssl_show_warn=False
|
||||
)
|
||||
logger.info("Using Elasticsearch basic authentication.")
|
||||
|
||||
def check_connection(self):
|
||||
try:
|
||||
return self.client.info()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to connect to Elasticsearch: {e}")
|
||||
raise
|
||||
|
||||
def bulk_index(self, actions):
|
||||
"""
|
||||
Bulk index a list of actions.
|
||||
actions: list of dicts compatible with elasticsearch.helpers.bulk
|
||||
"""
|
||||
if not actions:
|
||||
return 0, []
|
||||
|
||||
try:
|
||||
success, failed = helpers.bulk(self.client, actions, stats_only=False, raise_on_error=False)
|
||||
if failed:
|
||||
logger.warning(f"Bulk index had failures: {len(failed)} items failed.")
|
||||
for item in failed[:5]: # Log first 5 failures
|
||||
logger.warning(f"Failure sample: {item}")
|
||||
else:
|
||||
logger.info(f"Bulk index successful: {success} items.")
|
||||
return success, failed
|
||||
except Exception as e:
|
||||
logger.error(f"Bulk index exception: {e}")
|
||||
raise
|
||||
|
||||
def search_hosts(self, index="network-hosts", query=None, size=1000):
|
||||
"""
|
||||
Search for hosts in network-hosts index.
|
||||
"""
|
||||
if query is None:
|
||||
query = {"match_all": {}}
|
||||
|
||||
try:
|
||||
resp = self.client.search(index=index, query=query, size=size)
|
||||
return [hit["_source"] for hit in resp["hits"]["hits"]]
|
||||
except Exception as e:
|
||||
logger.error(f"Search failed: {e}")
|
||||
return []
|
||||
|
||||
def get_es_client():
|
||||
return ESClient()
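# Example (illustrative):
#   es = get_es_client()
#   es.check_connection()
#   hosts = es.search_hosts(query={"match_all": {}}, size=100)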
|
||||
21
stacks/network-mcp/collectors/common/logging_config.py
Normal file
21
stacks/network-mcp/collectors/common/logging_config.py
Normal file
@ -0,0 +1,21 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
def setup_logging(name: str = "collector") -> logging.Logger:
|
||||
"""
|
||||
Sets up a structured logger.
|
||||
"""
|
||||
logger = logging.getLogger(name)
|
||||
level = os.getenv("LOG_LEVEL", "INFO").upper()
|
||||
logger.setLevel(level)
|
||||
|
||||
if not logger.handlers:
|
||||
handler = logging.StreamHandler(sys.stdout)
|
||||
formatter = logging.Formatter(
|
||||
'%(asctime)s [%(levelname)s] %(name)s: %(message)s'
|
||||
)
|
||||
handler.setFormatter(formatter)
|
||||
logger.addHandler(handler)
|
||||
|
||||
return logger
|
||||
131
stacks/network-mcp/collectors/common/nmap_parser.py
Normal file
131
stacks/network-mcp/collectors/common/nmap_parser.py
Normal file
@ -0,0 +1,131 @@
|
||||
import subprocess
|
||||
import xml.etree.ElementTree as ET
|
||||
import shutil
|
||||
from typing import List, Dict, Optional
|
||||
from .logging_config import setup_logging
|
||||
|
||||
logger = setup_logging("nmap_parser")
|
||||
|
||||
def run_nmap_scan(ips: List[str], extra_args: Optional[List[str]] = None) -> List[Dict]:
|
||||
"""
|
||||
Run nmap on the given IPs and return a list of parsed host dicts.
|
||||
"""
|
||||
if not ips:
|
||||
return []
|
||||
|
||||
if not shutil.which("nmap"):
|
||||
logger.error("nmap binary not found in PATH")
|
||||
return []
|
||||
|
||||
# Default args: -oX - (XML to stdout)
|
||||
cmd = ["nmap", "-oX", "-"]
|
||||
if extra_args:
|
||||
cmd.extend(extra_args)
|
||||
|
||||
# Append IPs
|
||||
cmd.extend(ips)
|
||||
|
||||
logger.info(f"Running nmap command: {' '.join(cmd)}")
|
||||
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
||||
xml_output = result.stdout
|
||||
return parse_nmap_xml(xml_output)
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"Nmap failed: {e.stderr}")
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error(f"Error running nmap: {e}")
|
||||
return []
|
||||
|
||||
def parse_nmap_xml(xml_string: str) -> List[Dict]:
|
||||
"""
|
||||
Parse Nmap XML output into our internal host/port structure.
|
||||
"""
|
||||
try:
|
||||
root = ET.fromstring(xml_string)
|
||||
except ET.ParseError as e:
|
||||
logger.error(f"Failed to parse Nmap XML: {e}")
|
||||
return []
|
||||
|
||||
hosts = []
|
||||
|
||||
for host_node in root.findall("host"):
|
||||
# Helper to find basic info
|
||||
ip = None
|
||||
mac = None
|
||||
hostname = None
|
||||
vendor = None
|
||||
|
||||
# Addresses
|
||||
for addr in host_node.findall("address"):
|
||||
addr_type = addr.get("addrtype")
|
||||
if addr_type == "ipv4":
|
||||
ip = addr.get("addr")
|
||||
elif addr_type == "mac":
|
||||
mac = addr.get("addr")
|
||||
vendor = addr.get("vendor")
|
||||
|
||||
# Hostnames
|
||||
hostnames_node = host_node.find("hostnames")
|
||||
if hostnames_node is not None:
|
||||
# Pick first for now
|
||||
hn = hostnames_node.find("hostname")
|
||||
if hn is not None:
|
||||
hostname = hn.get("name")
|
||||
|
||||
# Ports
|
||||
ports = []
|
||||
ports_node = host_node.find("ports")
|
||||
if ports_node is not None:
|
||||
for port_node in ports_node.findall("port"):
|
||||
state_node = port_node.find("state")
|
||||
state = state_node.get("state") if state_node is not None else "unknown"
|
||||
|
||||
# Only keep open ports; closed/filtered ports are skipped.
|
||||
if state != "open":
|
||||
continue
|
||||
|
||||
port_id = int(port_node.get("portid"))
|
||||
protocol = port_node.get("protocol")
|
||||
|
||||
service_node = port_node.find("service")
|
||||
service_name = service_node.get("name") if service_node is not None else "unknown"
|
||||
product = service_node.get("product") if service_node is not None else None
|
||||
version = service_node.get("version") if service_node is not None else None
|
||||
|
||||
service_def = {
|
||||
"name": service_name,
|
||||
}
|
||||
if product: service_def["product"] = product
|
||||
if version: service_def["version"] = version
|
||||
|
||||
ports.append({
|
||||
"port": port_id,
|
||||
"proto": protocol,
|
||||
"state": state,
|
||||
"service": service_def
|
||||
})
|
||||
|
||||
# OS detection (basic)
|
||||
os_match = None
|
||||
os_node = host_node.find("os")
|
||||
if os_node is not None:
|
||||
os_match_node = os_node.find("osmatch")
|
||||
if os_match_node is not None:
|
||||
os_match = {
|
||||
"name": os_match_node.get("name"),
|
||||
"accuracy": os_match_node.get("accuracy")
|
||||
}
|
||||
|
||||
host_data = {
|
||||
"ip": ip,
|
||||
"mac": mac, # might be None if scanning remote segment
|
||||
"hostname": hostname,
|
||||
"vendor": vendor,
|
||||
"ports": ports,
|
||||
"os_match": os_match
|
||||
}
|
||||
hosts.append(host_data)
|
||||
|
||||
return hosts
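# Example (illustrative):
#   discovered = run_nmap_scan(["192.168.1.0/24"], ["-sn", "-n"])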
|
||||
105
stacks/network-mcp/collectors/common/opnsense_client.py
Normal file
105
stacks/network-mcp/collectors/common/opnsense_client.py
Normal file
@ -0,0 +1,105 @@
|
||||
import os
|
||||
import requests
|
||||
import json
|
||||
import ipaddress
|
||||
from .logging_config import setup_logging
|
||||
|
||||
logger = setup_logging("opnsense_client")
|
||||
|
||||
class OPNsenseClient:
|
||||
def __init__(self):
|
||||
self.base_url = os.getenv("OPNSENSE_URL", "https://192.168.1.1").rstrip('/')
|
||||
self.api_key = os.getenv("OPNSENSE_API_KEY")
|
||||
self.api_secret = os.getenv("OPNSENSE_API_SECRET")
|
||||
self.verify_ssl = os.getenv("ES_VERIFY_SSL", "true").lower() == "true" # Reusing verify flag or add explicit OPNSENSE_VERIFY_SSL
|
||||
|
||||
if not self.api_key or not self.api_secret:
|
||||
logger.warning("OPNSENSE_API_KEY or OPNSENSE_API_SECRET not set. API calls will fail.")
|
||||
|
||||
def _get(self, endpoint, params=None):
|
||||
url = f"{self.base_url}{endpoint}"
|
||||
try:
|
||||
response = requests.get(
|
||||
url,
|
||||
auth=(self.api_key, self.api_secret),
|
||||
verify=self.verify_ssl,
|
||||
params=params,
|
||||
timeout=10
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to fetch {url}: {e}")
|
||||
return {}
|
||||
|
||||
def get_dhcp_leases_v4(self):
|
||||
# Endpoint: /api/dhcpv4/leases/searchLease
# OPNsense "search" endpoints typically accept a plain GET and return {"rows": [...], "total": ...}.
data = self._get("/api/dhcpv4/leases/searchLease")
return data.get("rows", [])
|
||||
|
||||
def get_arp_table(self):
|
||||
# Endpoint: /api/diagnostics/interface/getArp returns the ARP table.
# Depending on the OPNsense version this comes back as a bare list or as {"rows": [...]},
# so handle both shapes.
data = self._get("/api/diagnostics/interface/getArp")
if isinstance(data, list):
return data
return data.get("rows", [])
|
||||
|
||||
def get_dns_overrides(self):
|
||||
# Endpoint: /api/unbound/settings/searchHostOverride
|
||||
data = self._get("/api/unbound/settings/searchHostOverride")
|
||||
return data.get("rows", [])
|
||||
|
||||
def get_vlan_networks(self):
|
||||
"""
|
||||
Build a list of IPv4 networks (CIDRs) from the routing table, grouped by interface description.
|
||||
"""
|
||||
routes = self._get("/api/diagnostics/interface/getRoutes")
|
||||
networks = []
|
||||
if not isinstance(routes, list):
|
||||
return networks
|
||||
|
||||
seen = set()
|
||||
for route in routes:
|
||||
if route.get("proto") != "ipv4":
|
||||
continue
|
||||
destination = route.get("destination")
|
||||
if not destination or "/" not in destination or destination == "default":
|
||||
continue
|
||||
desc = route.get("intf_description")
|
||||
if not desc:
|
||||
continue
|
||||
try:
|
||||
network = ipaddress.ip_network(destination, strict=False)
|
||||
except ValueError:
|
||||
continue
|
||||
# Skip host routes (/32) which are usually static peers
|
||||
if network.prefixlen == 32:
|
||||
continue
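# Also skip very broad routes (prefix shorter than /16).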
|
||||
if network.prefixlen < 16:
|
||||
continue
|
||||
|
||||
key = (desc, str(network))
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
networks.append({
|
||||
"key": desc,
|
||||
"name": desc,
|
||||
"cidr": str(network)
|
||||
})
|
||||
return networks
|
||||
|
||||
def get_opnsense_client():
|
||||
return OPNsenseClient()
|
||||
14
stacks/network-mcp/collectors/nmap_collector/Dockerfile
Normal file
14
stacks/network-mcp/collectors/nmap_collector/Dockerfile
Normal file
@ -0,0 +1,14 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
RUN apt-get update && apt-get install -y nmap && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY collectors/common /app/collectors/common
|
||||
COPY collectors/nmap_collector /app/collectors/nmap_collector
|
||||
|
||||
ENV PYTHONPATH=/app
|
||||
|
||||
RUN pip install requests elasticsearch==8.15.1
|
||||
|
||||
CMD ["python", "collectors/nmap_collector/main.py"]
|
||||
378
stacks/network-mcp/collectors/nmap_collector/main.py
Normal file
378
stacks/network-mcp/collectors/nmap_collector/main.py
Normal file
@ -0,0 +1,378 @@
|
||||
import os
|
||||
import time
|
||||
import datetime
|
||||
import sys
|
||||
import json
|
||||
import shlex
|
||||
|
||||
# Ensure we can import from common
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
|
||||
|
||||
from collectors.common.es_client import get_es_client
|
||||
from collectors.common.opnsense_client import get_opnsense_client
|
||||
from collectors.common.nmap_parser import run_nmap_scan
|
||||
from collectors.common.logging_config import setup_logging
|
||||
|
||||
logger = setup_logging("nmap_collector")
|
||||
|
||||
def get_now_iso():
|
||||
return datetime.datetime.now(datetime.timezone.utc).isoformat()
|
||||
|
||||
def chunk_list(lst, n):
|
||||
for i in range(0, len(lst), n):
|
||||
yield lst[i:i + n]
|
||||
|
||||
def should_scan_vlan(vlan, allowlist):
|
||||
if not allowlist:
|
||||
return True
|
||||
name = (vlan.get("name") or "").strip()
|
||||
key = (vlan.get("key") or "").strip()
|
||||
return name in allowlist or key in allowlist
|
||||
|
||||
def build_discovery_update_action(host_id, mac, ip, hostname, vendor, ts_iso):
|
||||
mac_norm = mac.lower() if mac else None
|
||||
upsert_host = {
|
||||
"host": {
|
||||
"id": host_id,
|
||||
"macs": [mac_norm] if mac_norm else [],
|
||||
"ips": [ip] if ip else [],
|
||||
"name": hostname,
|
||||
"hostnames": [hostname] if hostname else [],
|
||||
"vendor": vendor,
|
||||
"sources": ["nmap-discovery"],
|
||||
"last_seen": ts_iso,
|
||||
"first_seen": ts_iso
|
||||
}
|
||||
}
|
||||
|
||||
script_source = """
|
||||
if (ctx._source.host == null) { ctx._source.host = [:]; }
|
||||
if (ctx._source.host.macs == null) { ctx._source.host.macs = []; }
|
||||
if (ctx._source.host.ips == null) { ctx._source.host.ips = []; }
|
||||
if (ctx._source.host.hostnames == null) { ctx._source.host.hostnames = []; }
|
||||
if (ctx._source.host.sources == null) { ctx._source.host.sources = []; }
|
||||
|
||||
if (params.mac != null && !ctx._source.host.macs.contains(params.mac)) {
|
||||
ctx._source.host.macs.add(params.mac);
|
||||
}
|
||||
if (params.ip != null && !ctx._source.host.ips.contains(params.ip)) {
|
||||
ctx._source.host.ips.add(params.ip);
|
||||
}
|
||||
if (params.hostname != null && !ctx._source.host.hostnames.contains(params.hostname)) {
|
||||
ctx._source.host.hostnames.add(params.hostname);
|
||||
}
|
||||
if (!ctx._source.host.sources.contains(params.source_tag)) {
|
||||
ctx._source.host.sources.add(params.source_tag);
|
||||
}
|
||||
ctx._source.host.last_seen = params.ts;
|
||||
if (ctx._source.host.name == null && params.hostname != null) {
|
||||
ctx._source.host.name = params.hostname;
|
||||
}
|
||||
if (params.vendor != null && (ctx._source.host.vendor == null || ctx._source.host.vendor == \"\")) {
|
||||
ctx._source.host.vendor = params.vendor;
|
||||
}
|
||||
"""
|
||||
|
||||
return {
|
||||
"_index": "network-hosts",
|
||||
"_op_type": "update",
|
||||
"_id": host_id,
|
||||
"script": {
|
||||
"source": script_source,
|
||||
"lang": "painless",
|
||||
"params": {
|
||||
"mac": mac_norm,
|
||||
"ip": ip,
|
||||
"hostname": hostname,
|
||||
"vendor": vendor,
|
||||
"ts": ts_iso,
|
||||
"source_tag": "nmap-discovery"
|
||||
}
|
||||
},
|
||||
"upsert": upsert_host
|
||||
}
|
||||
|
||||
def run_vlan_discovery(es, opnsense_client, discovery_args, vlan_filter):
|
||||
networks = opnsense_client.get_vlan_networks()
|
||||
if not networks:
|
||||
logger.info("VLAN discovery skipped: OPNsense returned no interfaces.")
|
||||
return
|
||||
|
||||
scoped_networks = [n for n in networks if should_scan_vlan(n, vlan_filter)]
|
||||
if not scoped_networks:
|
||||
logger.info("VLAN discovery skipped: no interfaces matched NMAP_DISCOVERY_VLANS.")
|
||||
return
|
||||
|
||||
actions = []
|
||||
today = datetime.datetime.now().strftime("%Y.%m.%d")
|
||||
event_index = f"network-events-{today}"
|
||||
|
||||
for vlan in scoped_networks:
|
||||
cidr = vlan.get("cidr")
|
||||
if not cidr:
|
||||
continue
|
||||
logger.info(f"VLAN discovery scan for {vlan.get('name')} ({cidr})")
|
||||
scan_ts = get_now_iso()
|
||||
scan_id = f"nmap_discovery_{vlan.get('name')}_{scan_ts}"
|
||||
results = run_nmap_scan([cidr], discovery_args)
|
||||
|
||||
for res in results:
|
||||
ip = res.get("ip")
|
||||
if not ip:
|
||||
continue
|
||||
|
||||
mac = res.get("mac")
|
||||
hostname = res.get("hostname")
|
||||
vendor = res.get("vendor")
|
||||
host_id = f"mac:{mac.lower()}" if mac else None
|
||||
|
||||
event_doc = {
|
||||
"@timestamp": scan_ts,
|
||||
"source": "nmap-discovery",
|
||||
"scan_id": scan_id,
|
||||
"vlan": vlan.get("name"),
|
||||
"cidr": cidr,
|
||||
"host": {
|
||||
"id": host_id,
|
||||
"ip": ip,
|
||||
"mac": mac,
|
||||
"hostname": hostname,
|
||||
"vendor": vendor
|
||||
}
|
||||
}
|
||||
actions.append({
|
||||
"_index": event_index,
|
||||
"_op_type": "index",
|
||||
"_source": event_doc
|
||||
})
|
||||
|
||||
if host_id:
|
||||
actions.append(
|
||||
build_discovery_update_action(host_id, mac, ip, hostname, vendor, scan_ts)
|
||||
)
|
||||
|
||||
if actions:
|
||||
logger.info(f"VLAN discovery produced {len(actions)} Elasticsearch actions.")
|
||||
es.bulk_index(actions)
|
||||
else:
|
||||
logger.info("VLAN discovery finished with no hosts discovered.")
|
||||
|
||||
def main():
|
||||
es = get_es_client()
|
||||
opnsense_client = get_opnsense_client()
|
||||
|
||||
interval = int(os.getenv("NMAP_INTERVAL_SECONDS", "300"))
|
||||
full_batch_size = int(os.getenv("NMAP_BATCH_SIZE", "10"))
|
||||
quick_batch_size = int(os.getenv("NMAP_QUICK_BATCH_SIZE", "30"))
|
||||
port_range = os.getenv("NMAP_PORT_RANGE", "1-1024") # Full scan range
|
||||
discovery_enabled = os.getenv("NMAP_DISCOVERY_ENABLED", "false").lower() == "true"
|
||||
discovery_interval = int(os.getenv("NMAP_DISCOVERY_INTERVAL_SECONDS", "3600"))
|
||||
discovery_vlan_filter = [v.strip() for v in os.getenv("NMAP_DISCOVERY_VLANS", "").split(",") if v.strip()]
|
||||
discovery_extra_args = os.getenv("NMAP_DISCOVERY_EXTRA_ARGS", "-sn -n").strip()
|
||||
if discovery_extra_args:
|
||||
discovery_extra_args = shlex.split(discovery_extra_args)
|
||||
else:
|
||||
discovery_extra_args = ["-sn", "-n"]
|
||||
discovery_last_run = time.time() - discovery_interval if discovery_enabled else 0.0
|
||||
full_interval = int(os.getenv("NMAP_FULL_INTERVAL_SECONDS", "86400"))
|
||||
quick_extra_str = os.getenv("NMAP_QUICK_EXTRA_ARGS", "-sS --top-ports 100 -T4 --open -Pn").strip()
|
||||
quick_extra_args = shlex.split(quick_extra_str) if quick_extra_str else ["-sS", "--top-ports", "100", "-T4", "--open", "-Pn"]
|
||||
last_full_scan = time.time()
|
||||
|
||||
# Construct base nmap args
|
||||
# -sV enables service/version detection and --open limits output to open ports.
# (Adding -O for OS detection would also need root or raw-socket capabilities; the container normally runs as root.)
|
||||
extra_args = ["-sV", "--open"]
|
||||
|
||||
# Check if port_range looks like a range or specific ports
|
||||
if port_range:
|
||||
extra_args.extend(["-p", port_range])
|
||||
|
||||
# Add user provided extra args
|
||||
user_args = os.getenv("NMAP_EXTRA_ARGS", "")
|
||||
if user_args:
|
||||
extra_args.extend(user_args.split())
|
||||
|
||||
logger.info("Starting Nmap collector loop...")
|
||||
|
||||
while True:
|
||||
try:
|
||||
start_time = time.time()
|
||||
ts_iso = get_now_iso()
|
||||
now = time.time()
|
||||
use_full_scan = (now - last_full_scan) >= full_interval
|
||||
scan_type = "full" if use_full_scan else "quick"
|
||||
scan_id = f"nmap_{scan_type}_{ts_iso}"
|
||||
current_batch_size = full_batch_size if use_full_scan else quick_batch_size
|
||||
scan_args = extra_args if use_full_scan else quick_extra_args
|
||||
|
||||
if use_full_scan:
|
||||
last_full_scan = now
|
||||
logger.info("Running scheduled full service scan.")
|
||||
else:
|
||||
logger.info("Running quick common-port sweep.")
|
||||
|
||||
if discovery_enabled and (time.time() - discovery_last_run) >= discovery_interval:
|
||||
run_vlan_discovery(es, opnsense_client, discovery_extra_args, discovery_vlan_filter)
|
||||
discovery_last_run = time.time()
|
||||
|
||||
# 1. Get targets from ES
|
||||
# We only want hosts that have an IP.
|
||||
hosts = es.search_hosts(index="network-hosts", size=1000)
|
||||
|
||||
# Extract IPs to scan. Map IP -> Host ID to correlate back
|
||||
targets = []
|
||||
ip_to_host_id = {}
|
||||
|
||||
for h in hosts:
|
||||
# h is {"host": {...}, "ports": [...]}
|
||||
host_info = h.get("host", {})
|
||||
hid = host_info.get("id")
|
||||
ips = host_info.get("ips", [])
|
||||
|
||||
if not hid or not ips:
|
||||
continue
|
||||
|
||||
# Pick the "best" IP? Or scan all?
|
||||
# Scaning all might be duplicate work if they point to same box.
|
||||
# Let's pick the first one for now.
|
||||
target_ip = ips[0]
|
||||
targets.append(target_ip)
|
||||
ip_to_host_id[target_ip] = hid
|
||||
|
||||
logger.info(f"Found {len(targets)} targets to scan ({scan_type}).")
|
||||
|
||||
total_processed = 0
|
||||
logger.info(f"Scanning {scan_type} run with {len(targets)} targets.")
|
||||
scan_results = run_nmap_scan(targets, scan_args)
|
||||
actions = []
|
||||
today = datetime.datetime.now().strftime("%Y.%m.%d")
|
||||
event_index = f"network-events-{today}"
|
||||
|
||||
for res in scan_results:
|
||||
ip = res.get("ip")
|
||||
if not ip or ip not in ip_to_host_id:
|
||||
continue
|
||||
|
||||
hid = ip_to_host_id[ip]
|
||||
total_processed += 1
|
||||
|
||||
for p in res["ports"]:
|
||||
p["last_seen"] = ts_iso
|
||||
p["last_scan_id"] = scan_id
|
||||
|
||||
event_doc = {
|
||||
"@timestamp": ts_iso,
|
||||
"source": "nmap",
|
||||
"scan_id": scan_id,
|
||||
"host": {"id": hid, "ip": ip},
|
||||
"ports": res["ports"],
|
||||
"os": res.get("os_match")
|
||||
}
|
||||
actions.append({
|
||||
"_index": event_index,
|
||||
"_op_type": "index",
|
||||
"_source": event_doc
|
||||
})
|
||||
|
||||
script_source = """
|
||||
if (ctx._source.host == null) { ctx._source.host = [:]; }
|
||||
if (ctx._source.host.sources == null) { ctx._source.host.sources = []; }
|
||||
if (!ctx._source.host.sources.contains('nmap')) {
|
||||
ctx._source.host.sources.add('nmap');
|
||||
}
|
||||
ctx._source.host.last_seen = params.ts;
|
||||
|
||||
if (params.os != null) {
|
||||
ctx._source.host.os = params.os;
|
||||
}
|
||||
|
||||
if (ctx._source.ports == null) {
|
||||
ctx._source.ports = [];
|
||||
}
|
||||
|
||||
for (new_p in params.new_ports) {
|
||||
boolean found = false;
|
||||
for (old_p in ctx._source.ports) {
|
||||
if (old_p.port == new_p.port && old_p.proto == new_p.proto) {
|
||||
old_p.last_seen = params.ts;
|
||||
old_p.state = new_p.state;
|
||||
old_p.service = new_p.service;
|
||||
old_p.last_scan_id = params.scan_id;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
new_p.first_seen = params.ts;
|
||||
ctx._source.ports.add(new_p);
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
actions.append({
|
||||
"_index": "network-hosts",
|
||||
"_op_type": "update",
|
||||
"_id": hid,
|
||||
"script": {
|
||||
"source": script_source,
|
||||
"lang": "painless",
|
||||
"params": {
|
||||
"ts": ts_iso,
|
||||
"os": res.get("os_match"),
|
||||
"new_ports": res["ports"],
|
||||
"scan_id": scan_id
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
for p in res["ports"]:
|
||||
svc_id = f"{hid}:{p['proto']}:{p['port']}"
|
||||
svc_script = """
|
||||
ctx._source.last_seen = params.ts;
|
||||
ctx._source.state = params.state;
|
||||
ctx._source.service = params.service;
|
||||
if (ctx._source.first_seen == null) {
|
||||
ctx._source.first_seen = params.ts;
|
||||
}
|
||||
"""
|
||||
actions.append({
|
||||
"_index": "network-services",
|
||||
"_op_type": "update",
|
||||
"_id": svc_id,
|
||||
"script": {
|
||||
"source": svc_script,
|
||||
"lang": "painless",
|
||||
"params": {
|
||||
"ts": ts_iso,
|
||||
"state": p["state"],
|
||||
"service": p["service"]
|
||||
}
|
||||
},
|
||||
"upsert": {
|
||||
"host_id": hid,
|
||||
"host_ip": ip,
|
||||
"port": p["port"],
|
||||
"proto": p["proto"],
|
||||
"service": p["service"],
|
||||
"state": p["state"],
|
||||
"last_seen": ts_iso,
|
||||
"first_seen": ts_iso,
|
||||
"sources": ["nmap"]
|
||||
}
|
||||
})
|
||||
|
||||
if actions:
|
||||
es.bulk_index(actions)
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
sleep_time = max(0, interval - elapsed)
|
||||
logger.info(f"Nmap {scan_type} cycle done. Scanned {total_processed} hosts in {elapsed:.2f}s. Sleeping {sleep_time:.2f}s")
|
||||
time.sleep(sleep_time)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in Nmap loop: {e}")
|
||||
time.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
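Note: both collectors hand their bulk actions to es.bulk_index(...) from collectors/common/es_client.py, which is not part of this diff. As rough orientation only, a minimal wrapper along these lines would match the action format used above (index actions carrying _source, update actions carrying script/doc/upsert); the class name, constructor arguments, and return shape are assumptions, not the actual shared module.

import os
from elasticsearch import Elasticsearch, helpers

class ESClient:
    """Sketch only -- the real helper lives in collectors/common/es_client.py (not in this diff)."""

    def __init__(self, url: str, api_key: str | None = None):
        # verify_certs=False mirrors the self-hosted setup implied by ES_VERIFY_SSL elsewhere.
        self.client = Elasticsearch(url, api_key=api_key, verify_certs=False)

    def bulk_index(self, actions):
        # The actions built above already follow the helpers.bulk() convention:
        # _index/_op_type/_id plus _source for index ops, or script/doc/upsert for update ops.
        return helpers.bulk(self.client, actions, raise_on_error=False)

def get_es_client() -> "ESClient":
    return ESClient(os.getenv("ES_URL", "http://localhost:9200"), os.getenv("ES_API_KEY"))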
14
stacks/network-mcp/collectors/opnsense_collector/Dockerfile
Normal file
14
stacks/network-mcp/collectors/opnsense_collector/Dockerfile
Normal file
@ -0,0 +1,14 @@
FROM python:3.11-slim

WORKDIR /app

COPY collectors/common /app/collectors/common
COPY collectors/opnsense_collector /app/collectors/opnsense_collector

# We need to make sure the module path works.
# main.py does sys.path.append, but the cleanest approach is to set PYTHONPATH.
ENV PYTHONPATH=/app

RUN pip install requests elasticsearch==8.15.1 pyyaml

CMD ["python", "collectors/opnsense_collector/main.py"]
261
stacks/network-mcp/collectors/opnsense_collector/main.py
Normal file
261
stacks/network-mcp/collectors/opnsense_collector/main.py
Normal file
@ -0,0 +1,261 @@
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
import datetime
|
||||
import sys
|
||||
import yaml
|
||||
|
||||
# Ensure we can import from common
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
|
||||
|
||||
from collectors.common.es_client import get_es_client
|
||||
from collectors.common.opnsense_client import get_opnsense_client
|
||||
from collectors.common.logging_config import setup_logging
|
||||
|
||||
logger = setup_logging("opnsense_collector")
|
||||
|
||||
def load_static_metadata(path="/app/static/host_metadata.json"):
|
||||
if not os.path.exists(path):
|
||||
logger.info(f"No static metadata found at {path}")
|
||||
return {}
|
||||
try:
|
||||
with open(path, 'r') as f:
|
||||
return json.load(f)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load static metadata: {e}")
|
||||
return {}
|
||||
|
||||
def load_inventory_targets(path=None):
|
||||
path = path or os.getenv("INVENTORY_FILE", "/app/inventory_targets.yml")
|
||||
if not os.path.exists(path):
|
||||
logger.info(f"No inventory targets found at {path}")
|
||||
return []
|
||||
try:
|
||||
with open(path, 'r') as f:
|
||||
data = yaml.safe_load(f) or {}
|
||||
return data.get("inventory_targets", [])
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load inventory targets: {e}")
|
||||
return []
|
||||
|
||||
def normalize_mac(mac):
|
||||
if not mac:
|
||||
return None
|
||||
return mac.lower().replace("-", ":")
|
||||
|
||||
def get_now_iso():
|
||||
return datetime.datetime.now(datetime.timezone.utc).isoformat()
|
||||
|
||||
def main():
|
||||
es = get_es_client()
|
||||
opn = get_opnsense_client()
|
||||
|
||||
interval = int(os.getenv("COLLECTOR_INTERVAL_SECONDS", "60"))
|
||||
|
||||
logger.info("Starting OPNsense collector loop...")
|
||||
|
||||
while True:
|
||||
try:
|
||||
start_time = time.time()
|
||||
ts_iso = get_now_iso()
|
||||
|
||||
# 1. Fetch Data
|
||||
dhcp_v4 = opn.get_dhcp_leases_v4()
|
||||
arp_table = opn.get_arp_table()
|
||||
dns_overrides = opn.get_dns_overrides()
|
||||
|
||||
static_meta = load_static_metadata()
|
||||
inventory_entries = load_inventory_targets()
|
||||
|
||||
# 2. Process Data -> hosts map
|
||||
# Key: identifier (mac:xx... or ip:xxx)
|
||||
hosts_map = {}
|
||||
|
||||
def create_host_entry(identifier):
|
||||
return {
|
||||
"id": identifier,
|
||||
"macs": set(),
|
||||
"ips": set(),
|
||||
"hostnames": set(),
|
||||
"sources": set(),
|
||||
"preferred_name": None,
|
||||
"inventory_notes": None,
|
||||
"inventory_ports": None
|
||||
}
|
||||
|
||||
def get_or_create_host(mac):
|
||||
norm_mac = normalize_mac(mac)
|
||||
if not norm_mac:
|
||||
return None
|
||||
identifier = f"mac:{norm_mac}"
|
||||
host = hosts_map.setdefault(identifier, create_host_entry(identifier))
|
||||
host["macs"].add(norm_mac)
|
||||
return host
|
||||
|
||||
def get_or_create_host_by_ip(ip):
|
||||
if not ip:
|
||||
return None
|
||||
identifier = f"ip:{ip}"
|
||||
host = hosts_map.setdefault(identifier, create_host_entry(identifier))
|
||||
host["ips"].add(ip)
|
||||
return host
|
||||
|
||||
# Process DHCP
|
||||
for lease in dhcp_v4:
|
||||
# Structure depends on OPNsense version, but usually has 'mac', 'address', 'hostname'
|
||||
mac = lease.get('mac') or lease.get('hw_address')
|
||||
ip = lease.get('address') or lease.get('ip')
|
||||
hostname = lease.get('hostname')
|
||||
|
||||
host = get_or_create_host(mac)
|
||||
if host:
|
||||
if ip: host["ips"].add(ip)
|
||||
if hostname: host["hostnames"].add(hostname)
|
||||
host["sources"].add("opnsense-dhcp")
|
||||
|
||||
# Process ARP
|
||||
for entry in arp_table:
|
||||
# Structure: 'mac', 'ip', 'hostname' (sometimes)
|
||||
mac = entry.get('mac')
|
||||
ip = entry.get('ip')
|
||||
hostname = entry.get('hostname')
|
||||
|
||||
host = get_or_create_host(mac)
|
||||
if host:
|
||||
if ip: host["ips"].add(ip)
|
||||
if hostname and hostname != "?": host["hostnames"].add(hostname)
|
||||
host["sources"].add("opnsense-arp")
|
||||
|
||||
# Process DNS Overrides (mapped by IP when possible)
|
||||
ip_to_identifier = {}
|
||||
for identifier, h in hosts_map.items():
|
||||
for ip in h["ips"]:
|
||||
ip_to_identifier[ip] = identifier
|
||||
|
||||
for override in dns_overrides:
|
||||
ip = override.get('ip')
|
||||
domain = override.get('domain')
|
||||
hostname = override.get('hostname')
|
||||
full_fqdn = f"{hostname}.{domain}" if hostname and domain else hostname
|
||||
|
||||
if ip and ip in ip_to_identifier:
|
||||
identifier = ip_to_identifier[ip]
|
||||
if full_fqdn:
|
||||
hosts_map[identifier]["hostnames"].add(full_fqdn)
|
||||
hosts_map[identifier]["sources"].add("opnsense-dns")
|
||||
|
||||
# Process inventory targets (by IP)
|
||||
for entry in inventory_entries:
|
||||
ip = entry.get("ip")
|
||||
if not ip:
|
||||
continue
|
||||
identifier = ip_to_identifier.get(ip)
|
||||
if identifier:
|
||||
host = hosts_map.get(identifier)
|
||||
if host is None:
|
||||
host = get_or_create_host_by_ip(ip)
|
||||
ip_to_identifier[ip] = host["id"]
|
||||
else:
|
||||
host = get_or_create_host_by_ip(ip)
|
||||
if host:
|
||||
ip_to_identifier[ip] = host["id"]
|
||||
if not host:
|
||||
continue
|
||||
hostname = entry.get("hostname")
|
||||
name = entry.get("name")
|
||||
if hostname:
|
||||
host["hostnames"].add(hostname)
|
||||
if name:
|
||||
host["hostnames"].add(name)
|
||||
host["preferred_name"] = name
|
||||
host["sources"].add("inventory")
|
||||
notes = entry.get("notes")
|
||||
if notes:
|
||||
host["inventory_notes"] = notes
|
||||
ports = entry.get("ports")
|
||||
if ports:
|
||||
host["inventory_ports"] = ports
|
||||
|
||||
# 3. Build Actions
|
||||
actions = []
|
||||
today = datetime.datetime.now().strftime("%Y.%m.%d")
|
||||
event_index = f"network-events-{today}"
|
||||
|
||||
for _, h_data in hosts_map.items():
|
||||
name = h_data.get("preferred_name")
|
||||
if not name and h_data["hostnames"]:
|
||||
name = next(iter(h_data["hostnames"]))
|
||||
|
||||
final_host = {
|
||||
"host": {
|
||||
"id": h_data["id"],
|
||||
"macs": list(h_data["macs"]),
|
||||
"ips": list(h_data["ips"]),
|
||||
"name": name,
|
||||
"hostnames": list(h_data["hostnames"]),
|
||||
"last_seen": ts_iso,
|
||||
"sources": list(h_data["sources"])
|
||||
}
|
||||
}
|
||||
|
||||
if h_data.get("inventory_notes"):
|
||||
final_host["host"]["notes"] = h_data["inventory_notes"]
|
||||
if h_data.get("inventory_ports"):
|
||||
final_host["host"]["expected_ports"] = h_data["inventory_ports"]
|
||||
# Merge Static Metadata
|
||||
if h_data["id"] in static_meta:
|
||||
meta = static_meta[h_data["id"]]
|
||||
# Merge fields
|
||||
for k, v in meta.items():
|
||||
final_host["host"][k] = v
|
||||
|
||||
# 3a. Event Document
|
||||
event_doc = {
|
||||
"@timestamp": ts_iso,
|
||||
"source": "opnsense",
|
||||
"scan_id": f"opnsense_{ts_iso}",
|
||||
"host": final_host["host"]
|
||||
}
|
||||
actions.append({
|
||||
"_index": event_index,
|
||||
"_op_type": "index",
|
||||
"_source": event_doc
|
||||
})
|
||||
|
||||
# 3b. Host Upsert
|
||||
# We use a script upsert or doc_as_upsert.
|
||||
# doc_as_upsert is simpler but replaces lists.
|
||||
# Ideally we would merge the lists (ips, macs), but for now replacing them with the latest 'truth' from OPNsense + static metadata is acceptable.
|
||||
# However, we don't want to lose 'ports' info from Nmap.
|
||||
# So we must NOT overwrite 'ports'.
|
||||
|
||||
host_update_doc = {
|
||||
"host": final_host["host"]
|
||||
}
|
||||
|
||||
actions.append({
|
||||
"_index": "network-hosts",
|
||||
"_op_type": "update",
|
||||
"_id": h_data["id"],
|
||||
"doc": host_update_doc,
|
||||
"doc_as_upsert": True
|
||||
})
|
||||
|
||||
# 4. Send to ES
|
||||
if actions:
|
||||
logger.info(f"Sending {len(actions)} actions to Elasticsearch...")
|
||||
success, failed = es.bulk_index(actions)
|
||||
else:
|
||||
logger.info("No hosts found or no actions generated.")
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
sleep_time = max(0, interval - elapsed)
|
||||
logger.info(f"Cycle done in {elapsed:.2f}s. Sleeping for {sleep_time:.2f}s")
|
||||
time.sleep(sleep_time)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in main loop: {e}")
|
||||
time.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
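For reference, the update action built above produces host documents shaped like the following (values borrowed from the test fixtures further down; expected_ports is hypothetical since no inventory file is included in this diff). The nmap collector maintains the separate top-level ports array on the same document, which is why this doc_as_upsert deliberately leaves ports untouched.

example_action = {
    "_index": "network-hosts",
    "_op_type": "update",
    "_id": "mac:aa:bb:cc:dd:ee:ff",
    "doc": {
        "host": {
            "id": "mac:aa:bb:cc:dd:ee:ff",
            "macs": ["aa:bb:cc:dd:ee:ff"],
            "ips": ["192.168.5.34"],
            "name": "core",
            "hostnames": ["core.localdomain"],
            "last_seen": "2025-12-14T16:27:15+00:00",
            "sources": ["inventory", "opnsense-arp", "opnsense-dhcp"],
            "notes": "Production Docker host",
            "expected_ports": ["22", "443"],  # hypothetical inventory values
        }
    },
    "doc_as_upsert": True,
}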
43
stacks/network-mcp/docker-compose.yml
Normal file
43
stacks/network-mcp/docker-compose.yml
Normal file
@ -0,0 +1,43 @@
version: "3.9"

services:
  frontend:
    build:
      context: .
      dockerfile: frontend/Dockerfile
    restart: always
    env_file:
      - .env
    environment:
      FRONTEND_PORT: "5001"
    ports:
      - "5001:5001"

  opnsense_collector:
    build:
      context: .
      dockerfile: collectors/opnsense_collector/Dockerfile
    restart: always
    env_file:
      - .env
    volumes:
      - ./static:/app/static
      - ./inventory_targets.yml:/app/inventory_targets.yml:ro
    environment:
      COLLECTOR_INTERVAL_SECONDS: "60"
      INVENTORY_FILE: "/app/inventory_targets.yml"

  nmap_collector:
    build:
      context: .
      dockerfile: collectors/nmap_collector/Dockerfile
    restart: always
    cap_add:
      - NET_RAW
      - NET_ADMIN
    env_file:
      - .env
    environment:
      NMAP_INTERVAL_SECONDS: "300"
      NMAP_PORT_RANGE: "1-1024"
      NMAP_BATCH_SIZE: "10"
15
stacks/network-mcp/frontend/Dockerfile
Normal file
15
stacks/network-mcp/frontend/Dockerfile
Normal file
@ -0,0 +1,15 @@
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

WORKDIR /app

COPY frontend/requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt

COPY frontend/ /app/

EXPOSE 5001

CMD ["gunicorn", "--bind", "0.0.0.0:5001", "app:app"]
2
stacks/network-mcp/frontend/__init__.py
Normal file
2
stacks/network-mcp/frontend/__init__.py
Normal file
@ -0,0 +1,2 @@
"""Network MCP frontend package (used for local testing/imports)."""

934
stacks/network-mcp/frontend/app.py
Normal file
934
stacks/network-mcp/frontend/app.py
Normal file
@ -0,0 +1,934 @@
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import parse_qs, unquote, urlparse
|
||||
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
from flask import Flask, abort, jsonify, render_template, request
|
||||
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||
env_path = BASE_DIR / ".env"
|
||||
if env_path.exists():
|
||||
load_dotenv(env_path)
|
||||
|
||||
ES_URL = os.getenv("ES_URL", "http://localhost:9200").rstrip("/")
|
||||
ES_VERIFY_SSL = os.getenv("ES_VERIFY_SSL", "false").lower() == "true"
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
HOST_SEARCH_LIMIT = int(os.getenv("FRONTEND_HOST_LIMIT", "1000"))
|
||||
DEFAULT_EVENT_LIMIT = int(os.getenv("FRONTEND_EVENT_LIMIT", "200"))
|
||||
SERVER_VERSION = os.getenv("NETWORK_MCP_VERSION", "0.1.0")
|
||||
|
||||
REST_TOOLS = [
|
||||
{
|
||||
"name": "list_hosts",
|
||||
"description": "Return the merged view of every known device on the network (searchable by hostname, IP, or MAC).",
|
||||
"method": "GET",
|
||||
"path": "/api/hosts",
|
||||
},
|
||||
{
|
||||
"name": "network_map",
|
||||
"description": "Summarize hosts grouped by detected /24 (IPv4) or /64 (IPv6) networks.",
|
||||
"method": "GET",
|
||||
"path": "/api/map",
|
||||
},
|
||||
{
|
||||
"name": "get_host",
|
||||
"description": "Fetch a single host document by ID (e.g. ip:192.168.5.10).",
|
||||
"method": "GET",
|
||||
"path": "/api/hosts/{host_id}",
|
||||
},
|
||||
{
|
||||
"name": "list_events",
|
||||
"description": "List recent scan/discovery events with filters for host, type, or time range.",
|
||||
"method": "GET",
|
||||
"path": "/api/events",
|
||||
},
|
||||
{
|
||||
"name": "host_events",
|
||||
"description": "List the recent events associated with a specific host.",
|
||||
"method": "GET",
|
||||
"path": "/api/hosts/{host_id}/events",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def tool_schema(description: str, properties: Dict[str, Any], required: Optional[List[str]] = None, title: Optional[str] = None):
|
||||
schema: Dict[str, Any] = {
|
||||
"type": "object",
|
||||
"description": description,
|
||||
"properties": properties,
|
||||
"additionalProperties": False,
|
||||
}
|
||||
if required:
|
||||
schema["required"] = required
|
||||
if title:
|
||||
schema["title"] = title
|
||||
return schema
|
||||
|
||||
|
||||
PORT_SCHEMA = tool_schema(
|
||||
"Observed port entry.",
|
||||
{
|
||||
"port": {"type": "integer", "description": "Port number."},
|
||||
"state": {"type": "string", "description": "State reported by nmap (e.g. open, closed)."},
|
||||
"service": {"type": "string", "description": "Detected service name, if available."},
|
||||
},
|
||||
required=["port"],
|
||||
title="Port",
|
||||
)
|
||||
|
||||
HOST_SCHEMA = tool_schema(
|
||||
"Host summary merged from inventory, OPNsense, and nmap.",
|
||||
{
|
||||
"id": {"type": "string", "description": "Stable host identifier (ip:* or mac:*)."},
|
||||
"name": {"type": "string", "description": "Best-known display name."},
|
||||
"ips": {"type": "array", "items": {"type": "string"}, "description": "Associated IP addresses."},
|
||||
"macs": {"type": "array", "items": {"type": "string"}, "description": "Observed MAC addresses."},
|
||||
"hostnames": {"type": "array", "items": {"type": "string"}, "description": "DNS or hostnames discovered."},
|
||||
"sources": {"type": "array", "items": {"type": "string"}, "description": "Data sources contributing to this record."},
|
||||
"last_seen": {"type": "string", "description": "ISO timestamp of the most recent observation."},
|
||||
"notes": {"type": "string", "description": "Inventory notes/annotations, if present."},
|
||||
"expected_ports": {"type": "array", "items": {"type": "string"}, "description": "Ports expected per inventory targets."},
|
||||
"ports": {"type": "array", "items": PORT_SCHEMA, "description": "Latest observed open ports."},
|
||||
},
|
||||
required=["id"],
|
||||
title="Host",
|
||||
)
|
||||
|
||||
EVENT_SCHEMA = tool_schema(
|
||||
"Scan or discovery event emitted by collectors.",
|
||||
{
|
||||
"id": {"type": "string", "description": "Event document identifier."},
|
||||
"timestamp": {"type": "string", "description": "Observation timestamp (@timestamp)."},
|
||||
"source": {"type": "string", "description": "Collector that produced the event (nmap, opnsense, inventory)."},
|
||||
"event": {"type": "object", "description": "Event metadata (type, outcome)."},
|
||||
"host": HOST_SCHEMA,
|
||||
"ports": {"type": "array", "items": PORT_SCHEMA, "description": "Ports included with the event (if any)."},
|
||||
},
|
||||
required=["id", "timestamp"],
|
||||
title="Event",
|
||||
)
|
||||
|
||||
NETWORK_ENTRY_SCHEMA = tool_schema(
|
||||
"Network grouping entry showing hosts per /24 or /64.",
|
||||
{
|
||||
"cidr": {"type": "string", "description": "CIDR label (e.g. 192.168.5.0/24)."},
|
||||
"hosts": {"type": "array", "items": HOST_SCHEMA, "description": "Hosts that belong to this network."},
|
||||
},
|
||||
required=["cidr", "hosts"],
|
||||
title="NetworkEntry",
|
||||
)
|
||||
|
||||
|
||||
MCP_TOOL_DEFINITIONS = {
|
||||
"list_hosts": {
|
||||
"title": "List Hosts",
|
||||
"description": "Return the merged view of every known device on the network with optional filtering by source or identifier.",
|
||||
"annotations": {"readOnlyHint": True, "destructiveHint": False, "openWorldHint": False},
|
||||
"inputSchema": tool_schema(
|
||||
"Filter options when listing hosts.",
|
||||
{
|
||||
"limit": {"type": "integer", "minimum": 1, "maximum": 5000, "title": "Limit", "description": "Maximum number of hosts to return."},
|
||||
"source": {"type": "string", "title": "Source filter", "description": "Only include hosts that contain this source tag (e.g. inventory, nmap, opnsense-arp)."},
|
||||
"terms": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"title": "Search terms",
|
||||
"description": "Identifiers (names, hostnames, IPs, or MACs) to match. Equivalent to repeated q parameters in the REST API.",
|
||||
},
|
||||
},
|
||||
title="ListHostsInput",
|
||||
),
|
||||
"outputSchema": tool_schema(
|
||||
"Host list result payload.",
|
||||
{
|
||||
"total": {"type": "integer", "description": "Number of hosts returned."},
|
||||
"hosts": {"type": "array", "items": HOST_SCHEMA, "description": "Host entries sorted by last-seen time."},
|
||||
},
|
||||
required=["total", "hosts"],
|
||||
title="ListHostsResult",
|
||||
),
|
||||
},
|
||||
"network_map": {
|
||||
"title": "Network Map",
|
||||
"description": "Summarize hosts grouped by detected /24 (IPv4) or /64 (IPv6) ranges.",
|
||||
"annotations": {"readOnlyHint": True, "destructiveHint": False, "openWorldHint": False},
|
||||
"inputSchema": tool_schema(
|
||||
"Options when generating the network grouping.",
|
||||
{
|
||||
"limit": {"type": "integer", "minimum": 1, "maximum": 5000, "title": "Host limit", "description": "Maximum number of hosts to consider when building the map."},
|
||||
},
|
||||
title="NetworkMapInput",
|
||||
),
|
||||
"outputSchema": tool_schema(
|
||||
"Grouped view of networks and their hosts.",
|
||||
{
|
||||
"host_count": {"type": "integer", "description": "Number of hosts examined for this map."},
|
||||
"networks": {"type": "array", "items": NETWORK_ENTRY_SCHEMA, "description": "List of network segments and their hosts."},
|
||||
},
|
||||
required=["host_count", "networks"],
|
||||
title="NetworkMapResult",
|
||||
),
|
||||
},
|
||||
"get_host": {
|
||||
"title": "Get Host",
|
||||
"description": "Fetch a single host document by ID, optionally including recent events.",
|
||||
"annotations": {"readOnlyHint": True, "destructiveHint": False, "openWorldHint": False},
|
||||
"inputSchema": tool_schema(
|
||||
"Parameters for retrieving an individual host.",
|
||||
{
|
||||
"host_id": {"type": "string", "title": "Host ID", "description": "Host identifier (e.g. ip:192.168.5.10, mac:aa:bb:cc...)."},
|
||||
"include_events": {"type": "boolean", "title": "Include events", "description": "If true, include recent events for the host."},
|
||||
"events_limit": {"type": "integer", "minimum": 1, "maximum": 1000, "title": "Events limit", "description": "Number of events to include if requested."},
|
||||
},
|
||||
required=["host_id"],
|
||||
title="GetHostInput",
|
||||
),
|
||||
"outputSchema": tool_schema(
|
||||
"Host payload with optional embedded events.",
|
||||
{
|
||||
"host": HOST_SCHEMA,
|
||||
"events": {"type": "array", "items": EVENT_SCHEMA, "description": "Recent events when include_events=true."},
|
||||
},
|
||||
required=["host"],
|
||||
title="GetHostResult",
|
||||
),
|
||||
},
|
||||
"list_events": {
|
||||
"title": "List Events",
|
||||
"description": "List recent scan/discovery events with optional filters.",
|
||||
"annotations": {"readOnlyHint": True, "destructiveHint": False, "openWorldHint": False},
|
||||
"inputSchema": tool_schema(
|
||||
"Filters applied when querying events.",
|
||||
{
|
||||
"limit": {"type": "integer", "minimum": 1, "maximum": 1000, "title": "Limit", "description": "Maximum number of events to return."},
|
||||
"host_id": {"type": "string", "title": "Host filter", "description": "Only include events for this host identifier."},
|
||||
"type": {"type": "string", "title": "Event type", "description": "Restrict to a specific event type (e.g. scan, discovery)."},
|
||||
"since": {"type": "string", "title": "Since timestamp", "description": "ISO8601 timestamp used as a lower bound for @timestamp."},
|
||||
},
|
||||
title="ListEventsInput",
|
||||
),
|
||||
"outputSchema": tool_schema(
|
||||
"Event search result.",
|
||||
{
|
||||
"total": {"type": "integer", "description": "Number of events returned."},
|
||||
"events": {"type": "array", "items": EVENT_SCHEMA, "description": "Event documents sorted by timestamp."},
|
||||
},
|
||||
required=["total", "events"],
|
||||
title="ListEventsResult",
|
||||
),
|
||||
},
|
||||
"host_events": {
|
||||
"title": "Host Events",
|
||||
"description": "List recent events associated with a specific host.",
|
||||
"annotations": {"readOnlyHint": True, "destructiveHint": False, "openWorldHint": False},
|
||||
"inputSchema": tool_schema(
|
||||
"Parameters when retrieving events bound to a single host.",
|
||||
{
|
||||
"host_id": {"type": "string", "title": "Host ID", "description": "Host identifier to filter by."},
|
||||
"limit": {"type": "integer", "minimum": 1, "maximum": 1000, "title": "Limit", "description": "Maximum number of events to return."},
|
||||
"type": {"type": "string", "title": "Event type", "description": "Restrict to a specific event type (e.g. scan, discovery)."},
|
||||
"since": {"type": "string", "title": "Since timestamp", "description": "ISO8601 timestamp used as a lower bound for @timestamp."},
|
||||
},
|
||||
required=["host_id"],
|
||||
title="HostEventsInput",
|
||||
),
|
||||
"outputSchema": tool_schema(
|
||||
"Event list scoped to a host.",
|
||||
{
|
||||
"total": {"type": "integer", "description": "Number of events returned for the host."},
|
||||
"events": {"type": "array", "items": EVENT_SCHEMA, "description": "Host-specific event entries."},
|
||||
},
|
||||
required=["total", "events"],
|
||||
title="HostEventsResult",
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def resolve_api_key(api_id: str, api_key: str):
|
||||
if api_id and api_key:
|
||||
return api_id, api_key
|
||||
if not api_key:
|
||||
return None, None
|
||||
if ":" in api_key:
|
||||
possible_id, possible_key = api_key.split(":", 1)
|
||||
return possible_id, possible_key
|
||||
try:
|
||||
decoded = base64.b64decode(api_key).decode()
|
||||
if ":" in decoded:
|
||||
possible_id, possible_key = decoded.split(":", 1)
|
||||
return possible_id, possible_key
|
||||
except Exception:
|
||||
pass
|
||||
return None, None
|
||||
|
||||
|
||||
def build_es_request():
|
||||
headers = {}
|
||||
auth = None
|
||||
|
||||
api_id = os.getenv("ES_API_ID")
|
||||
api_key = os.getenv("ES_API_KEY")
|
||||
api_id, api_key = resolve_api_key(api_id, api_key)
|
||||
if api_id and api_key:
|
||||
token = base64.b64encode(f"{api_id}:{api_key}".encode()).decode()
|
||||
headers["Authorization"] = f"ApiKey {token}"
|
||||
else:
|
||||
auth = (os.getenv("ES_USER", "elastic"), os.getenv("ES_PASS", "changeme"))
|
||||
return headers, auth
|
||||
|
||||
|
||||
def normalize_host(doc: Dict) -> Dict:
|
||||
host = doc.get("host", {})
|
||||
ports = doc.get("ports", [])
|
||||
return {
|
||||
"id": host.get("id"),
|
||||
"name": host.get("name") or host.get("id"),
|
||||
"ips": host.get("ips", []),
|
||||
"macs": host.get("macs", []),
|
||||
"hostnames": host.get("hostnames", []),
|
||||
"sources": host.get("sources", []),
|
||||
"last_seen": host.get("last_seen"),
|
||||
"notes": host.get("notes"),
|
||||
"expected_ports": host.get("expected_ports", []),
|
||||
"ports": [
|
||||
{
|
||||
"port": p.get("port"),
|
||||
"state": p.get("state"),
|
||||
"service": (p.get("service") or {}).get("name"),
|
||||
}
|
||||
for p in ports
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def parse_search_terms(raw_terms: List[str]) -> List[str]:
|
||||
terms: List[str] = []
|
||||
for raw in raw_terms:
|
||||
if not raw:
|
||||
continue
|
||||
cleaned = raw.replace(",", " ")
|
||||
for chunk in cleaned.split():
|
||||
chunk = chunk.strip()
|
||||
if chunk:
|
||||
terms.append(chunk)
|
||||
return terms
|
||||
|
||||
|
||||
def coerce_string_list(value: Any) -> List[str]:
|
||||
if value is None:
|
||||
return []
|
||||
if isinstance(value, str):
|
||||
return [value]
|
||||
if isinstance(value, (list, tuple)):
|
||||
return [str(item) for item in value if item is not None]
|
||||
return []
|
||||
|
||||
|
||||
def clamp_int(value: Any, default: int, min_value: int, max_value: int) -> int:
|
||||
try:
|
||||
if value is None:
|
||||
return default
|
||||
parsed = int(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
return max(min_value, min(max_value, parsed))
|
||||
|
||||
|
||||
def coerce_bool(value: Any, default: bool = False) -> bool:
|
||||
if value is None:
|
||||
return default
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if isinstance(value, str):
|
||||
return value.lower() in {"1", "true", "yes", "on"}
|
||||
return default
|
||||
|
||||
|
||||
def build_search_clause(term: str) -> Dict:
|
||||
wildcard = f"*{term}*"
|
||||
return {
|
||||
"bool": {
|
||||
"should": [
|
||||
{"wildcard": {"host.name.keyword": {"value": wildcard, "case_insensitive": True}}},
|
||||
{"wildcard": {"host.hostnames.keyword": {"value": wildcard, "case_insensitive": True}}},
|
||||
{"wildcard": {"host.id.keyword": {"value": wildcard, "case_insensitive": True}}},
|
||||
{"wildcard": {"host.ips": {"value": wildcard, "case_insensitive": True}}},
|
||||
{"wildcard": {"host.macs": {"value": wildcard, "case_insensitive": True}}},
|
||||
],
|
||||
"minimum_should_match": 1,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def fetch_hosts(limit: int = HOST_SEARCH_LIMIT, source: Optional[str] = None, search_terms: Optional[List[str]] = None):
|
||||
headers, auth = build_es_request()
|
||||
body = {
|
||||
"size": limit,
|
||||
"sort": [{"host.last_seen": {"order": "desc"}}],
|
||||
}
|
||||
filters: List[Dict] = []
|
||||
if source:
|
||||
filters.append({"term": {"host.sources.keyword": source}})
|
||||
if search_terms:
|
||||
should_clauses = [build_search_clause(term) for term in search_terms]
|
||||
filters.append({"bool": {"should": should_clauses, "minimum_should_match": 1}})
|
||||
if filters:
|
||||
body["query"] = {"bool": {"filter": filters}}
|
||||
resp = requests.get(
|
||||
f"{ES_URL}/network-hosts/_search",
|
||||
json=body,
|
||||
headers=headers,
|
||||
auth=auth,
|
||||
verify=ES_VERIFY_SSL,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return [normalize_host(hit.get("_source", {})) for hit in resp.json()["hits"]["hits"]]
|
||||
|
||||
|
||||
def fetch_host_by_id(host_id: str) -> Optional[Dict]:
|
||||
headers, auth = build_es_request()
|
||||
body = {"size": 1, "query": {"term": {"host.id.keyword": host_id}}}
|
||||
resp = requests.get(
|
||||
f"{ES_URL}/network-hosts/_search",
|
||||
json=body,
|
||||
headers=headers,
|
||||
auth=auth,
|
||||
verify=ES_VERIFY_SSL,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
hits = resp.json()["hits"]["hits"]
|
||||
if not hits:
|
||||
return None
|
||||
return normalize_host(hits[0].get("_source", {}))
|
||||
|
||||
|
||||
def fetch_events(host_id: Optional[str] = None, limit: int = DEFAULT_EVENT_LIMIT, event_type: Optional[str] = None, since: Optional[str] = None):
|
||||
headers, auth = build_es_request()
|
||||
filters: List[Dict] = []
|
||||
if host_id:
|
||||
filters.append({"term": {"host.id.keyword": host_id}})
|
||||
if event_type:
|
||||
filters.append({"term": {"event.type.keyword": event_type}})
|
||||
if since:
|
||||
filters.append({"range": {"@timestamp": {"gte": since}}})
|
||||
body: Dict = {
|
||||
"size": limit,
|
||||
"sort": [{"@timestamp": {"order": "desc"}}],
|
||||
}
|
||||
if filters:
|
||||
body["query"] = {"bool": {"filter": filters}}
|
||||
resp = requests.get(
|
||||
f"{ES_URL}/network-events-*/_search",
|
||||
json=body,
|
||||
headers=headers,
|
||||
auth=auth,
|
||||
verify=ES_VERIFY_SSL,
|
||||
)
|
||||
if resp.status_code == 404:
|
||||
return []
|
||||
resp.raise_for_status()
|
||||
events = []
|
||||
for hit in resp.json()["hits"]["hits"]:
|
||||
doc = hit.get("_source", {})
|
||||
events.append(
|
||||
{
|
||||
"id": hit.get("_id"),
|
||||
"timestamp": doc.get("@timestamp"),
|
||||
"event": doc.get("event", {}),
|
||||
"host": doc.get("host", {}),
|
||||
"observed": doc.get("observed"),
|
||||
"scan": doc.get("scan"),
|
||||
"ports": doc.get("ports", []),
|
||||
"source": doc.get("source"),
|
||||
}
|
||||
)
|
||||
return events
|
||||
|
||||
|
||||
def derive_network_label(ip: str) -> str:
|
||||
if not ip:
|
||||
return "unknown"
|
||||
if ":" in ip:
|
||||
parts = ip.split(":")
|
||||
prefix = ":".join(parts[:4])
|
||||
return f"{prefix}::/64"
|
||||
octets = ip.split(".")
|
||||
if len(octets) == 4:
|
||||
return f"{octets[0]}.{octets[1]}.{octets[2]}.0/24"
|
||||
return "unknown"
|
||||
|
||||
|
||||
def build_network_map(hosts: List[Dict]):
|
||||
networks: Dict[str, Dict] = {}
|
||||
for host in hosts:
|
||||
seen = set()
|
||||
for ip in host.get("ips", []):
|
||||
label = derive_network_label(ip)
|
||||
if label in seen:
|
||||
continue
|
||||
seen.add(label)
|
||||
entry = networks.setdefault(label, {"cidr": label, "hosts": []})
|
||||
entry["hosts"].append(
|
||||
{
|
||||
"id": host.get("id"),
|
||||
"name": host.get("name"),
|
||||
"ips": host.get("ips", []),
|
||||
"sources": host.get("sources", []),
|
||||
"last_seen": host.get("last_seen"),
|
||||
}
|
||||
)
|
||||
sorted_networks = sorted(networks.values(), key=lambda n: n["cidr"])
|
||||
for entry in sorted_networks:
|
||||
entry["hosts"].sort(key=lambda h: h.get("name") or h.get("id") or "")
|
||||
return sorted_networks
|
||||
|
||||
|
||||
def bool_arg(value: Optional[str], default: bool = False) -> bool:
|
||||
if value is None:
|
||||
return default
|
||||
return value.lower() in {"1", "true", "yes", "on"}
|
||||
|
||||
|
||||
def build_manifest(base_url: str) -> Dict:
|
||||
base = base_url.rstrip("/")
|
||||
tools = []
|
||||
for tool in REST_TOOLS:
|
||||
tools.append(
|
||||
{
|
||||
"name": tool["name"],
|
||||
"description": tool["description"],
|
||||
"method": tool["method"],
|
||||
"path": tool["path"],
|
||||
"url": f"{base}{tool['path']}",
|
||||
}
|
||||
)
|
||||
return {
|
||||
"name": "network-mcp",
|
||||
"description": "Network discovery source-of-truth backed by Elasticsearch, Nmap, and OPNsense.",
|
||||
"schema": "1.0",
|
||||
"tools": tools,
|
||||
"auth": "env",
|
||||
}
|
||||
|
||||
|
||||
def tool_result(summary: str, data: Dict[str, Any]):
|
||||
return summary, data
|
||||
|
||||
|
||||
def handle_tool_list_hosts(arguments: Dict[str, Any]):
|
||||
limit = clamp_int(arguments.get("limit"), HOST_SEARCH_LIMIT, 1, 5000)
|
||||
raw_terms = coerce_string_list(arguments.get("terms"))
|
||||
search_terms = parse_search_terms(raw_terms)
|
||||
hosts = fetch_hosts(limit=limit, source=arguments.get("source"), search_terms=search_terms or None)
|
||||
return tool_result(f"Returned {len(hosts)} hosts.", {"hosts": hosts, "total": len(hosts)})
|
||||
|
||||
|
||||
def handle_tool_network_map(arguments: Dict[str, Any]):
|
||||
limit = clamp_int(arguments.get("limit"), HOST_SEARCH_LIMIT, 1, 5000)
|
||||
hosts = fetch_hosts(limit=limit)
|
||||
network_map = build_network_map(hosts)
|
||||
return tool_result(f"Computed {len(network_map)} networks.", {"networks": network_map, "host_count": len(hosts)})
|
||||
|
||||
|
||||
def handle_tool_get_host(arguments: Dict[str, Any]):
|
||||
host_id = arguments.get("host_id")
|
||||
if not host_id:
|
||||
raise ValueError("host_id is required")
|
||||
host = fetch_host_by_id(host_id)
|
||||
if not host:
|
||||
raise KeyError(f"Host {host_id} not found")
|
||||
include_events = coerce_bool(arguments.get("include_events"), default=False)
|
||||
result = {"host": host}
|
||||
if include_events:
|
||||
events_limit = clamp_int(arguments.get("events_limit"), DEFAULT_EVENT_LIMIT, 1, 1000)
|
||||
result["events"] = fetch_events(host_id=host_id, limit=events_limit)
|
||||
return tool_result(f"Fetched host {host_id}.", result)
|
||||
|
||||
|
||||
def handle_tool_list_events(arguments: Dict[str, Any]):
|
||||
limit = clamp_int(arguments.get("limit"), DEFAULT_EVENT_LIMIT, 1, 1000)
|
||||
events = fetch_events(
|
||||
host_id=arguments.get("host_id"),
|
||||
limit=limit,
|
||||
event_type=arguments.get("type"),
|
||||
since=arguments.get("since"),
|
||||
)
|
||||
return tool_result(f"Returned {len(events)} events.", {"events": events, "total": len(events)})
|
||||
|
||||
|
||||
def handle_tool_host_events(arguments: Dict[str, Any]):
|
||||
host_id = arguments.get("host_id")
|
||||
if not host_id:
|
||||
raise ValueError("host_id is required")
|
||||
limit = clamp_int(arguments.get("limit"), DEFAULT_EVENT_LIMIT, 1, 1000)
|
||||
events = fetch_events(host_id=host_id, limit=limit, event_type=arguments.get("type"), since=arguments.get("since"))
|
||||
return tool_result(f"Returned {len(events)} events for {host_id}.", {"events": events, "total": len(events)})
|
||||
|
||||
|
||||
TOOL_HANDLERS = {
|
||||
"list_hosts": handle_tool_list_hosts,
|
||||
"network_map": handle_tool_network_map,
|
||||
"get_host": handle_tool_get_host,
|
||||
"list_events": handle_tool_list_events,
|
||||
"host_events": handle_tool_host_events,
|
||||
}
|
||||
|
||||
|
||||
def list_mcp_tools():
|
||||
tools = []
|
||||
for name, meta in MCP_TOOL_DEFINITIONS.items():
|
||||
tool = {
|
||||
"name": name,
|
||||
"description": meta.get("description"),
|
||||
"inputSchema": meta.get("inputSchema", {"type": "object"}),
|
||||
}
|
||||
title = meta.get("title")
|
||||
if title:
|
||||
tool["title"] = title
|
||||
output_schema = meta.get("outputSchema")
|
||||
if output_schema:
|
||||
tool["outputSchema"] = output_schema
|
||||
annotations = meta.get("annotations")
|
||||
if annotations:
|
||||
tool["annotations"] = annotations
|
||||
tools.append(tool)
|
||||
return tools
|
||||
|
||||
|
||||
def call_tool_by_name(name: str, arguments: Optional[Dict[str, Any]] = None):
|
||||
if name not in TOOL_HANDLERS:
|
||||
raise KeyError(f"Unknown tool: {name}")
|
||||
handler = TOOL_HANDLERS[name]
|
||||
summary, data = handler(arguments or {})
|
||||
return summary, data
|
||||
|
||||
|
||||
def list_mcp_resources(base_uri: str = "network://"):
|
||||
return [
|
||||
{
|
||||
"uri": f"{base_uri}hosts",
|
||||
"name": "hosts",
|
||||
"title": "Hosts (Snapshot)",
|
||||
"mimeType": "application/json",
|
||||
"description": "Snapshot of merged hosts (inventory + opnsense + nmap). Use resources/templates/list for search parameters.",
|
||||
},
|
||||
{
|
||||
"uri": f"{base_uri}map",
|
||||
"name": "map",
|
||||
"title": "Network Map (Snapshot)",
|
||||
"mimeType": "application/json",
|
||||
"description": "Snapshot of networks grouped by /24 (IPv4) or /64 (IPv6).",
|
||||
},
|
||||
{
|
||||
"uri": f"{base_uri}events",
|
||||
"name": "events",
|
||||
"title": "Recent Events (Snapshot)",
|
||||
"mimeType": "application/json",
|
||||
"description": "Recent scan/discovery events. Use resources/templates/list for filters (host_id/type/since).",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def list_mcp_resource_templates(base_uri: str = "network://"):
|
||||
return [
|
||||
{
|
||||
"uriTemplate": f"{base_uri}hosts{{?q,source,limit}}",
|
||||
"name": "hosts_query",
|
||||
"title": "Hosts Query",
|
||||
"mimeType": "application/json",
|
||||
"description": "Query hosts by q (hostname/IP/MAC/name, case-insensitive), source, and limit. Repeat q to provide multiple terms.",
|
||||
},
|
||||
{
|
||||
"uriTemplate": f"{base_uri}host/{{host_id}}{{?include_events,events_limit}}",
|
||||
"name": "host_detail",
|
||||
"title": "Host Detail",
|
||||
"mimeType": "application/json",
|
||||
"description": "Fetch a single host by host_id (e.g. mac:aa:bb.. or ip:192.168.5.10). Optionally include events.",
|
||||
},
|
||||
{
|
||||
"uriTemplate": f"{base_uri}events{{?host_id,type,since,limit}}",
|
||||
"name": "events_query",
|
||||
"title": "Events Query",
|
||||
"mimeType": "application/json",
|
||||
"description": "Query recent events with optional filters host_id, type, since (ISO8601), and limit.",
|
||||
},
|
||||
{
|
||||
"uriTemplate": f"{base_uri}map{{?limit}}",
|
||||
"name": "map_query",
|
||||
"title": "Network Map",
|
||||
"mimeType": "application/json",
|
||||
"description": "Build a network map from up to limit hosts.",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def read_mcp_resource(uri: str):
|
||||
parsed = urlparse(uri)
|
||||
if parsed.scheme != "network":
|
||||
raise ValueError(f"Unsupported resource URI scheme: {parsed.scheme}")
|
||||
|
||||
netloc = parsed.netloc
|
||||
query = parse_qs(parsed.query or "")
|
||||
|
||||
if netloc == "hosts":
|
||||
limit = clamp_int((query.get("limit") or [HOST_SEARCH_LIMIT])[0], HOST_SEARCH_LIMIT, 1, 5000)
|
||||
source = (query.get("source") or [None])[0]
|
||||
q_terms = query.get("q") or []
|
||||
search_terms = parse_search_terms(q_terms)
|
||||
payload = {"hosts": fetch_hosts(limit=limit, source=source, search_terms=search_terms or None)}
|
||||
payload["total"] = len(payload["hosts"])
|
||||
return {"contents": [{"uri": uri, "mimeType": "application/json", "text": json.dumps(payload)}]}
|
||||
|
||||
if netloc == "map":
|
||||
limit = clamp_int((query.get("limit") or [HOST_SEARCH_LIMIT])[0], HOST_SEARCH_LIMIT, 1, 5000)
|
||||
hosts = fetch_hosts(limit=limit)
|
||||
payload = {"networks": build_network_map(hosts), "host_count": len(hosts)}
|
||||
return {"contents": [{"uri": uri, "mimeType": "application/json", "text": json.dumps(payload)}]}
|
||||
|
||||
if netloc == "events":
|
||||
limit = clamp_int((query.get("limit") or [DEFAULT_EVENT_LIMIT])[0], DEFAULT_EVENT_LIMIT, 1, 1000)
|
||||
host_id = (query.get("host_id") or [None])[0]
|
||||
event_type = (query.get("type") or [None])[0]
|
||||
since = (query.get("since") or [None])[0]
|
||||
events = fetch_events(host_id=host_id, limit=limit, event_type=event_type, since=since)
|
||||
payload = {"events": events, "total": len(events)}
|
||||
return {"contents": [{"uri": uri, "mimeType": "application/json", "text": json.dumps(payload)}]}
|
||||
|
||||
if netloc == "host":
|
||||
host_id = unquote((parsed.path or "").lstrip("/"))
|
||||
if not host_id:
|
||||
raise ValueError("Host resource requires /<host_id> path")
|
||||
include_events = coerce_bool((query.get("include_events") or [False])[0], default=False)
|
||||
events_limit = clamp_int((query.get("events_limit") or [DEFAULT_EVENT_LIMIT])[0], DEFAULT_EVENT_LIMIT, 1, 1000)
|
||||
host = fetch_host_by_id(host_id)
|
||||
if not host:
|
||||
raise KeyError(f"Host {host_id} not found")
|
||||
payload = {"host": host}
|
||||
if include_events:
|
||||
payload["events"] = fetch_events(host_id=host_id, limit=events_limit)
|
||||
return {"contents": [{"uri": uri, "mimeType": "application/json", "text": json.dumps(payload)}]}
|
||||
|
||||
raise ValueError(f"Unknown resource URI: {uri}")
|
||||
|
||||
|
||||
def jsonrpc_error(rpc_id: Any, code: int, message: str):
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
"id": rpc_id,
|
||||
"error": {"code": code, "message": message},
|
||||
}
|
||||
|
||||
|
||||
def build_initialize_result(protocol_version: Optional[str] = None):
|
||||
protocol_version = protocol_version or "2025-11-25"
|
||||
return {
|
||||
"protocolVersion": protocol_version,
|
||||
"capabilities": {
|
||||
"tools": {"listChanged": False},
|
||||
"resources": {"listChanged": False, "subscribe": False},
|
||||
},
|
||||
"serverInfo": {"name": "network-mcp", "version": SERVER_VERSION},
|
||||
"instructions": "Start with list_hosts (search by hostname/IP/MAC), then use get_host for details and list_events/host_events for timelines; network_map gives a quick /24-/64 overview.",
|
||||
}
|
||||
|
||||
|
||||
def process_rpc_request(payload: Dict[str, Any]):
|
||||
if not isinstance(payload, dict):
|
||||
return jsonrpc_error(None, -32600, "Invalid request")
|
||||
rpc_id = payload.get("id")
|
||||
method = payload.get("method")
|
||||
params = payload.get("params") or {}
|
||||
is_notification = rpc_id is None
|
||||
|
||||
if method == "initialize":
|
||||
requested = params.get("protocolVersion")
|
||||
requested_str = str(requested) if requested is not None else None
|
||||
return {"jsonrpc": "2.0", "id": rpc_id, "result": build_initialize_result(requested_str)}
|
||||
|
||||
if method == "ping":
|
||||
return {"jsonrpc": "2.0", "id": rpc_id, "result": {}}
|
||||
|
||||
if method == "tools/list":
|
||||
result = {"tools": list_mcp_tools(), "nextCursor": None}
|
||||
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
|
||||
|
||||
if method == "resources/list":
|
||||
result = {"resources": list_mcp_resources(), "nextCursor": None}
|
||||
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
|
||||
|
||||
if method == "resources/templates/list":
|
||||
result = {"resourceTemplates": list_mcp_resource_templates(), "nextCursor": None}
|
||||
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
|
||||
|
||||
if method == "resources/read":
|
||||
uri = (params or {}).get("uri")
|
||||
if not uri:
|
||||
return jsonrpc_error(rpc_id, -32602, "uri is required")
|
||||
try:
|
||||
result = read_mcp_resource(uri)
|
||||
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
|
||||
except ValueError as exc:
|
||||
return jsonrpc_error(rpc_id, -32602, str(exc))
|
||||
except KeyError as exc:
|
||||
message = exc.args[0] if exc.args else str(exc)
|
||||
return jsonrpc_error(rpc_id, -32004, message)
|
||||
|
||||
if method == "notifications/initialized":
|
||||
# No response for notifications.
|
||||
return None
|
||||
|
||||
if method == "tools/call":
|
||||
name = params.get("name")
|
||||
if not name:
|
||||
if is_notification:
|
||||
return None
|
||||
return jsonrpc_error(rpc_id, -32602, "Tool name is required")
|
||||
arguments = params.get("arguments") or {}
|
||||
try:
|
||||
summary, data = call_tool_by_name(name, arguments)
|
||||
result = {
|
||||
"content": [{"type": "text", "text": summary}],
|
||||
"structuredContent": data,
|
||||
"isError": False,
|
||||
}
|
||||
if is_notification:
|
||||
return None
|
||||
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
|
||||
except ValueError as exc:
|
||||
if is_notification:
|
||||
return None
|
||||
result = {
|
||||
"content": [{"type": "text", "text": f"Tool argument error: {exc}"}],
|
||||
"structuredContent": {"error": str(exc)},
|
||||
"isError": True,
|
||||
}
|
||||
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
|
||||
except KeyError as exc:
|
||||
message = exc.args[0] if exc.args else str(exc)
|
||||
if is_notification:
|
||||
return None
|
||||
result = {
|
||||
"content": [{"type": "text", "text": f"Tool error: {message}"}],
|
||||
"structuredContent": {"error": message},
|
||||
"isError": True,
|
||||
}
|
||||
return {"jsonrpc": "2.0", "id": rpc_id, "result": result}
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
if is_notification:
|
||||
return None
|
||||
return jsonrpc_error(rpc_id, -32603, f"Internal error: {exc}")
|
||||
|
||||
if is_notification:
|
||||
return None
|
||||
|
||||
return jsonrpc_error(rpc_id, -32601, f"Method {method} not found")
|
||||
|
||||
|
||||
def process_rpc_envelope(payload: Any):
|
||||
if isinstance(payload, list):
|
||||
responses = []
|
||||
for entry in payload:
|
||||
response = process_rpc_request(entry)
|
||||
if response is not None:
|
||||
responses.append(response)
|
||||
return responses
|
||||
if isinstance(payload, dict):
|
||||
return process_rpc_request(payload)
|
||||
return jsonrpc_error(None, -32600, "Invalid request")
|
||||
|
||||
|
||||
@app.route("/api/hosts")
|
||||
def api_hosts():
|
||||
limit = min(int(request.args.get("limit", HOST_SEARCH_LIMIT)), 5000)
|
||||
q_args = request.args.getlist("q")
|
||||
search_terms = parse_search_terms(q_args)
|
||||
hosts = fetch_hosts(
|
||||
limit=limit,
|
||||
source=request.args.get("source"),
|
||||
search_terms=search_terms if search_terms else None,
|
||||
)
|
||||
return jsonify({"hosts": hosts, "total": len(hosts)})
|
||||
|
||||
|
||||
@app.route("/api/hosts/<path:host_id>")
|
||||
def api_host_detail(host_id: str):
|
||||
host = fetch_host_by_id(host_id)
|
||||
if not host:
|
||||
abort(404, description=f"Host {host_id} not found")
|
||||
include_events = bool_arg(request.args.get("include_events"), default=False)
|
||||
result = {"host": host}
|
||||
if include_events:
|
||||
limit = min(int(request.args.get("events_limit", DEFAULT_EVENT_LIMIT)), 1000)
|
||||
result["events"] = fetch_events(host_id=host_id, limit=limit)
|
||||
return jsonify(result)
|
||||
|
||||
|
||||
@app.route("/api/events")
|
||||
def api_events():
|
||||
limit = min(int(request.args.get("limit", DEFAULT_EVENT_LIMIT)), 1000)
|
||||
events = fetch_events(
|
||||
host_id=request.args.get("host_id"),
|
||||
limit=limit,
|
||||
event_type=request.args.get("type"),
|
||||
since=request.args.get("since"),
|
||||
)
|
||||
return jsonify({"events": events, "total": len(events)})
|
||||
|
||||
|
||||
@app.route("/api/hosts/<path:host_id>/events")
|
||||
def api_host_events(host_id: str):
|
||||
limit = min(int(request.args.get("limit", DEFAULT_EVENT_LIMIT)), 1000)
|
||||
events = fetch_events(host_id=host_id, limit=limit, event_type=request.args.get("type"), since=request.args.get("since"))
|
||||
return jsonify({"events": events, "total": len(events)})
|
||||
|
||||
|
||||
@app.route("/api/map")
|
||||
def api_map():
|
||||
limit = min(int(request.args.get("limit", HOST_SEARCH_LIMIT)), 5000)
|
||||
hosts = fetch_hosts(limit=limit)
|
||||
network_map = build_network_map(hosts)
|
||||
return jsonify({"networks": network_map, "host_count": len(hosts)})
|
||||
|
||||
|
||||
@app.route("/.well-known/mcp.json", methods=["GET", "POST", "OPTIONS"])
|
||||
@app.route("/api/mcp", methods=["GET", "POST", "OPTIONS"])
|
||||
def api_manifest():
|
||||
if request.method == "OPTIONS":
|
||||
return ("", 204, {"Allow": "GET,POST,OPTIONS"})
|
||||
if request.method == "POST":
|
||||
payload = request.get_json(silent=True)
|
||||
if payload is None:
|
||||
return jsonify(jsonrpc_error(None, -32700, "Invalid JSON")), 400
|
||||
rpc_response = process_rpc_envelope(payload)
|
||||
if rpc_response is None or (isinstance(rpc_response, list) and not rpc_response):
|
||||
return ("", 204)
|
||||
return jsonify(rpc_response)
|
||||
manifest = build_manifest(request.url_root.rstrip("/"))
|
||||
return jsonify(manifest)
|
||||
|
||||
|
||||
@app.route("/")
|
||||
def index():
|
||||
hosts = fetch_hosts()
|
||||
total = len(hosts)
|
||||
with_ports = sum(1 for h in hosts if h["ports"])
|
||||
inventory_hosts = sum(1 for h in hosts if "inventory" in h["sources"])
|
||||
return render_template(
|
||||
"index.html",
|
||||
hosts=hosts,
|
||||
total=total,
|
||||
with_ports=with_ports,
|
||||
inventory_hosts=inventory_hosts,
|
||||
es_url=ES_URL,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(host="0.0.0.0", port=int(os.getenv("FRONTEND_PORT", "5001")))
|
||||
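For a quick end-to-end check of the JSON-RPC endpoint defined above, something like the following works once the frontend container is up; the base URL assumes the default FRONTEND_PORT of 5001 and a locally reachable host.

import requests

BASE = "http://localhost:5001/api/mcp"  # assumption: frontend published on the default port

def rpc(method, params=None, rpc_id=1):
    payload = {"jsonrpc": "2.0", "id": rpc_id, "method": method, "params": params or {}}
    resp = requests.post(BASE, json=payload, timeout=10)
    resp.raise_for_status()
    return resp.json()

print(rpc("initialize")["result"]["serverInfo"])                      # name/version of the server
print([t["name"] for t in rpc("tools/list")["result"]["tools"]])      # list_hosts, network_map, ...
result = rpc("tools/call", {"name": "list_hosts", "arguments": {"terms": ["192.168.5."], "limit": 10}})
print(result["result"]["structuredContent"]["total"])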
4
stacks/network-mcp/frontend/requirements.txt
Normal file
4
stacks/network-mcp/frontend/requirements.txt
Normal file
@ -0,0 +1,4 @@
Flask==2.2.5
requests==2.31.0
python-dotenv==0.21.1
gunicorn==21.2.0
206
stacks/network-mcp/frontend/templates/index.html
Normal file
206
stacks/network-mcp/frontend/templates/index.html
Normal file
@ -0,0 +1,206 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Network MCP Hosts</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
||||
background: #0f172a;
|
||||
color: #e2e8f0;
|
||||
margin: 0;
|
||||
padding: 0 1.5rem 2rem;
|
||||
}
|
||||
header {
|
||||
padding: 2rem 0 1rem;
|
||||
}
|
||||
h1 {
|
||||
margin: 0;
|
||||
}
|
||||
.metrics {
|
||||
display: flex;
|
||||
gap: 1rem;
|
||||
flex-wrap: wrap;
|
||||
margin: 1rem 0 2rem;
|
||||
}
|
||||
.metric-card {
|
||||
background: #1e293b;
|
||||
padding: 1rem 1.5rem;
|
||||
border-radius: 0.75rem;
|
||||
border: 1px solid #334155;
|
||||
min-width: 160px;
|
||||
}
|
||||
.metric-card h3 {
|
||||
margin: 0;
|
||||
font-size: 0.9rem;
|
||||
color: #94a3b8;
|
||||
}
|
||||
.metric-card p {
|
||||
margin: 0.4rem 0 0;
|
||||
font-size: 1.5rem;
|
||||
font-weight: bold;
|
||||
color: #f1f5f9;
|
||||
}
|
||||
.hosts-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(320px, 1fr));
|
||||
gap: 1rem;
|
||||
}
|
||||
.host-card {
|
||||
background: #1e293b;
|
||||
border-radius: 0.75rem;
|
||||
border: 1px solid #334155;
|
||||
padding: 1rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.6rem;
|
||||
}
|
||||
.host-card h2 {
|
||||
margin: 0;
|
||||
font-size: 1.1rem;
|
||||
color: #f8fafc;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
}
|
||||
.sources span {
|
||||
display: inline-block;
|
||||
font-size: 0.7rem;
|
||||
padding: 0.15rem 0.4rem;
|
||||
margin-right: 0.3rem;
|
||||
border-radius: 0.4rem;
|
||||
background: #0f766e;
|
||||
}
|
||||
.host-card ul {
|
||||
margin: 0;
|
||||
padding-left: 1.2rem;
|
||||
}
|
||||
.port-list {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.4rem;
|
||||
}
|
||||
.port-chip {
|
||||
background: #0f172a;
|
||||
border: 1px solid #334155;
|
||||
border-radius: 9999px;
|
||||
padding: 0.2rem 0.6rem;
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
.notes {
|
||||
font-size: 0.9rem;
|
||||
color: #cbd5f5;
|
||||
}
|
||||
.source-tag-inventory {
|
||||
background: #a855f7;
|
||||
}
|
||||
.source-tag-opnsense {
|
||||
background: #0284c7;
|
||||
}
|
||||
.source-tag-nmap {
|
||||
background: #ea580c;
|
||||
}
|
||||
.source-tag-discovery {
|
||||
background: #0ea5e9;
|
||||
}
|
||||
.section-title {
|
||||
font-size: 0.9rem;
|
||||
color: #94a3b8;
|
||||
margin: 0;
|
||||
}
|
||||
.ips, .notes, .last-seen {
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>Network MCP Overview</h1>
|
||||
<p class="last-seen">Elasticsearch: {{ es_url }}</p>
|
||||
</header>
|
||||
|
||||
<section class="metrics">
|
||||
<div class="metric-card">
|
||||
<h3>Total Hosts</h3>
|
||||
<p>{{ total }}</p>
|
||||
</div>
|
||||
<div class="metric-card">
|
||||
<h3>With Port Data</h3>
|
||||
<p>{{ with_ports }}</p>
|
||||
</div>
|
||||
<div class="metric-card">
|
||||
<h3>Inventory Entries</h3>
|
||||
<p>{{ inventory_hosts }}</p>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="hosts-grid">
|
||||
{% for host in hosts %}
|
||||
<article class="host-card">
|
||||
<h2>{{ host.name }}
|
||||
{% if host.notes %}
|
||||
<span title="Inventory notes available">📝</span>
|
||||
{% endif %}
|
||||
</h2>
|
||||
<div class="sources">
|
||||
{% for source in host.sources %}
|
||||
{% set tag_class = "" %}
|
||||
{% if source == "inventory" %}
|
||||
{% set tag_class = "source-tag-inventory" %}
|
||||
{% elif source.startswith("opnsense") %}
|
||||
{% set tag_class = "source-tag-opnsense" %}
|
||||
{% elif source == "nmap" %}
|
||||
{% set tag_class = "source-tag-nmap" %}
|
||||
{% elif source == "nmap-discovery" %}
|
||||
{% set tag_class = "source-tag-discovery" %}
|
||||
{% endif %}
|
||||
<span class="{{ tag_class }}">{{ source }}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<div class="ips">
|
||||
<strong>IPs:</strong> {{ host.ips|join(", ") if host.ips else "—" }}
|
||||
</div>
|
||||
{% if host.macs %}
|
||||
<div class="ips">
|
||||
<strong>MACs:</strong> {{ host.macs|join(", ") }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if host.hostnames %}
|
||||
<div class="ips">
|
||||
<strong>Hostnames:</strong> {{ host.hostnames|join(", ") }}
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="last-seen">
|
||||
<strong>Last seen:</strong> {{ host.last_seen or "unknown" }}
|
||||
</div>
|
||||
{% if host.notes %}
|
||||
<div class="notes">
|
||||
<strong>Notes:</strong> {{ host.notes }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if host.expected_ports %}
|
||||
<div>
|
||||
<p class="section-title">Expected Ports</p>
|
||||
<div class="port-list">
|
||||
{% for port in host.expected_ports %}
|
||||
<span class="port-chip">{{ port }}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if host.ports %}
|
||||
<div>
|
||||
<p class="section-title">Observed Ports</p>
|
||||
<div class="port-list">
|
||||
{% for port in host.ports %}
|
||||
<span class="port-chip">{{ port.port }} {{ port.service or "" }}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</article>
|
||||
{% endfor %}
|
||||
</section>
|
||||
</body>
|
||||
</html>
|
||||
2
stacks/network-mcp/frontend/tests/__init__.py
Normal file
2
stacks/network-mcp/frontend/tests/__init__.py
Normal file
@ -0,0 +1,2 @@
"""Unit tests for the Network MCP frontend."""

203
stacks/network-mcp/frontend/tests/test_mcp.py
Normal file
203
stacks/network-mcp/frontend/tests/test_mcp.py
Normal file
@ -0,0 +1,203 @@
import json
import unittest
from unittest.mock import patch


class FakeResponse:
    def __init__(self, payload, status_code=200):
        self._payload = payload
        self.status_code = status_code

    def json(self):
        return self._payload

    def raise_for_status(self):
        if self.status_code >= 400:
            raise RuntimeError(f"HTTP {self.status_code}")


def _wildcard_match(pattern: str, value: str, case_insensitive: bool) -> bool:
    if value is None:
        return False
    if case_insensitive:
        pattern = pattern.lower()
        value = value.lower()
    if pattern.startswith("*") and pattern.endswith("*"):
        needle = pattern.strip("*")
        return needle in value
    return pattern == value


def _extract_wildcard_clause(field_clause):
    # Supports either {"field": "*term*"} or {"field": {"value": "*term*", "case_insensitive": true}}
    if not isinstance(field_clause, dict):
        return None, None, None
    if len(field_clause) != 1:
        return None, None, None
    field, value = next(iter(field_clause.items()))
    if isinstance(value, str):
        return field, value, False
    if isinstance(value, dict):
        return field, value.get("value"), bool(value.get("case_insensitive"))
    return None, None, None


def _filter_hosts_by_query(host_docs, query):
    if not query:
        return host_docs
    bool_query = query.get("bool") if isinstance(query, dict) else None
    if not bool_query:
        return host_docs
    filters = bool_query.get("filter") or []
    if not filters:
        return host_docs

    matched = host_docs
    for f in filters:
        if "term" in f and "host.sources.keyword" in f["term"]:
            src = f["term"]["host.sources.keyword"]
            matched = [h for h in matched if src in (h.get("host", {}).get("sources") or [])]
            continue

        if "bool" in f and "should" in f["bool"]:
            shoulds = f["bool"]["should"]

            def matches_any(host_doc):
                host = host_doc.get("host", {})
                haystacks = {
                    "host.name.keyword": [host.get("name")],
                    "host.hostnames.keyword": host.get("hostnames") or [],
                    "host.id.keyword": [host.get("id")],
                    "host.ips": host.get("ips") or [],
                    "host.macs": host.get("macs") or [],
                }
                for clause in shoulds:
                    if "bool" in clause and "should" in clause["bool"]:
                        # nested should from multiple search terms
                        nested_shoulds = clause["bool"]["should"]
                        for nested in nested_shoulds:
                            if "wildcard" not in nested:
                                continue
                            field, value, ci = _extract_wildcard_clause(nested["wildcard"])
                            if not field or value is None:
                                continue
                            for candidate in haystacks.get(field, []):
                                if _wildcard_match(value, str(candidate or ""), ci):
                                    return True
                    if "wildcard" in clause:
                        field, value, ci = _extract_wildcard_clause(clause["wildcard"])
                        if not field or value is None:
                            continue
                        for candidate in haystacks.get(field, []):
                            if _wildcard_match(value, str(candidate or ""), ci):
                                return True
                return False

            matched = [h for h in matched if matches_any(h)]
            continue
    return matched


class TestNetworkMCP(unittest.TestCase):
    def setUp(self):
        from frontend import app as app_module

        self.app_module = app_module
        self.client = app_module.app.test_client()

        self.host_docs = [
            {
                "host": {
                    "id": "mac:dc:a6:32:67:55:dc",
                    "name": "SEELE",
                    "hostnames": ["SEELE"],
                    "ips": ["192.168.5.208"],
                    "macs": ["dc:a6:32:67:55:dc"],
                    "sources": ["opnsense-dhcp", "opnsense-arp"],
                    "last_seen": "2025-12-14T16:27:15.427091+00:00",
                },
                "ports": [{"port": 22, "state": "open", "service": {"name": "ssh"}}],
            },
            {
                "host": {
                    "id": "mac:aa:bb:cc:dd:ee:ff",
                    "name": "core",
                    "hostnames": ["core.localdomain"],
                    "ips": ["192.168.5.34"],
                    "macs": ["aa:bb:cc:dd:ee:ff"],
                    "sources": ["inventory", "opnsense-arp"],
                    "last_seen": "2025-12-14T16:27:15.427091+00:00",
                    "notes": "Production Docker host",
                },
                "ports": [{"port": 443, "state": "open", "service": {"name": "https"}}],
            },
        ]

    def fake_requests_get(self, url, json=None, headers=None, auth=None, verify=None):
        if url.endswith("/network-hosts/_search"):
            query = (json or {}).get("query")
            hits = _filter_hosts_by_query(self.host_docs, query)
            return FakeResponse({"hits": {"hits": [{"_source": h} for h in hits]}})
        if "/network-events-" in url and url.endswith("/_search"):
            return FakeResponse({"hits": {"hits": []}})
        return FakeResponse({}, status_code=404)

    def test_rest_search_hostname_case_insensitive(self):
        with patch.object(self.app_module.requests, "get", side_effect=self.fake_requests_get):
            resp = self.client.get("/api/hosts?q=seele&limit=50")
            self.assertEqual(resp.status_code, 200)
            payload = resp.get_json()
            self.assertEqual(payload["total"], 1)
            self.assertEqual(payload["hosts"][0]["name"], "SEELE")

    def test_rest_search_by_ip(self):
        with patch.object(self.app_module.requests, "get", side_effect=self.fake_requests_get):
            resp = self.client.get("/api/hosts?q=192.168.5.208")
            payload = resp.get_json()
            self.assertEqual(payload["total"], 1)
            self.assertEqual(payload["hosts"][0]["id"], "mac:dc:a6:32:67:55:dc")

    def test_rest_search_by_mac(self):
        with patch.object(self.app_module.requests, "get", side_effect=self.fake_requests_get):
            resp = self.client.get("/api/hosts?q=dc:a6:32:67:55:dc")
            payload = resp.get_json()
            self.assertEqual(payload["total"], 1)
            self.assertEqual(payload["hosts"][0]["name"], "SEELE")

    def test_mcp_tools_call_search_terms(self):
        with patch.object(self.app_module.requests, "get", side_effect=self.fake_requests_get):
            body = {
                "jsonrpc": "2.0",
                "id": 1,
                "method": "tools/call",
                "params": {"name": "list_hosts", "arguments": {"terms": ["seele"], "limit": 10}},
            }
            resp = self.client.post("/.well-known/mcp.json", data=json.dumps(body), content_type="application/json")
            self.assertEqual(resp.status_code, 200)
            payload = resp.get_json()
            self.assertFalse(payload["result"]["isError"])
            hosts = payload["result"]["structuredContent"]["hosts"]
            self.assertEqual(len(hosts), 1)
            self.assertEqual(hosts[0]["name"], "SEELE")

    def test_mcp_resources_read_hosts_query(self):
        with patch.object(self.app_module.requests, "get", side_effect=self.fake_requests_get):
            body = {"jsonrpc": "2.0", "id": 2, "method": "resources/read", "params": {"uri": "network://hosts?q=seele&limit=5"}}
            resp = self.client.post("/.well-known/mcp.json", data=json.dumps(body), content_type="application/json")
            self.assertEqual(resp.status_code, 200)
            result = resp.get_json()["result"]
            self.assertEqual(result["contents"][0]["mimeType"], "application/json")
            data = json.loads(result["contents"][0]["text"])
            self.assertEqual(data["total"], 1)
            self.assertEqual(data["hosts"][0]["name"], "SEELE")

    def test_mcp_notifications_initialized_no_response(self):
        with patch.object(self.app_module.requests, "get", side_effect=self.fake_requests_get):
            body = {"jsonrpc": "2.0", "method": "notifications/initialized", "params": {}}
            resp = self.client.post("/.well-known/mcp.json", data=json.dumps(body), content_type="application/json")
            self.assertEqual(resp.status_code, 204)


if __name__ == "__main__":
    unittest.main()
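The suite above needs only the standard library plus the Flask test client exposed by the frontend app, so unittest discovery covers it. A minimal runner sketch, assuming it is executed from stacks/network-mcp so that the frontend package is importable (that working directory is an assumption, not part of this change):

# run_frontend_tests.py -- hypothetical helper, not part of this commit.
# Assumes the current directory is stacks/network-mcp so "frontend" imports resolve.
import unittest

if __name__ == "__main__":
    suite = unittest.defaultTestLoader.discover("frontend/tests", pattern="test_*.py")
    unittest.TextTestRunner(verbosity=2).run(suite)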
24
stacks/network-mcp/ilm/network-events-ilm.json
Normal file
@ -0,0 +1,24 @@
{
  "policy": {
    "phases": {
      "hot": {
        "min_age": "0ms",
        "actions": {}
      },
      "warm": {
        "min_age": "7d",
        "actions": {
          "forcemerge": {
            "max_num_segments": 1
          }
        }
      },
      "delete": {
        "min_age": "90d",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}
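For context, scripts/bootstrap_indices.py later in this change PUTs this document to /_ilm/policy/network-events-ilm. A hedged sketch of that single call, with placeholder endpoint and credentials (the real script resolves them from the ES_* environment variables):

# Illustrative only; the endpoint path matches bootstrap_indices.py, credentials are placeholders.
import json
import requests

with open("ilm/network-events-ilm.json") as f:
    policy = json.load(f)

resp = requests.put(
    "http://localhost:9200/_ilm/policy/network-events-ilm",
    json=policy,
    auth=("elastic", "changeme"),
)
resp.raise_for_status()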
39
stacks/network-mcp/ilm/network-events-template.json
Normal file
@ -0,0 +1,39 @@
{
  "index_patterns": ["network-events-*"],
  "template": {
    "settings": {
      "index.lifecycle.name": "network-events-ilm"
    },
    "mappings": {
      "properties": {
        "@timestamp": { "type": "date" },
        "host": {
          "properties": {
            "ip": { "type": "ip" },
            "ips": { "type": "ip" },
            "mac": { "type": "keyword" },
            "macs": { "type": "keyword" },
            "id": { "type": "keyword" },
            "name": { "type": "keyword" },
            "hostname": { "type": "keyword" },
            "hostnames": { "type": "keyword" }
          }
        },
        "ports": {
          "properties": {
            "port": { "type": "integer" },
            "proto": { "type": "keyword" },
            "state": { "type": "keyword" },
            "service": {
              "properties": {
                "name": { "type": "keyword" },
                "product": { "type": "keyword" },
                "version": { "type": "keyword" }
              }
            }
          }
        }
      }
    }
  }
}
||||
40
stacks/network-mcp/ilm/network-hosts-template.json
Normal file
@ -0,0 +1,40 @@
{
  "index_patterns": ["network-hosts"],
  "template": {
    "mappings": {
      "properties": {
        "host": {
          "properties": {
            "id": { "type": "keyword" },
            "name": { "type": "keyword" },
            "fqdn": { "type": "keyword" },
            "ips": { "type": "ip" },
            "macs": { "type": "keyword" },
            "first_seen": { "type": "date" },
            "last_seen": { "type": "date" },
            "last_state_change": { "type": "date" },
            "state": { "type": "keyword" },
            "role": { "type": "keyword" },
            "tags": { "type": "keyword" },
            "notes": { "type": "text" }
          }
        },
        "ports": {
          "properties": {
            "port": { "type": "integer" },
            "proto": { "type": "keyword" },
            "state": { "type": "keyword" },
            "first_seen": { "type": "date" },
            "last_seen": { "type": "date" },
            "service": {
              "properties": {
                "name": { "type": "keyword" },
                "product": { "type": "keyword" }
              }
            }
          }
        }
      }
    }
  }
}
280
stacks/network-mcp/inventory_targets.yml
Normal file
@ -0,0 +1,280 @@
inventory_targets:
- name: Blackmoon
  hostname: blackmoon.localdomain
  ip: 192.168.5.1
  notes: Core OpnSense gateway; ping only
- name: Supermicro-BMC
  hostname: 192.168.5.30
  ip: 192.168.5.30
  ports:
  - 22
  - 80
  notes: "Supermicro IPMI (ATEN login portal on 80\u2192443) for rack chassis"
- name: Jet-Alone
  hostname: jet-alone.localdomain
  ip: 192.168.5.31
  ports:
  - 22
  notes: GPU/LLM server
- name: Wille
  hostname: wille.localdomain
  ip: 192.168.5.33
  ports:
  - 22
  - 80
  - 443
  notes: TrueNAS SCALE primary storage (iXsystems /ui interface)
- name: Core
  hostname: core.localdomain
  ip: 192.168.5.34
  ports:
  - 22
  - 80
  - 443
  notes: Production Docker swarm (Traefik, Gitea, Authentik, Immich, etc.)
- name: NERV-III
  hostname: NERV-III
  ip: 192.168.5.35
  ports:
  - 22
  notes: 'Standalone Proxmox host (Fedora CoreOS VMs: container-dev VM110 plus Ramiel
    containers)'
- name: TP-Link-AP-1
  hostname: 192.168.5.36
  ip: 192.168.5.36
  ports:
  - 22
  - 80
  notes: TP-Link EAP/Omada AP web UI (login page on HTTP)
- name: TP-Link-AP-2
  hostname: 192.168.5.39
  ip: 192.168.5.39
  ports:
  - 22
  - 80
  notes: TP-Link EAP/Omada AP web UI (login page on HTTP)
- name: Subspace-Mote-1
  hostname: subspace-mote-1.localdomain
  ip: 192.168.5.41
  ports:
  - 22
  notes: SBC cluster member
- name: BirdNET-GO
  hostname: 192.168.5.71
  ip: 192.168.5.71
  ports:
  - 22
  - 8080
  notes: Armbian (rz3w-02) running birdnet-go container (port 8080)
- name: rz3w-02
  hostname: rz3w-02.localdomain
  ports:
  - 22
  notes: Subspace node with metrics/logging
- name: Arael
  hostname: arael.localdomain
  ip: 192.168.5.44
  ports:
  - 22
  notes: Debian host, purpose TBD
- name: Synology-NAS
  hostname: 192.168.5.45
  ip: 192.168.5.45
  ports:
  - 22
  - 80
  - 443
  - 5000
  notes: Synology DSM primary NAS (HTTP redirect to DSM on 5000/5001)
- name: Docker-Public
  hostname: docker-public.localdomain
  ip: 192.168.5.46
  ports:
  - 22
  notes: Traefik/Docker public host (Traefik on 8080; hosts Invidious, Matomo, FreshRSS,
    etc.)
- name: Frigate
  hostname: frigate.localdomain
  ip: 192.168.5.47
  ports:
  - 22
  - 5000
  notes: NVR VM
- name: HomeAssistant
  hostname: homeassistant.localdomain
  ip: 192.168.5.48
  ports:
  - 22
  - 8123
  notes: Home automation host
- name: Casper
  hostname: casper.localdomain
  ip: 192.168.5.50
  ports:
  - 22
  notes: Logging/Metrics VM
- name: Ramiel
  hostname: ramiel.localdomain
  ip: 192.168.5.51
  ports:
  - 22
  - 6443
  notes: Cluster node
- name: Ramiel-III
  hostname: ramiel-iii.localdomain
  ip: 192.168.5.230
  ports:
  - 22
  notes: Additional Ramiel host
- name: NERV
  hostname: nerv.localdomain
  ip: 192.168.5.203
  ports:
  - 22
  - 8006
  notes: Proxmox host
- name: Magi2
  hostname: magi2.localdomain
  ip: 192.168.5.202
  ports:
  - 22
  - 8006
  notes: Proxmox host (JSON listed as Magi)
- name: JHCI
  hostname: jhci.localdomain
  ip: 192.168.5.201
  ports:
  - 22
  - 8006
  notes: Proxmox host
- name: Balthasar
  hostname: balthasar.localdomain
  ip: 192.168.5.237
  ports:
  - 22
  - 80
  notes: Technitium DNS server (hosts DoH UI)
- name: Unit-00
  hostname: unit-00.localdomain
  ip: 192.168.5.222
  ports:
  - 22
  notes: Client that connects to docker-dev
- name: TrueNAS-Backup
  hostname: ARKII.localdomain
  ip: 192.168.5.32
  ports:
  - 22
  - 80
  - 443
  notes: "TrueNAS SCALE backup NAS (ARKII chassis) \u2013 HTTPS /ui, SSH pending credentials"
- name: Mokerlink-POE
  hostname: 192.168.5.226
  ip: 192.168.5.226
  ports:
  - 80
  notes: Mokerlink POE-2G08110GSM switch (web login only)
- name: EtherNetIP-Controller
  hostname: 192.168.5.17
  ip: 192.168.5.17
  ports:
  - 2222
  notes: CNC/3D printer controller interface
- name: P1S-Printer
  hostname: P1S
  ip: 192.168.5.42
  notes: Bambu Lab P1S (LLMNR responder only; no TCP services)
- name: Container-Dev
  hostname: container-dev
  ip: 192.168.5.236
  ports:
  - 22
  - 5355
  notes: Fedora CoreOS VM (NERV-III VM110) for container dev; only key-based SSH +
    LLMNR
- name: VPS-TransparentProxy-19222713430
  hostname: 192.227.134.30
  ip: 192.227.134.30
  ports:
  - 22
  - 80
  - 443
  notes: Transparent HAProxy node (Debian 10) running haproxy + zerotier-one + telegraf
- name: VPS-TransparentProxy-1071722798
  hostname: 107.172.27.98
  ip: 107.172.27.98
  ports:
  - 22
  - 80
  - 443
  notes: Transparent HAProxy node (Debian 12) running haproxy + tailscale + zerotier-one
    + telegraf/filebeat
- name: VPS-TransparentProxy-10717425061
  hostname: 107.174.250.61
  ip: 107.174.250.61
  ports:
  - 22
  - 80
  - 443
  notes: Transparent HAProxy (Debian 12) with haproxy, docker/containerd, iperf3,
    filebeat, tailscale, zerotier
- name: VPS-Headscale
  hostname: 198.46.218.8
  ip: 198.46.218.8
  ports:
  - 22
  - 80
  - 443
  notes: Headscale coordination server (Ubuntu 20.04) running headscale, HAProxy,
    Uptime Kuma, tailscale, zerotier
- name: VPS-MailInABox
  hostname: 198.23.146.170
  ip: 198.23.146.170
  ports:
  - 22
  - 80
  - 443
  notes: mail.uplink.tel Mail-in-a-Box (Postfix, Dovecot, BIND, NSD, nginx, SpamPD,
    Filebeat, Tailscale)
- name: VPS-FriendServer
  hostname: 172.245.88.186
  ip: 172.245.88.186
  ports:
  - 22
  - 80
  - 443
  notes: '"Friend server managed" (Debian 12) hosting Apache, InspIRCd, MariaDB, Gitea
    (docker), Tor, Tailscale'
- name: VPS-Meow
  hostname: 107.174.64.22
  ip: 107.174.64.22
  ports:
  - 22
  - 80
  - 443
  notes: '"Meow" VPS (Debian 12) running Docker stack: traefik, wg-easy, wordpress/mysql,
    nginx, filebrowser'
- name: VPS-Lukes
  hostname: 23.94.206.75
  ip: 23.94.206.75
  ports:
  - 22
  - 80
  - 443
  notes: "Luke's VPS (Debian 12) \u2013 running Docker (Traefik, Caddy, GoatCounter,\
    \ TTRSS stack, Radicale, filebrowser, ssh-tunnel)"
- name: VPS-Tailscale-Edge
  hostname: 100.64.0.14
  ip: 100.64.0.14
  ports:
  - 22
  - 80
  - 443
  notes: 'Tailscale interface into mail.uplink.tel (Mail-in-a-Box stack: Postfix/Dovecot/BIND/nginx)'
- name: BirdNET-Pi
  hostname: orangepizero2.localdomain
  ip: 192.168.5.18
  ports:
  - 22
  - 80
  notes: Orangepi Zero2 running BirdNET-Pi (Caddy on port 80)
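How the collectors consume this inventory is not shown in this diff, so the following is only a rough illustration (assumes PyYAML is available): the file parses into a list of target dicts whose optional keys (ip, ports, notes) need defensive handling.

# Illustrative reader for inventory_targets.yml; field handling here is an assumption.
import yaml

with open("inventory_targets.yml") as f:
    targets = yaml.safe_load(f)["inventory_targets"]

for target in targets:
    addr = target.get("ip") or target.get("hostname")
    ports = target.get("ports") or []   # e.g. Blackmoon and P1S-Printer define no ports
    print(f"{target['name']}: {addr} ports={ports}")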
77
stacks/network-mcp/scripts/bootstrap_indices.py
Normal file
@ -0,0 +1,77 @@
import os
import sys
import json
import requests
import urllib3

REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if REPO_ROOT not in sys.path:
    sys.path.insert(0, REPO_ROOT)

from collectors.common.es_auth import resolve_api_key, build_api_key_header

# Suppress insecure request warnings
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

def load_json(path):
    with open(path, 'r') as f:
        return json.load(f)

def main():
    es_url = os.getenv("ES_URL", "http://localhost:9200").rstrip('/')
    env_api_id = os.getenv("ES_API_ID")
    env_api_key = os.getenv("ES_API_KEY")
    es_api_id, es_api_key = resolve_api_key(env_api_id, env_api_key)
    es_user = os.getenv("ES_USER", "elastic")
    es_pass = os.getenv("ES_PASS", "changeme")
    verify_ssl = os.getenv("ES_VERIFY_SSL", "true").lower() == "true"

    auth_args = {}
    if es_api_id and es_api_key:
        auth_args["headers"] = {"Authorization": build_api_key_header(es_api_id, es_api_key)}
        print("Using Elasticsearch API key authentication for bootstrap.")
    else:
        auth_args["auth"] = (es_user, es_pass)
        print("Using Elasticsearch basic authentication for bootstrap.")

    print(f"Bootstrapping Elastic at {es_url}...")

    def put(endpoint, data):
        url = f"{es_url}{endpoint}"
        print(f"PUT {url}")
        try:
            resp = requests.put(url, json=data, verify=verify_ssl, **auth_args)
            print(f"Response: {resp.status_code} {resp.text}")
            resp.raise_for_status()
        except Exception as e:
            print(f"Error: {e}")
            # Don't exit, try next

    # 1. ILM Policy
    ilm_path = "ilm/network-events-ilm.json"
    if os.path.exists(ilm_path):
        data = load_json(ilm_path)
        put("/_ilm/policy/network-events-ilm", data)
    else:
        print(f"Missing {ilm_path}")

    # 2. Network Events Template
    tpl_path = "ilm/network-events-template.json"
    if os.path.exists(tpl_path):
        data = load_json(tpl_path)
        put("/_index_template/network-events", data)
    else:
        print(f"Missing {tpl_path}")

    # 3. Network Hosts Template
    tpl_path = "ilm/network-hosts-template.json"
    if os.path.exists(tpl_path):
        data = load_json(tpl_path)
        put("/_index_template/network-hosts", data)
    else:
        print(f"Missing {tpl_path}")

    print("Bootstrap complete.")

if __name__ == "__main__":
    main()
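A hedged usage sketch for the bootstrap script: the ES_* variable names come from the code above, the values below are placeholders, and the working directory is assumed to be stacks/network-mcp so the relative ilm/ paths resolve.

# Illustrative invocation only; adjust URL and credentials for the real cluster.
import os
import subprocess

env = dict(os.environ,
           ES_URL="https://localhost:9200",
           ES_USER="elastic",
           ES_PASS="changeme",
           ES_VERIFY_SSL="false")
subprocess.run(["python", "scripts/bootstrap_indices.py"],
               check=True, env=env, cwd="stacks/network-mcp")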
9
stacks/network-mcp/static/host_metadata.json
Normal file
@ -0,0 +1,9 @@
{
  "mac:aa:bb:cc:dd:ee:ff": {
    "role": "router",
    "owner": "admin",
    "location": "server-room",
    "tags": ["critical", "gateway"],
    "notes": "Main gateway"
  }
}
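The metadata is keyed by the same mac:<address> host id format used in the network-hosts test documents above; how the frontend merges it is not part of this diff, so the lookup below is only a sketch.

# Illustrative lookup by host id; the actual merge logic in the app may differ.
import json

with open("static/host_metadata.json") as f:
    metadata = json.load(f)

host_id = "mac:aa:bb:cc:dd:ee:ff"
extra = metadata.get(host_id, {})
print(extra.get("role"), extra.get("tags"))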
20
stacks/obby/docker-compose.yml
Normal file
@ -0,0 +1,20 @@
---
services:
  obsidian:
    image: lscr.io/linuxserver/obsidian:latest
    container_name: obsidian
    security_opt:
      - seccomp:unconfined #optional
    environment:
      - PUID=1000
      - PGID=1000
      - TZ=Etc/UTC
    volumes:
      - ./config:/config
    ports:
      - 3002:3000
      - 3003:3001
    devices:
      - /dev/dri:/dev/dri #optional
    shm_size: "1gb"
    restart: unless-stopped
15
stacks/snowflake/docker-compose.yml
Normal file
@ -0,0 +1,15 @@
services:
  snowflake-proxy:
    network_mode: host
    image: containers.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake:latest
    container_name: snowflake-proxy
    restart: unless-stopped
    # For a full list of Snowflake Proxy CLI parameters see
    # https://gitlab.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake/-/tree/main/proxy?ref_type=heads#running-a-standalone-snowflake-proxy
    #command: [ "-ephemeral-ports-range", "30000:60000" ]
  watchtower:
    image: containrrr/watchtower
    container_name: watchtower
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    command: snowflake-proxy
8
stacks/szurubooru/.env.template
Normal file
@ -0,0 +1,8 @@
POSTGRES_USER=szuru
POSTGRES_PASSWORD=change_me
BUILD_INFO=local-dev
PORT=8080
THREADS=4
BASE_URL=/
MOUNT_DATA=./volumes/data
MOUNT_SQL=./volumes/postgres
46
stacks/szurubooru/docker-compose.yml
Normal file
@ -0,0 +1,46 @@
## Example Docker Compose configuration
##
## Use this as a template to set up docker compose, or as a guide to set up other
## orchestration services
services:

  server:
    image: szurubooru/server:latest
    depends_on:
      - sql
    environment:
      ## These should be the names of the dependent containers listed below,
      ## or FQDNs/IP addresses if these services are running outside of Docker
      POSTGRES_HOST: sql
      ## Credentials for database:
      POSTGRES_USER:
      POSTGRES_PASSWORD:
      ## Commented values are defaults:
      #POSTGRES_DB: defaults to same as POSTGRES_USER
      #POSTGRES_PORT: 5432
      #LOG_SQL: 0 (1 for verbose SQL logs)
      THREADS:
    volumes:
      - "${MOUNT_DATA}:/data"
      - "./server/config.yaml:/opt/app/config.yaml"

  client:
    image: szurubooru/client:latest
    depends_on:
      - server
    environment:
      BACKEND_HOST: server
      BASE_URL:
    volumes:
      - "${MOUNT_DATA}:/data:ro"
    ports:
      - "${PORT}:80"

  sql:
    image: postgres:11-alpine
    restart: unless-stopped
    environment:
      POSTGRES_USER:
      POSTGRES_PASSWORD:
    volumes:
      - "${MOUNT_SQL}:/var/lib/postgresql/data"
3
stacks/szurubooru/server/config.yaml
Normal file
@ -0,0 +1,3 @@
name: Hyrax Hub
domain: http://localhost:8080
secret: "CHANGE_ME"