Initial commit: network MCP service

This commit is contained in:
2025-12-29 10:50:28 -05:00
commit 6da9fa7a9b
27 changed files with 3084 additions and 0 deletions

View File

View File

@@ -0,0 +1,55 @@
import base64
from typing import Optional, Tuple
def _clean(value: Optional[str]) -> str:
"""
Normalize values coming from env files where quotes might be preserved.
"""
if not value:
return ""
return value.strip().strip('"').strip()
def resolve_api_key(api_id: Optional[str], api_key: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    """
    Accept various API key formats and return (api_id, api_key).

    Supported formats:
      - Explicit ES_API_ID and ES_API_KEY values.
      - ES_API_KEY that already contains "id:key".
      - ES_API_KEY that is the base64 encoding of "id:key".

    Returns (None, None) when no usable credential pair can be derived.
    """
    def _strip_quotes(raw: Optional[str]) -> str:
        # Normalization inlined here: trim whitespace and surrounding double quotes.
        if not raw:
            return ""
        return raw.strip().strip('"').strip()

    id_part = _strip_quotes(api_id)
    key_part = _strip_quotes(api_key)

    # Both pieces supplied explicitly: nothing to derive.
    if id_part and key_part:
        return id_part, key_part
    # Without a key there is nothing to split or decode.
    if not key_part:
        return None, None

    # Raw "id:key" combined form.
    if ":" in key_part:
        left, _, right = key_part.partition(":")
        if left and right:
            return left, right

    # Base64-encoded "id:key" form.
    try:
        decoded = base64.b64decode(key_part, validate=True).decode()
        if ":" in decoded:
            left, _, right = decoded.partition(":")
            if left and right:
                return left, right
    except Exception:
        pass
    return None, None
def build_api_key_header(api_id: str, api_key: str) -> str:
    """Return the Authorization header value for Elasticsearch ApiKey auth.

    The header format is "ApiKey <base64(id:key)>".
    """
    raw = f"{api_id}:{api_key}".encode()
    encoded = base64.b64encode(raw).decode()
    return "ApiKey " + encoded

View File

@@ -0,0 +1,85 @@
import os
import time
import urllib3
from elasticsearch import Elasticsearch, helpers
from .es_auth import resolve_api_key
from .logging_config import setup_logging
# Suppress insecure request warnings if SSL verification is disabled
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
logger = setup_logging("es_client")
class ESClient:
    """Thin wrapper around the Elasticsearch client with env-driven auth.

    Environment variables:
        ES_URL:                 cluster URL (default http://localhost:9200)
        ES_API_ID / ES_API_KEY: API key credentials (preferred when resolvable)
        ES_USER / ES_PASS:      basic-auth fallback credentials
        ES_VERIFY_SSL:          "true"/"false" toggle for certificate verification
    """

    def __init__(self):
        self.url = os.getenv("ES_URL", "http://localhost:9200")
        env_api_id = os.getenv("ES_API_ID")
        env_api_key = os.getenv("ES_API_KEY")
        # resolve_api_key accepts explicit id/key, raw "id:key", or base64("id:key").
        self.api_id, self.api_key = resolve_api_key(env_api_id, env_api_key)
        self.user = os.getenv("ES_USER", "elastic")
        self.password = os.getenv("ES_PASS", "changeme")
        self.verify_ssl = os.getenv("ES_VERIFY_SSL", "true").lower() == "true"
        if self.api_id and self.api_key:
            # Use API key authentication
            self.client = Elasticsearch(
                self.url,
                api_key=(self.api_id, self.api_key),
                verify_certs=self.verify_ssl,
                ssl_show_warn=False
            )
            logger.info("Using Elasticsearch API key authentication.")
        else:
            # Fallback to basic auth
            self.client = Elasticsearch(
                self.url,
                basic_auth=(self.user, self.password),
                verify_certs=self.verify_ssl,
                ssl_show_warn=False
            )
            logger.info("Using Elasticsearch basic authentication.")

    def check_connection(self):
        """Return cluster info; logs and re-raises on connection failure."""
        try:
            return self.client.info()
        except Exception as e:
            logger.error(f"Failed to connect to Elasticsearch: {e}")
            raise

    def bulk_index(self, actions):
        """
        Bulk index a list of actions.
        actions: list of dicts compatible with elasticsearch.helpers.bulk
        Returns (success_count, failed_items); re-raises on transport errors.
        """
        if not actions:
            return 0, []
        try:
            # raise_on_error=False: collect per-item failures instead of raising,
            # so a few bad documents do not abort the whole batch.
            success, failed = helpers.bulk(self.client, actions, stats_only=False, raise_on_error=False)
            if failed:
                logger.warning(f"Bulk index had failures: {len(failed)} items failed.")
                for item in failed[:5]:  # Log first 5 failures
                    logger.warning(f"Failure sample: {item}")
            else:
                logger.info(f"Bulk index successful: {success} items.")
            return success, failed
        except Exception as e:
            logger.error(f"Bulk index exception: {e}")
            raise

    def search_hosts(self, index="network-hosts", query=None, size=1000):
        """
        Search for hosts in network-hosts index.
        Returns a list of _source documents; empty list on any error.
        """
        if query is None:
            query = {"match_all": {}}
        try:
            resp = self.client.search(index=index, query=query, size=size)
            return [hit["_source"] for hit in resp["hits"]["hits"]]
        except Exception as e:
            logger.error(f"Search failed: {e}")
            return []
def get_es_client():
    """Factory: build a fresh ESClient configured from the environment."""
    return ESClient()

View File

@@ -0,0 +1,21 @@
import logging
import os
import sys
def setup_logging(name: str = "collector") -> logging.Logger:
    """
    Set up and return a structured stdout logger.

    The level comes from the LOG_LEVEL environment variable (default INFO).
    Unrecognized values fall back to INFO instead of raising at startup —
    the original code passed the raw string to setLevel, which raises
    ValueError for typos like LOG_LEVEL=VERBOSE and would crash a collector.
    Handlers are attached only once per logger name, so repeated calls do
    not duplicate output lines.
    """
    logger = logging.getLogger(name)
    level_name = os.getenv("LOG_LEVEL", "INFO").upper()
    # getattr guards against invalid level names; valid names map to ints.
    level = getattr(logging, level_name, None)
    if not isinstance(level, int):
        level = logging.INFO
    logger.setLevel(level)
    if not logger.handlers:
        handler = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter(
            '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
        )
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger

View File

@@ -0,0 +1,131 @@
import subprocess
import xml.etree.ElementTree as ET
import shutil
from typing import List, Dict, Optional
from .logging_config import setup_logging
logger = setup_logging("nmap_parser")
def run_nmap_scan(ips: List[str], extra_args: Optional[List[str]] = None) -> List[Dict]:
    """
    Run nmap against the given IPs and return parsed host dictionaries.

    Returns an empty list when there is nothing to scan, when the nmap
    binary is unavailable, or when the scan fails.
    """
    if not ips:
        return []
    if shutil.which("nmap") is None:
        logger.error("nmap binary not found in PATH")
        return []
    # Always emit XML on stdout (-oX -) so the output can be parsed.
    cmd = ["nmap", "-oX", "-", *(extra_args or []), *ips]
    logger.info(f"Running nmap command: {' '.join(cmd)}")
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, check=True)
        return parse_nmap_xml(completed.stdout)
    except subprocess.CalledProcessError as e:
        logger.error(f"Nmap failed: {e.stderr}")
        return []
    except Exception as e:
        logger.error(f"Error running nmap: {e}")
        return []
def parse_nmap_xml(xml_string: str) -> List[Dict]:
    """
    Parse Nmap XML output into our internal host/port structure.

    Each host dict contains: ip, mac, hostname, vendor, ports (open only)
    and os_match. Returns [] when the XML cannot be parsed at all.
    """
    try:
        root = ET.fromstring(xml_string)
    except ET.ParseError as e:
        logger.error(f"Failed to parse Nmap XML: {e}")
        return []

    parsed = []
    for node in root.findall("host"):
        ip = mac = hostname = vendor = None

        # Addresses: one <address> element per address family.
        for addr in node.findall("address"):
            kind = addr.get("addrtype")
            if kind == "ipv4":
                ip = addr.get("addr")
            elif kind == "mac":
                mac = addr.get("addr")
                vendor = addr.get("vendor")

        # Hostnames: take the first entry when present.
        names_parent = node.find("hostnames")
        if names_parent is not None:
            first_name = names_parent.find("hostname")
            if first_name is not None:
                hostname = first_name.get("name")

        # Ports: keep open ports only.
        open_ports = []
        ports_parent = node.find("ports")
        if ports_parent is not None:
            for pnode in ports_parent.findall("port"):
                state_el = pnode.find("state")
                state = state_el.get("state") if state_el is not None else "unknown"
                if state != "open":
                    continue
                svc_el = pnode.find("service")
                service = {
                    "name": svc_el.get("name") if svc_el is not None else "unknown",
                }
                if svc_el is not None and svc_el.get("product"):
                    service["product"] = svc_el.get("product")
                if svc_el is not None and svc_el.get("version"):
                    service["version"] = svc_el.get("version")
                open_ports.append({
                    "port": int(pnode.get("portid")),
                    "proto": pnode.get("protocol"),
                    "state": state,
                    "service": service,
                })

        # OS detection: first <osmatch> only, when fingerprinting ran.
        os_match = None
        os_parent = node.find("os")
        if os_parent is not None:
            match_el = os_parent.find("osmatch")
            if match_el is not None:
                os_match = {
                    "name": match_el.get("name"),
                    "accuracy": match_el.get("accuracy"),
                }

        parsed.append({
            "ip": ip,
            "mac": mac,  # might be None if scanning a remote segment
            "hostname": hostname,
            "vendor": vendor,
            "ports": open_ports,
            "os_match": os_match,
        })
    return parsed

View File

@@ -0,0 +1,105 @@
import os
import requests
import json
import ipaddress
from .logging_config import setup_logging
logger = setup_logging("opnsense_client")
class OPNsenseClient:
    """Minimal read-only client for the OPNsense REST API.

    Uses HTTP basic auth with the API key/secret pair. All fetch helpers
    are best-effort: network or HTTP errors are logged and an empty
    result is returned so collector loops keep running.
    """

    def __init__(self):
        self.base_url = os.getenv("OPNSENSE_URL", "https://192.168.1.1").rstrip('/')
        self.api_key = os.getenv("OPNSENSE_API_KEY")
        self.api_secret = os.getenv("OPNSENSE_API_SECRET")
        # NOTE(review): reuses the Elasticsearch verify flag; consider adding
        # an explicit OPNSENSE_VERIFY_SSL variable instead.
        self.verify_ssl = os.getenv("ES_VERIFY_SSL", "true").lower() == "true"
        if not self.api_key or not self.api_secret:
            logger.warning("OPNSENSE_API_KEY or OPNSENSE_API_SECRET not set. API calls will fail.")

    def _get(self, endpoint, params=None):
        """GET a JSON endpoint relative to base_url; returns {} on any failure."""
        url = f"{self.base_url}{endpoint}"
        try:
            response = requests.get(
                url,
                auth=(self.api_key, self.api_secret),
                verify=self.verify_ssl,
                params=params,
                timeout=10
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.error(f"Failed to fetch {url}: {e}")
            return {}

    def get_dhcp_leases_v4(self):
        """Return IPv4 DHCP lease rows (list of dicts)."""
        # OPNsense "search" endpoints usually answer {"rows": [...], "total": N}.
        data = self._get("/api/dhcpv4/leases/searchLease")
        return data.get("rows", [])

    def get_arp_table(self):
        """Return ARP table entries (list of dicts)."""
        # Depending on firmware version this endpoint returns either a bare
        # list or a {"rows": [...]} wrapper; handle both shapes.
        data = self._get("/api/diagnostics/interface/getArp")
        if isinstance(data, list):
            return data
        return data.get("rows", [])

    def get_dns_overrides(self):
        """Return Unbound host-override rows (list of dicts)."""
        data = self._get("/api/unbound/settings/searchHostOverride")
        return data.get("rows", [])

    def get_vlan_networks(self):
        """
        Build a list of IPv4 networks (CIDRs) from the routing table, grouped by interface description.

        Each entry is {"key": <intf description>, "name": <same>, "cidr": <str>}.
        Duplicate (description, network) pairs are emitted once.
        """
        routes = self._get("/api/diagnostics/interface/getRoutes")
        networks = []
        # On error _get returns {}, which fails this isinstance check.
        if not isinstance(routes, list):
            return networks
        seen = set()
        for route in routes:
            if route.get("proto") != "ipv4":
                continue
            destination = route.get("destination")
            # Only CIDR-style destinations qualify; skip "default" and host entries.
            if not destination or "/" not in destination or destination == "default":
                continue
            desc = route.get("intf_description")
            if not desc:
                continue
            try:
                network = ipaddress.ip_network(destination, strict=False)
            except ValueError:
                continue
            # Skip host routes (/32) which are usually static peers
            if network.prefixlen == 32:
                continue
            # Skip very wide aggregates (shorter than /16), e.g. summary routes.
            if network.prefixlen < 16:
                continue
            key = (desc, str(network))
            if key in seen:
                continue
            seen.add(key)
            networks.append({
                "key": desc,
                "name": desc,
                "cidr": str(network)
            })
        return networks
def get_opnsense_client():
    """Factory: build a fresh OPNsenseClient configured from the environment."""
    return OPNsenseClient()

View File

@@ -0,0 +1,14 @@
FROM python:3.11-slim

# nmap binary is required by the collector; purge apt lists to keep the image small.
RUN apt-get update && apt-get install -y nmap && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Shared helpers plus the collector package itself.
COPY collectors/common /app/collectors/common
COPY collectors/nmap_collector /app/collectors/nmap_collector

# Make "collectors.*" importable as a package rooted at /app.
ENV PYTHONPATH=/app

RUN pip install requests elasticsearch==8.15.1

CMD ["python", "collectors/nmap_collector/main.py"]

View File

@@ -0,0 +1,378 @@
import os
import time
import datetime
import sys
import json
import shlex
# Ensure we can import from common
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
from collectors.common.es_client import get_es_client
from collectors.common.opnsense_client import get_opnsense_client
from collectors.common.nmap_parser import run_nmap_scan
from collectors.common.logging_config import setup_logging
logger = setup_logging("nmap_collector")
def get_now_iso():
    """Current UTC time as an ISO-8601 timestamp string."""
    now = datetime.datetime.now(datetime.timezone.utc)
    return now.isoformat()
def chunk_list(lst, n):
    """Yield successive slices of *lst*, each at most *n* items long."""
    start = 0
    while start < len(lst):
        yield lst[start:start + n]
        start += n
def should_scan_vlan(vlan, allowlist):
    """Return True when *vlan* matches the allowlist (an empty allowlist allows all)."""
    if not allowlist:
        return True
    # A VLAN qualifies if either its display name or its key is listed.
    candidates = {
        (vlan.get("name") or "").strip(),
        (vlan.get("key") or "").strip(),
    }
    return bool(candidates & set(allowlist))
def build_discovery_update_action(host_id, mac, ip, hostname, vendor, ts_iso):
    """Build a scripted-upsert bulk action for the network-hosts index.

    The painless script merges the observed mac/ip/hostname into the
    existing array fields without duplicates, tags the source, and
    refreshes last_seen; the upsert document seeds a brand-new host
    entry on first sighting.
    """
    normalized_mac = mac.lower() if mac else None
    seed_doc = {
        "host": {
            "id": host_id,
            "macs": [normalized_mac] if normalized_mac else [],
            "ips": [ip] if ip else [],
            "name": hostname,
            "hostnames": [hostname] if hostname else [],
            "vendor": vendor,
            "sources": ["nmap-discovery"],
            "last_seen": ts_iso,
            "first_seen": ts_iso,
        }
    }
    merge_script = """
    if (ctx._source.host == null) { ctx._source.host = [:]; }
    if (ctx._source.host.macs == null) { ctx._source.host.macs = []; }
    if (ctx._source.host.ips == null) { ctx._source.host.ips = []; }
    if (ctx._source.host.hostnames == null) { ctx._source.host.hostnames = []; }
    if (ctx._source.host.sources == null) { ctx._source.host.sources = []; }
    if (params.mac != null && !ctx._source.host.macs.contains(params.mac)) {
        ctx._source.host.macs.add(params.mac);
    }
    if (params.ip != null && !ctx._source.host.ips.contains(params.ip)) {
        ctx._source.host.ips.add(params.ip);
    }
    if (params.hostname != null && !ctx._source.host.hostnames.contains(params.hostname)) {
        ctx._source.host.hostnames.add(params.hostname);
    }
    if (!ctx._source.host.sources.contains(params.source_tag)) {
        ctx._source.host.sources.add(params.source_tag);
    }
    ctx._source.host.last_seen = params.ts;
    if (ctx._source.host.name == null && params.hostname != null) {
        ctx._source.host.name = params.hostname;
    }
    if (params.vendor != null && (ctx._source.host.vendor == null || ctx._source.host.vendor == \"\")) {
        ctx._source.host.vendor = params.vendor;
    }
    """
    return {
        "_index": "network-hosts",
        "_op_type": "update",
        "_id": host_id,
        "script": {
            "source": merge_script,
            "lang": "painless",
            "params": {
                "mac": normalized_mac,
                "ip": ip,
                "hostname": hostname,
                "vendor": vendor,
                "ts": ts_iso,
                "source_tag": "nmap-discovery",
            },
        },
        "upsert": seed_doc,
    }
def run_vlan_discovery(es, opnsense_client, discovery_args, vlan_filter):
    """Ping-sweep whole VLAN subnets and record discovered hosts.

    For every OPNsense-reported network that passes *vlan_filter*, runs an
    nmap discovery scan with *discovery_args* (typically "-sn -n"), writes
    one event per responding host into the daily network-events index, and
    upserts MAC-identified hosts into network-hosts.
    """
    networks = opnsense_client.get_vlan_networks()
    if not networks:
        logger.info("VLAN discovery skipped: OPNsense returned no interfaces.")
        return
    scoped_networks = [n for n in networks if should_scan_vlan(n, vlan_filter)]
    if not scoped_networks:
        logger.info("VLAN discovery skipped: no interfaces matched NMAP_DISCOVERY_VLANS.")
        return
    actions = []
    # Daily event index, e.g. network-events-2025.12.29.
    today = datetime.datetime.now().strftime("%Y.%m.%d")
    event_index = f"network-events-{today}"
    for vlan in scoped_networks:
        cidr = vlan.get("cidr")
        if not cidr:
            continue
        logger.info(f"VLAN discovery scan for {vlan.get('name')} ({cidr})")
        scan_ts = get_now_iso()
        scan_id = f"nmap_discovery_{vlan.get('name')}_{scan_ts}"
        results = run_nmap_scan([cidr], discovery_args)
        for res in results:
            ip = res.get("ip")
            if not ip:
                continue
            mac = res.get("mac")
            hostname = res.get("hostname")
            vendor = res.get("vendor")
            # Hosts are only upserted when a MAC is available to key on;
            # MAC-less results still produce an event below.
            host_id = f"mac:{mac.lower()}" if mac else None
            event_doc = {
                "@timestamp": scan_ts,
                "source": "nmap-discovery",
                "scan_id": scan_id,
                "vlan": vlan.get("name"),
                "cidr": cidr,
                "host": {
                    "id": host_id,
                    "ip": ip,
                    "mac": mac,
                    "hostname": hostname,
                    "vendor": vendor
                }
            }
            actions.append({
                "_index": event_index,
                "_op_type": "index",
                "_source": event_doc
            })
            if host_id:
                actions.append(
                    build_discovery_update_action(host_id, mac, ip, hostname, vendor, scan_ts)
                )
    if actions:
        logger.info(f"VLAN discovery produced {len(actions)} Elasticsearch actions.")
        es.bulk_index(actions)
    else:
        logger.info("VLAN discovery finished with no hosts discovered.")
def main():
    """Nmap collector loop: pull targets from ES, scan, write events and upserts.

    Alternates between frequent quick sweeps and a periodic full service
    scan, and optionally runs VLAN-wide discovery on its own interval.
    Runs forever; per-cycle errors are logged and the loop continues.
    """
    es = get_es_client()
    opnsense_client = get_opnsense_client()
    # Core loop cadence and scan sizing.
    interval = int(os.getenv("NMAP_INTERVAL_SECONDS", "300"))
    full_batch_size = int(os.getenv("NMAP_BATCH_SIZE", "10"))
    quick_batch_size = int(os.getenv("NMAP_QUICK_BATCH_SIZE", "30"))
    port_range = os.getenv("NMAP_PORT_RANGE", "1-1024")  # Full scan range
    # Optional VLAN discovery sweep (ping scan across whole subnets).
    discovery_enabled = os.getenv("NMAP_DISCOVERY_ENABLED", "false").lower() == "true"
    discovery_interval = int(os.getenv("NMAP_DISCOVERY_INTERVAL_SECONDS", "3600"))
    discovery_vlan_filter = [v.strip() for v in os.getenv("NMAP_DISCOVERY_VLANS", "").split(",") if v.strip()]
    discovery_extra_args = os.getenv("NMAP_DISCOVERY_EXTRA_ARGS", "-sn -n").strip()
    if discovery_extra_args:
        discovery_extra_args = shlex.split(discovery_extra_args)
    else:
        discovery_extra_args = ["-sn", "-n"]
    # Seed the timer so that, when enabled, discovery runs on the first cycle.
    discovery_last_run = time.time() - discovery_interval if discovery_enabled else 0.0
    full_interval = int(os.getenv("NMAP_FULL_INTERVAL_SECONDS", "86400"))
    quick_extra_str = os.getenv("NMAP_QUICK_EXTRA_ARGS", "-sS --top-ports 100 -T4 --open -Pn").strip()
    quick_extra_args = shlex.split(quick_extra_str) if quick_extra_str else ["-sS", "--top-ports", "100", "-T4", "--open", "-Pn"]
    last_full_scan = time.time()
    # Construct base nmap args for the full scan:
    # -sV for service version detection, --open to only report open ports.
    # OS detection (-O) would require root/capabilities; we assume the
    # container runs privileged enough for -sS.
    extra_args = ["-sV", "--open"]
    # port_range may be a range ("1-1024") or a comma list of specific ports.
    if port_range:
        extra_args.extend(["-p", port_range])
    # Add user-provided extra args verbatim.
    user_args = os.getenv("NMAP_EXTRA_ARGS", "")
    if user_args:
        extra_args.extend(user_args.split())
    logger.info("Starting Nmap collector loop...")
    while True:
        try:
            start_time = time.time()
            ts_iso = get_now_iso()
            now = time.time()
            # Promote this cycle to a full scan when the full interval elapsed.
            use_full_scan = (now - last_full_scan) >= full_interval
            scan_type = "full" if use_full_scan else "quick"
            scan_id = f"nmap_{scan_type}_{ts_iso}"
            # NOTE(review): current_batch_size is computed but never used —
            # batching of targets is not implemented yet (see chunk_list).
            current_batch_size = full_batch_size if use_full_scan else quick_batch_size
            scan_args = extra_args if use_full_scan else quick_extra_args
            if use_full_scan:
                last_full_scan = now
                logger.info("Running scheduled full service scan.")
            else:
                logger.info("Running quick common-port sweep.")
            # Run subnet-wide discovery on its own (longer) interval.
            if discovery_enabled and (time.time() - discovery_last_run) >= discovery_interval:
                run_vlan_discovery(es, opnsense_client, discovery_extra_args, discovery_vlan_filter)
                discovery_last_run = time.time()
            # 1. Get targets from ES.
            # We only want hosts that have an IP.
            hosts = es.search_hosts(index="network-hosts", size=1000)
            # Extract IPs to scan; map IP -> host ID to correlate results back.
            targets = []
            ip_to_host_id = {}
            for h in hosts:
                # h is {"host": {...}, "ports": [...]}
                host_info = h.get("host", {})
                hid = host_info.get("id")
                ips = host_info.get("ips", [])
                if not hid or not ips:
                    continue
                # Scanning every IP of a multi-homed host would duplicate work,
                # so only the first known IP is scanned per host for now.
                target_ip = ips[0]
                targets.append(target_ip)
                ip_to_host_id[target_ip] = hid
            logger.info(f"Found {len(targets)} targets to scan ({scan_type}).")
            total_processed = 0
            logger.info(f"Scanning {scan_type} run with {len(targets)} targets.")
            scan_results = run_nmap_scan(targets, scan_args)
            actions = []
            # Daily event index, e.g. network-events-2025.12.29.
            today = datetime.datetime.now().strftime("%Y.%m.%d")
            event_index = f"network-events-{today}"
            for res in scan_results:
                ip = res.get("ip")
                if not ip or ip not in ip_to_host_id:
                    continue
                hid = ip_to_host_id[ip]
                total_processed += 1
                # Stamp each observed port with this scan's metadata.
                for p in res["ports"]:
                    p["last_seen"] = ts_iso
                    p["last_scan_id"] = scan_id
                # Immutable per-scan event document.
                event_doc = {
                    "@timestamp": ts_iso,
                    "source": "nmap",
                    "scan_id": scan_id,
                    "host": {"id": hid, "ip": ip},
                    "ports": res["ports"],
                    "os": res.get("os_match")
                }
                actions.append({
                    "_index": event_index,
                    "_op_type": "index",
                    "_source": event_doc
                })
                # Painless script: merge scanned ports into the host document,
                # updating existing (port, proto) entries in place and
                # appending newly seen ports with a first_seen timestamp.
                script_source = """
                if (ctx._source.host == null) { ctx._source.host = [:]; }
                if (ctx._source.host.sources == null) { ctx._source.host.sources = []; }
                if (!ctx._source.host.sources.contains('nmap')) {
                    ctx._source.host.sources.add('nmap');
                }
                ctx._source.host.last_seen = params.ts;
                if (params.os != null) {
                    ctx._source.host.os = params.os;
                }
                if (ctx._source.ports == null) {
                    ctx._source.ports = [];
                }
                for (new_p in params.new_ports) {
                    boolean found = false;
                    for (old_p in ctx._source.ports) {
                        if (old_p.port == new_p.port && old_p.proto == new_p.proto) {
                            old_p.last_seen = params.ts;
                            old_p.state = new_p.state;
                            old_p.service = new_p.service;
                            old_p.last_scan_id = params.scan_id;
                            found = true;
                            break;
                        }
                    }
                    if (!found) {
                        new_p.first_seen = params.ts;
                        ctx._source.ports.add(new_p);
                    }
                }
                """
                actions.append({
                    "_index": "network-hosts",
                    "_op_type": "update",
                    "_id": hid,
                    "script": {
                        "source": script_source,
                        "lang": "painless",
                        "params": {
                            "ts": ts_iso,
                            "os": res.get("os_match"),
                            "new_ports": res["ports"],
                            "scan_id": scan_id
                        }
                    }
                })
                # One upsert per open port into the network-services index,
                # keyed "hostid:proto:port" so repeated scans update in place.
                for p in res["ports"]:
                    svc_id = f"{hid}:{p['proto']}:{p['port']}"
                    svc_script = """
                    ctx._source.last_seen = params.ts;
                    ctx._source.state = params.state;
                    ctx._source.service = params.service;
                    if (ctx._source.first_seen == null) {
                        ctx._source.first_seen = params.ts;
                    }
                    """
                    actions.append({
                        "_index": "network-services",
                        "_op_type": "update",
                        "_id": svc_id,
                        "script": {
                            "source": svc_script,
                            "lang": "painless",
                            "params": {
                                "ts": ts_iso,
                                "state": p["state"],
                                "service": p["service"]
                            }
                        },
                        "upsert": {
                            "host_id": hid,
                            "host_ip": ip,
                            "port": p["port"],
                            "proto": p["proto"],
                            "service": p["service"],
                            "state": p["state"],
                            "last_seen": ts_iso,
                            "first_seen": ts_iso,
                            "sources": ["nmap"]
                        }
                    })
            if actions:
                es.bulk_index(actions)
            # Sleep the remainder of the interval (never a negative duration).
            elapsed = time.time() - start_time
            sleep_time = max(0, interval - elapsed)
            logger.info(f"Nmap {scan_type} cycle done. Scanned {total_processed} hosts in {elapsed:.2f}s. Sleeping {sleep_time:.2f}s")
            time.sleep(sleep_time)
        except Exception as e:
            # Keep the collector alive; back off briefly before retrying.
            logger.error(f"Error in Nmap loop: {e}")
            time.sleep(10)

View File

@@ -0,0 +1,14 @@
FROM python:3.11-slim

WORKDIR /app

# Shared helpers plus the collector package itself.
COPY collectors/common /app/collectors/common
COPY collectors/opnsense_collector /app/collectors/opnsense_collector

# We need to make sure the module path works.
# The main.py does sys.path.append, but cleanest is to set PYTHONPATH.
ENV PYTHONPATH=/app

# pyyaml is needed for the optional inventory_targets.yml file.
RUN pip install requests elasticsearch==8.15.1 pyyaml

CMD ["python", "collectors/opnsense_collector/main.py"]

View File

@@ -0,0 +1,261 @@
import os
import time
import json
import datetime
import sys
import yaml
# Ensure we can import from common
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
from collectors.common.es_client import get_es_client
from collectors.common.opnsense_client import get_opnsense_client
from collectors.common.logging_config import setup_logging
logger = setup_logging("opnsense_collector")
def load_static_metadata(path="/app/static/host_metadata.json"):
    """Load operator-maintained host metadata from a JSON file.

    Returns {} when the file is absent or unreadable.
    """
    if not os.path.exists(path):
        logger.info(f"No static metadata found at {path}")
        return {}
    try:
        with open(path, 'r') as handle:
            return json.load(handle)
    except Exception as e:
        logger.error(f"Failed to load static metadata: {e}")
        return {}
def load_inventory_targets(path=None):
    """Load the optional YAML inventory of expected hosts.

    Returns the "inventory_targets" list, or [] when the file is missing
    or cannot be parsed.
    """
    target_path = path or os.getenv("INVENTORY_FILE", "/app/inventory_targets.yml")
    if not os.path.exists(target_path):
        logger.info(f"No inventory targets found at {target_path}")
        return []
    try:
        with open(target_path, 'r') as handle:
            data = yaml.safe_load(handle) or {}
        return data.get("inventory_targets", [])
    except Exception as e:
        logger.error(f"Failed to load inventory targets: {e}")
        return []
def normalize_mac(mac):
    """Lower-case a MAC address and convert dash separators to colons.

    Returns None for falsy input so callers can skip MAC-less records.
    """
    if not mac:
        return None
    return mac.replace("-", ":").lower()
def get_now_iso():
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.datetime.now(tz=datetime.timezone.utc).isoformat()
def main():
    """OPNsense collector loop: merge DHCP/ARP/DNS/static data into host docs.

    Each cycle fetches DHCP leases, the ARP table, and DNS overrides from
    OPNsense, folds in the static metadata file and YAML inventory, builds
    a deduplicated host map keyed by MAC (or IP as a fallback), then writes
    one event per host plus a doc_as_upsert into network-hosts.
    Runs forever; per-cycle errors are logged and the loop continues.
    """
    es = get_es_client()
    opn = get_opnsense_client()
    interval = int(os.getenv("COLLECTOR_INTERVAL_SECONDS", "60"))
    logger.info("Starting OPNsense collector loop...")
    while True:
        try:
            start_time = time.time()
            ts_iso = get_now_iso()
            # 1. Fetch Data
            dhcp_v4 = opn.get_dhcp_leases_v4()
            arp_table = opn.get_arp_table()
            dns_overrides = opn.get_dns_overrides()
            static_meta = load_static_metadata()
            inventory_entries = load_inventory_targets()
            # 2. Process Data -> hosts map
            # Key: identifier ("mac:<addr>" preferred, "ip:<addr>" fallback)
            hosts_map = {}

            def create_host_entry(identifier):
                # Fresh accumulator; sets de-duplicate values across sources.
                return {
                    "id": identifier,
                    "macs": set(),
                    "ips": set(),
                    "hostnames": set(),
                    "sources": set(),
                    "preferred_name": None,
                    "inventory_notes": None,
                    "inventory_ports": None
                }

            def get_or_create_host(mac):
                # MAC-keyed identity; returns None when no usable MAC is given.
                norm_mac = normalize_mac(mac)
                if not norm_mac:
                    return None
                identifier = f"mac:{norm_mac}"
                host = hosts_map.setdefault(identifier, create_host_entry(identifier))
                host["macs"].add(norm_mac)
                return host

            def get_or_create_host_by_ip(ip):
                # IP-keyed fallback identity for hosts without a known MAC.
                if not ip:
                    return None
                identifier = f"ip:{ip}"
                host = hosts_map.setdefault(identifier, create_host_entry(identifier))
                host["ips"].add(ip)
                return host

            # Process DHCP leases.
            for lease in dhcp_v4:
                # Field names depend on the OPNsense version, hence the fallbacks.
                mac = lease.get('mac') or lease.get('hw_address')
                ip = lease.get('address') or lease.get('ip')
                hostname = lease.get('hostname')
                host = get_or_create_host(mac)
                if host:
                    if ip: host["ips"].add(ip)
                    if hostname: host["hostnames"].add(hostname)
                    host["sources"].add("opnsense-dhcp")
            # Process the ARP table.
            for entry in arp_table:
                # Structure: 'mac', 'ip', 'hostname' (sometimes)
                mac = entry.get('mac')
                ip = entry.get('ip')
                hostname = entry.get('hostname')
                host = get_or_create_host(mac)
                if host:
                    if ip: host["ips"].add(ip)
                    # "?" is the placeholder for an unresolved name.
                    if hostname and hostname != "?": host["hostnames"].add(hostname)
                    host["sources"].add("opnsense-arp")
            # Process DNS overrides (matched to existing hosts by IP).
            ip_to_identifier = {}
            for identifier, h in hosts_map.items():
                for ip in h["ips"]:
                    ip_to_identifier[ip] = identifier
            for override in dns_overrides:
                ip = override.get('ip')
                domain = override.get('domain')
                hostname = override.get('hostname')
                full_fqdn = f"{hostname}.{domain}" if hostname and domain else hostname
                if ip and ip in ip_to_identifier:
                    identifier = ip_to_identifier[ip]
                    if full_fqdn:
                        hosts_map[identifier]["hostnames"].add(full_fqdn)
                    hosts_map[identifier]["sources"].add("opnsense-dns")
            # Process inventory targets (matched by IP, creating ip:-keyed
            # hosts for entries not yet seen via DHCP/ARP).
            for entry in inventory_entries:
                ip = entry.get("ip")
                if not ip:
                    continue
                identifier = ip_to_identifier.get(ip)
                if identifier:
                    host = hosts_map.get(identifier)
                    if host is None:
                        host = get_or_create_host_by_ip(ip)
                        ip_to_identifier[ip] = host["id"]
                else:
                    host = get_or_create_host_by_ip(ip)
                    if host:
                        ip_to_identifier[ip] = host["id"]
                if not host:
                    continue
                hostname = entry.get("hostname")
                name = entry.get("name")
                if hostname:
                    host["hostnames"].add(hostname)
                if name:
                    host["hostnames"].add(name)
                    # Inventory names win over discovered hostnames.
                    host["preferred_name"] = name
                host["sources"].add("inventory")
                notes = entry.get("notes")
                if notes:
                    host["inventory_notes"] = notes
                ports = entry.get("ports")
                if ports:
                    host["inventory_ports"] = ports
            # 3. Build Actions
            actions = []
            # Daily event index, e.g. network-events-2025.12.29.
            today = datetime.datetime.now().strftime("%Y.%m.%d")
            event_index = f"network-events-{today}"
            for _, h_data in hosts_map.items():
                name = h_data.get("preferred_name")
                if not name and h_data["hostnames"]:
                    # Fallback: any known hostname (set order is arbitrary).
                    name = next(iter(h_data["hostnames"]))
                final_host = {
                    "host": {
                        "id": h_data["id"],
                        "macs": list(h_data["macs"]),
                        "ips": list(h_data["ips"]),
                        "name": name,
                        "hostnames": list(h_data["hostnames"]),
                        "last_seen": ts_iso,
                        "sources": list(h_data["sources"])
                    }
                }
                if h_data.get("inventory_notes"):
                    final_host["host"]["notes"] = h_data["inventory_notes"]
                if h_data.get("inventory_ports"):
                    final_host["host"]["expected_ports"] = h_data["inventory_ports"]
                # Merge static metadata last so it overrides collected fields.
                if h_data["id"] in static_meta:
                    meta = static_meta[h_data["id"]]
                    for k, v in meta.items():
                        final_host["host"][k] = v
                # 3a. Immutable per-cycle event document.
                event_doc = {
                    "@timestamp": ts_iso,
                    "source": "opnsense",
                    "scan_id": f"opnsense_{ts_iso}",
                    "host": final_host["host"]
                }
                actions.append({
                    "_index": event_index,
                    "_op_type": "index",
                    "_source": event_doc
                })
                # 3b. Host upsert.
                # doc_as_upsert replaces the provided fields wholesale; that is
                # acceptable for OPNsense-owned fields, but 'ports' must NOT be
                # included here because it is owned by the nmap collector.
                host_update_doc = {
                    "host": final_host["host"]
                }
                actions.append({
                    "_index": "network-hosts",
                    "_op_type": "update",
                    "_id": h_data["id"],
                    "doc": host_update_doc,
                    "doc_as_upsert": True
                })
            # 4. Send to ES
            if actions:
                logger.info(f"Sending {len(actions)} actions to Elasticsearch...")
                success, failed = es.bulk_index(actions)
            else:
                logger.info("No hosts found or no actions generated.")
            # Sleep the remainder of the interval (never a negative duration).
            elapsed = time.time() - start_time
            sleep_time = max(0, interval - elapsed)
            logger.info(f"Cycle done in {elapsed:.2f}s. Sleeping for {sleep_time:.2f}s")
            time.sleep(sleep_time)
        except Exception as e:
            # Keep the collector alive; back off briefly before retrying.
            logger.error(f"Error in main loop: {e}")
            time.sleep(10)