# Pasted-snippet metadata (not code): 2025-12-31 20:11:44 -05:00, 262 lines, 9.6 KiB, Python
import os
import time
import json
import datetime
import sys
import yaml

# Ensure we can import from common (repo root is two levels up from this file).
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
from collectors.common.es_client import get_es_client
from collectors.common.opnsense_client import get_opnsense_client
from collectors.common.logging_config import setup_logging

# Shared, module-wide logger for this collector.
logger = setup_logging("opnsense_collector")
def load_static_metadata(path="/app/static/host_metadata.json"):
    """Load optional per-host metadata overrides from a JSON file.

    A missing or unreadable file is treated as "no metadata": the
    problem is only logged and an empty dict is returned, so the
    collector keeps running without manual overrides.
    """
    if os.path.exists(path):
        try:
            with open(path, 'r') as f:
                return json.load(f)
        except Exception as e:
            logger.error(f"Failed to load static metadata: {e}")
            return {}
    logger.info(f"No static metadata found at {path}")
    return {}
def load_inventory_targets(path=None):
    """Read the optional inventory YAML and return its 'inventory_targets' list.

    The location defaults to $INVENTORY_FILE, then /app/inventory_targets.yml.
    Missing or unparsable files yield an empty list so the collector can run
    without a curated inventory.
    """
    if path is None:
        path = os.getenv("INVENTORY_FILE", "/app/inventory_targets.yml")
    if not os.path.exists(path):
        logger.info(f"No inventory targets found at {path}")
        return []
    try:
        # .get stays inside the try: a non-dict YAML root (e.g. a bare list)
        # raises AttributeError and should also fall back to [].
        with open(path, 'r') as f:
            parsed = yaml.safe_load(f) or {}
        return parsed.get("inventory_targets", [])
    except Exception as e:
        logger.error(f"Failed to load inventory targets: {e}")
        return []
def normalize_mac(mac):
    """Canonicalize a MAC address to lowercase, colon-separated form.

    Returns None for empty/None input so callers can skip MAC-less rows.
    """
    if not mac:
        return None
    lowered = mac.lower()
    return lowered.replace("-", ":")
def get_now_iso():
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    now = datetime.datetime.now(datetime.timezone.utc)
    return now.isoformat()
def main():
es = get_es_client()
opn = get_opnsense_client()
interval = int(os.getenv("COLLECTOR_INTERVAL_SECONDS", "60"))
logger.info("Starting OPNsense collector loop...")
while True:
try:
start_time = time.time()
ts_iso = get_now_iso()
# 1. Fetch Data
dhcp_v4 = opn.get_dhcp_leases_v4()
arp_table = opn.get_arp_table()
dns_overrides = opn.get_dns_overrides()
static_meta = load_static_metadata()
inventory_entries = load_inventory_targets()
# 2. Process Data -> hosts map
# Key: identifier (mac:xx... or ip:xxx)
hosts_map = {}
def create_host_entry(identifier):
return {
"id": identifier,
"macs": set(),
"ips": set(),
"hostnames": set(),
"sources": set(),
"preferred_name": None,
"inventory_notes": None,
"inventory_ports": None
}
def get_or_create_host(mac):
norm_mac = normalize_mac(mac)
if not norm_mac:
return None
identifier = f"mac:{norm_mac}"
host = hosts_map.setdefault(identifier, create_host_entry(identifier))
host["macs"].add(norm_mac)
return host
def get_or_create_host_by_ip(ip):
if not ip:
return None
identifier = f"ip:{ip}"
host = hosts_map.setdefault(identifier, create_host_entry(identifier))
host["ips"].add(ip)
return host
# Process DHCP
for lease in dhcp_v4:
# Structure depends on OPNsense version, but usually has 'mac', 'address', 'hostname'
mac = lease.get('mac') or lease.get('hw_address')
ip = lease.get('address') or lease.get('ip')
hostname = lease.get('hostname')
host = get_or_create_host(mac)
if host:
if ip: host["ips"].add(ip)
if hostname: host["hostnames"].add(hostname)
host["sources"].add("opnsense-dhcp")
# Process ARP
for entry in arp_table:
# Structure: 'mac', 'ip', 'hostname' (sometimes)
mac = entry.get('mac')
ip = entry.get('ip')
hostname = entry.get('hostname')
host = get_or_create_host(mac)
if host:
if ip: host["ips"].add(ip)
if hostname and hostname != "?": host["hostnames"].add(hostname)
host["sources"].add("opnsense-arp")
# Process DNS Overrides (mapped by IP when possible)
ip_to_identifier = {}
for identifier, h in hosts_map.items():
for ip in h["ips"]:
ip_to_identifier[ip] = identifier
for override in dns_overrides:
ip = override.get('ip')
domain = override.get('domain')
hostname = override.get('hostname')
full_fqdn = f"{hostname}.{domain}" if hostname and domain else hostname
if ip and ip in ip_to_identifier:
identifier = ip_to_identifier[ip]
if full_fqdn:
hosts_map[identifier]["hostnames"].add(full_fqdn)
hosts_map[identifier]["sources"].add("opnsense-dns")
# Process inventory targets (by IP)
for entry in inventory_entries:
ip = entry.get("ip")
if not ip:
continue
identifier = ip_to_identifier.get(ip)
if identifier:
host = hosts_map.get(identifier)
if host is None:
host = get_or_create_host_by_ip(ip)
ip_to_identifier[ip] = host["id"]
else:
host = get_or_create_host_by_ip(ip)
if host:
ip_to_identifier[ip] = host["id"]
if not host:
continue
hostname = entry.get("hostname")
name = entry.get("name")
if hostname:
host["hostnames"].add(hostname)
if name:
host["hostnames"].add(name)
host["preferred_name"] = name
host["sources"].add("inventory")
notes = entry.get("notes")
if notes:
host["inventory_notes"] = notes
ports = entry.get("ports")
if ports:
host["inventory_ports"] = ports
# 3. Build Actions
actions = []
today = datetime.datetime.now().strftime("%Y.%m.%d")
event_index = f"network-events-{today}"
for _, h_data in hosts_map.items():
name = h_data.get("preferred_name")
if not name and h_data["hostnames"]:
name = next(iter(h_data["hostnames"]))
final_host = {
"host": {
"id": h_data["id"],
"macs": list(h_data["macs"]),
"ips": list(h_data["ips"]),
"name": name,
"hostnames": list(h_data["hostnames"]),
"last_seen": ts_iso,
"sources": list(h_data["sources"])
}
}
if h_data.get("inventory_notes"):
final_host["host"]["notes"] = h_data["inventory_notes"]
if h_data.get("inventory_ports"):
final_host["host"]["expected_ports"] = h_data["inventory_ports"]
# Merge Static Metadata
if h_data["id"] in static_meta:
meta = static_meta[h_data["id"]]
# Merge fields
for k, v in meta.items():
final_host["host"][k] = v
# 3a. Event Document
event_doc = {
"@timestamp": ts_iso,
"source": "opnsense",
"scan_id": f"opnsense_{ts_iso}",
"host": final_host["host"]
}
actions.append({
"_index": event_index,
"_op_type": "index",
"_source": event_doc
})
# 3b. Host Upsert
# We use a script upsert or doc_as_upsert.
# doc_as_upsert is simpler but replaces lists.
# Ideally we merge lists (ips, macs), but for now replacing with latest 'truth' from OPNsense + Static is okay.
# However, we don't want to lose 'ports' info from Nmap.
# So we must NOT overwrite 'ports'.
host_update_doc = {
"host": final_host["host"]
}
actions.append({
"_index": "network-hosts",
"_op_type": "update",
"_id": h_data["id"],
"doc": host_update_doc,
"doc_as_upsert": True
})
# 4. Send to ES
if actions:
logger.info(f"Sending {len(actions)} actions to Elasticsearch...")
success, failed = es.bulk_index(actions)
else:
logger.info("No hosts found or no actions generated.")
elapsed = time.time() - start_time
sleep_time = max(0, interval - elapsed)
logger.info(f"Cycle done in {elapsed:.2f}s. Sleeping for {sleep_time:.2f}s")
time.sleep(sleep_time)
except Exception as e:
logger.error(f"Error in main loop: {e}")
time.sleep(10)
# Run the endless collection loop when executed as a script (container entry point).
if __name__ == "__main__":
    main()