import os
import time
import json
import datetime
import sys
import yaml

# Ensure we can import from common
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))

from collectors.common.es_client import get_es_client
from collectors.common.opnsense_client import get_opnsense_client
from collectors.common.logging_config import setup_logging

logger = setup_logging("opnsense_collector")


def load_static_metadata(path="/app/static/host_metadata.json"):
    if not os.path.exists(path):
        logger.info(f"No static metadata found at {path}")
        return {}
    try:
        with open(path, 'r') as f:
            return json.load(f)
    except Exception as e:
        logger.error(f"Failed to load static metadata: {e}")
        return {}


def load_inventory_targets(path=None):
    path = path or os.getenv("INVENTORY_FILE", "/app/inventory_targets.yml")
    if not os.path.exists(path):
        logger.info(f"No inventory targets found at {path}")
        return []
    try:
        with open(path, 'r') as f:
            data = yaml.safe_load(f) or {}
        return data.get("inventory_targets", [])
    except Exception as e:
        logger.error(f"Failed to load inventory targets: {e}")
        return []


def normalize_mac(mac):
    if not mac:
        return None
    return mac.lower().replace("-", ":")


def get_now_iso():
    return datetime.datetime.now(datetime.timezone.utc).isoformat()


def main():
    es = get_es_client()
    opn = get_opnsense_client()
    interval = int(os.getenv("COLLECTOR_INTERVAL_SECONDS", "60"))

    logger.info("Starting OPNsense collector loop...")

    while True:
        try:
            start_time = time.time()
            ts_iso = get_now_iso()

            # 1. Fetch Data
            dhcp_v4 = opn.get_dhcp_leases_v4()
            arp_table = opn.get_arp_table()
            dns_overrides = opn.get_dns_overrides()
            static_meta = load_static_metadata()
            inventory_entries = load_inventory_targets()

            # 2. Process Data -> hosts map
            # Key: identifier (mac:xx... or ip:xxx)
            hosts_map = {}

            def create_host_entry(identifier):
                return {
                    "id": identifier,
                    "macs": set(),
                    "ips": set(),
                    "hostnames": set(),
                    "sources": set(),
                    "preferred_name": None,
                    "inventory_notes": None,
                    "inventory_ports": None
                }

            def get_or_create_host(mac):
                norm_mac = normalize_mac(mac)
                if not norm_mac:
                    return None
                identifier = f"mac:{norm_mac}"
                host = hosts_map.setdefault(identifier, create_host_entry(identifier))
                host["macs"].add(norm_mac)
                return host

            def get_or_create_host_by_ip(ip):
                if not ip:
                    return None
                identifier = f"ip:{ip}"
                host = hosts_map.setdefault(identifier, create_host_entry(identifier))
                host["ips"].add(ip)
                return host

            # Process DHCP
            for lease in dhcp_v4:
                # Structure depends on OPNsense version, but usually has 'mac', 'address', 'hostname'
                mac = lease.get('mac') or lease.get('hw_address')
                ip = lease.get('address') or lease.get('ip')
                hostname = lease.get('hostname')

                host = get_or_create_host(mac)
                if host:
                    if ip:
                        host["ips"].add(ip)
                    if hostname:
                        host["hostnames"].add(hostname)
                    host["sources"].add("opnsense-dhcp")

            # Process ARP
            for entry in arp_table:
                # Structure: 'mac', 'ip', 'hostname' (sometimes)
                mac = entry.get('mac')
                ip = entry.get('ip')
                hostname = entry.get('hostname')

                host = get_or_create_host(mac)
                if host:
                    if ip:
                        host["ips"].add(ip)
                    if hostname and hostname != "?":
                        host["hostnames"].add(hostname)
                    host["sources"].add("opnsense-arp")

            # Process DNS Overrides (mapped by IP when possible)
            ip_to_identifier = {}
            for identifier, h in hosts_map.items():
                for ip in h["ips"]:
                    ip_to_identifier[ip] = identifier

            for override in dns_overrides:
                ip = override.get('ip')
                domain = override.get('domain')
                hostname = override.get('hostname')
                full_fqdn = f"{hostname}.{domain}" if hostname and domain else hostname

                if ip and ip in ip_to_identifier:
                    identifier = ip_to_identifier[ip]
                    if full_fqdn:
                        hosts_map[identifier]["hostnames"].add(full_fqdn)
                    hosts_map[identifier]["sources"].add("opnsense-dns")

            # Process inventory targets (by IP)
            for entry in inventory_entries:
                ip = entry.get("ip")
                if not ip:
                    continue

                identifier = ip_to_identifier.get(ip)
                if identifier:
                    host = hosts_map.get(identifier)
                    if host is None:
                        host = get_or_create_host_by_ip(ip)
                        ip_to_identifier[ip] = host["id"]
                else:
                    host = get_or_create_host_by_ip(ip)
                    if host:
                        ip_to_identifier[ip] = host["id"]

                if not host:
                    continue

                hostname = entry.get("hostname")
                name = entry.get("name")
                if hostname:
                    host["hostnames"].add(hostname)
                if name:
                    host["hostnames"].add(name)
                    host["preferred_name"] = name
                host["sources"].add("inventory")

                notes = entry.get("notes")
                if notes:
                    host["inventory_notes"] = notes
                ports = entry.get("ports")
                if ports:
                    host["inventory_ports"] = ports

            # 3. Build Actions
            actions = []
            today = datetime.datetime.now().strftime("%Y.%m.%d")
            event_index = f"network-events-{today}"

            for _, h_data in hosts_map.items():
                name = h_data.get("preferred_name")
                if not name and h_data["hostnames"]:
                    name = next(iter(h_data["hostnames"]))

                final_host = {
                    "host": {
                        "id": h_data["id"],
                        "macs": list(h_data["macs"]),
                        "ips": list(h_data["ips"]),
                        "name": name,
                        "hostnames": list(h_data["hostnames"]),
                        "last_seen": ts_iso,
                        "sources": list(h_data["sources"])
                    }
                }

                if h_data.get("inventory_notes"):
                    final_host["host"]["notes"] = h_data["inventory_notes"]
                if h_data.get("inventory_ports"):
                    final_host["host"]["expected_ports"] = h_data["inventory_ports"]

                # Merge Static Metadata
                if h_data["id"] in static_meta:
                    meta = static_meta[h_data["id"]]
                    # Merge fields
                    for k, v in meta.items():
                        final_host["host"][k] = v

                # 3a. Event Document
                event_doc = {
                    "@timestamp": ts_iso,
                    "source": "opnsense",
                    "scan_id": f"opnsense_{ts_iso}",
                    "host": final_host["host"]
                }
                actions.append({
                    "_index": event_index,
                    "_op_type": "index",
                    "_source": event_doc
                })

                # 3b. Host Upsert
                # We use a script upsert or doc_as_upsert.
                # doc_as_upsert is simpler but replaces lists.
                # Ideally we merge lists (ips, macs), but for now replacing with latest 'truth' from OPNsense + Static is okay.
                # However, we don't want to lose 'ports' info from Nmap.
                # So we must NOT overwrite 'ports'.
                host_update_doc = {
                    "host": final_host["host"]
                }
                actions.append({
                    "_index": "network-hosts",
                    "_op_type": "update",
                    "_id": h_data["id"],
                    "doc": host_update_doc,
                    "doc_as_upsert": True
                })

            # 4. Send to ES
            if actions:
                logger.info(f"Sending {len(actions)} actions to Elasticsearch...")
                success, failed = es.bulk_index(actions)
            else:
                logger.info("No hosts found or no actions generated.")

            elapsed = time.time() - start_time
            sleep_time = max(0, interval - elapsed)
            logger.info(f"Cycle done in {elapsed:.2f}s. Sleeping for {sleep_time:.2f}s")
            time.sleep(sleep_time)

        except Exception as e:
            logger.error(f"Error in main loop: {e}")
            time.sleep(10)


if __name__ == "__main__":
    main()
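

# --- Sketch: list-merging upsert (not wired into the loop above) ----------------
# The step 3b comment notes that doc_as_upsert replaces list fields on every cycle,
# while ideally ips/macs/hostnames would be unioned with what is already stored.
# Below is a minimal, hedged sketch of that scripted-upsert alternative. It assumes
# es.bulk_index() forwards standard Elasticsearch bulk update actions unchanged;
# the function name is illustrative and not part of the current flow.
def build_merging_upsert_action(host_doc):
    """Build a bulk update action that unions list fields instead of replacing them."""
    merge_source = (
        "List fields = ['ips', 'macs', 'hostnames', 'sources'];"
        "for (def f : fields) {"
        "  if (params.host[f] == null) { continue; }"
        "  if (ctx._source.host[f] == null) { ctx._source.host[f] = []; }"
        "  for (def v : params.host[f]) {"
        "    if (!ctx._source.host[f].contains(v)) { ctx._source.host[f].add(v); }"
        "  }"
        "}"
        "ctx._source.host.last_seen = params.host.last_seen;"
    )
    return {
        "_index": "network-hosts",
        "_op_type": "update",
        "_id": host_doc["id"],
        # The script runs only when the document already exists; otherwise the
        # upsert body is indexed as-is, matching the current doc_as_upsert shape.
        "script": {"source": merge_source, "lang": "painless", "params": {"host": host_doc}},
        "upsert": {"host": host_doc},
    }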