Respect external filter in metrics and graph
Some checks failed
docker-build / build (push) Has been cancelled

This commit is contained in:
2025-11-20 09:54:41 -05:00
parent b0c9d319ef
commit 93774c025f
3 changed files with 161 additions and 40 deletions

View File

@@ -94,7 +94,7 @@ def _ensure_client(config: AppConfig) -> "Elasticsearch":
return Elasticsearch(config.elastic.url, **kwargs)
def metrics_payload(data_root: Path) -> Dict[str, Any]:
def metrics_payload(data_root: Path, include_external: bool = True) -> Dict[str, Any]:
total_items = 0
channel_counter: Counter = Counter()
channel_name_map: Dict[str, str] = {}
@@ -117,7 +117,8 @@ def metrics_payload(data_root: Path) -> Dict[str, Any]:
doc = json.load(handle)
except Exception:
continue
if not include_external and doc.get("external_reference"):
continue
total_items += 1
channel_id = doc.get("channel_id")
@@ -180,6 +181,7 @@ def elastic_metrics_payload(
*,
channel_field_candidates: Optional[List[str]] = None,
debug: bool = False,
include_external: bool = True,
) -> Dict[str, Any]:
if channel_field_candidates is None:
channel_field_candidates = ["channel_id.keyword", "channel_id"]
@@ -220,6 +222,8 @@ def elastic_metrics_payload(
},
},
}
if not include_external:
base_body["query"] = {"bool": {"must_not": [{"term": {"external_reference": True}}]}}
last_error: Optional[Exception] = None
response: Optional[Dict[str, Any]] = None
@@ -614,9 +618,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
normalized.append(text)
return normalized
def build_graph_payload(
root_id: str, depth: int, max_nodes: int
) -> Dict[str, Any]:
def build_graph_payload(
root_id: str, depth: int, max_nodes: int, *, include_external: bool = True
) -> Dict[str, Any]:
root_id = root_id.strip()
if not root_id:
return {"nodes": [], "links": [], "root": root_id, "depth": depth, "meta": {}}
@@ -632,6 +636,14 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
except Exception as exc: # pragma: no cover - elasticsearch handles errors
LOGGER.debug("Graph: failed to load %s: %s", video_id, exc)
doc_cache[video_id] = None
doc = doc_cache[video_id]
if (
doc is not None
and not include_external
and doc.get("external_reference")
):
doc_cache[video_id] = None
return None
return doc_cache[video_id]
nodes: Dict[str, Dict[str, Any]] = {}
@@ -662,6 +674,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
"url": doc.get("url"),
"date": doc.get("date"),
"is_root": current_id == root_id,
"external_reference": bool(doc.get("external_reference")),
}
if level >= depth:
@@ -707,25 +720,28 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
continue
doc = fetch_document(node_id)
if doc is None:
nodes[node_id] = {
"id": node_id,
"title": node_id,
"channel_id": None,
"channel_name": "Unknown",
"url": None,
"date": None,
"is_root": node_id == root_id,
}
else:
nodes[node_id] = {
"id": node_id,
"title": doc.get("title") or node_id,
"channel_id": doc.get("channel_id"),
"channel_name": doc.get("channel_name") or doc.get("channel_id") or "Unknown",
"url": doc.get("url"),
"date": doc.get("date"),
"is_root": node_id == root_id,
}
if include_external:
nodes[node_id] = {
"id": node_id,
"title": node_id,
"channel_id": None,
"channel_name": "Unknown",
"url": None,
"date": None,
"is_root": node_id == root_id,
"external_reference": False,
}
continue
nodes[node_id] = {
"id": node_id,
"title": doc.get("title") or node_id,
"channel_id": doc.get("channel_id"),
"channel_name": doc.get("channel_name") or doc.get("channel_id") or "Unknown",
"url": doc.get("url"),
"date": doc.get("date"),
"is_root": node_id == root_id,
"external_reference": bool(doc.get("external_reference")),
}
links = [
link
@@ -744,9 +760,12 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
},
}
def build_full_graph_payload(
max_nodes: Optional[int], *, highlight_id: Optional[str] = None
) -> Dict[str, Any]:
def build_full_graph_payload(
max_nodes: Optional[int],
*,
highlight_id: Optional[str] = None,
include_external: bool = True,
) -> Dict[str, Any]:
"""
Attempt to render the entire reference graph by gathering every video that
references another (or is referenced).
@@ -785,6 +804,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
return False
if node_id in nodes:
if doc:
if not include_external and doc.get("external_reference"):
nodes.pop(node_id, None)
return False
existing = nodes[node_id]
if (not existing.get("title") or existing["title"] == node_id) and doc.get("title"):
existing["title"] = doc["title"]
@@ -808,6 +830,8 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
date_val = None
title = node_id
if doc:
if not include_external and doc.get("external_reference"):
return False
title = doc.get("title") or title
channel_id = doc.get("channel_id")
channel_name = doc.get("channel_name") or channel_id
@@ -821,6 +845,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
"url": url,
"date": date_val,
"is_root": False,
"external_reference": bool(doc.get("external_reference")) if doc else False,
}
return True
@@ -845,6 +870,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
video_id = source.get("video_id")
if not video_id:
continue
if not include_external and source.get("external_reference"):
nodes.pop(video_id, None)
continue
if not ensure_node(video_id, source):
continue
for target in normalize_reference_list(source.get("internal_references")):
@@ -885,6 +913,18 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
if highlight_id and highlight_id in nodes:
nodes[highlight_id]["is_root"] = True
links = [
link
for link in links
if link.get("source") in nodes and link.get("target") in nodes
]
links = [
link
for link in links
if link.get("source") in nodes and link.get("target") in nodes
]
return {
"root": highlight_id or "",
"depth": 0,
@@ -990,6 +1030,8 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
full_graph = full_graph.lower() in {"1", "true", "yes"}
if not full_graph and not video_id:
return jsonify({"error": "video_id is required"}), 400
include_external = request.args.get("external", default="1", type=str)
include_external = include_external.lower() not in {"0", "false", "no"}
try:
depth = int(request.args.get("depth", "1"))
@@ -1007,9 +1049,15 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
max_nodes = max(10, min(max_nodes, 400))
if full_graph:
payload = build_full_graph_payload(None, highlight_id=video_id or None)
payload = build_full_graph_payload(
None,
highlight_id=video_id or None,
include_external=include_external,
)
else:
payload = build_graph_payload(video_id, depth, max_nodes)
payload = build_graph_payload(
video_id, depth, max_nodes, include_external=include_external
)
if not payload["nodes"]:
return (
jsonify({"error": f"Video '{video_id}' was not found in the index."}),
@@ -1196,19 +1244,22 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
@app.route("/api/metrics")
def metrics():
    """Return aggregate metrics as a JSON response.

    The ``external`` query parameter controls whether items flagged as
    external references are counted. It defaults to ``"1"`` (included);
    the case-insensitive values ``"0"``, ``"false"`` and ``"no"`` exclude
    them, and any other value keeps them included.

    Metrics are requested from ``elastic_metrics_payload`` first. If that
    call raises, the error is logged with its traceback and the response
    is built by ``metrics_payload`` from ``config.data.root`` instead,
    using the same ``include_external`` setting.
    """
    include_external = request.args.get("external", default="1", type=str)
    include_external = include_external.lower() not in {"0", "false", "no"}
    try:
        data = elastic_metrics_payload(
            client,
            index,
            channel_field_candidates=["channel_id.keyword", "channel_id"],
            debug=config.elastic.debug,
            include_external=include_external,
        )
    except Exception:
        # Deliberately broad: any failure on the Elasticsearch path degrades
        # to the local payload instead of failing the request.
        # Logger.exception already attaches the active traceback.
        LOGGER.exception(
            "Falling back to local metrics payload due to Elasticsearch error."
        )
        # Single fallback call; it must carry the same filter as the primary
        # path so both sources answer the same question.
        data = metrics_payload(config.data.root, include_external=include_external)
    return jsonify(data)
@app.route("/api/frequency")