diff --git a/search_app.py b/search_app.py
index 1116dfc..eb056f0 100644
--- a/search_app.py
+++ b/search_app.py
@@ -744,6 +744,159 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
             },
         }
+    def build_full_graph_payload(
+        max_nodes: int, *, highlight_id: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """
+        Build a payload for the entire reference graph by collecting every video
+        that references another video (or is referenced by one), up to ``max_nodes``.
+        """
+
+        query = {
+            "bool": {
+                "should": [
+                    {"range": {"internal_references_count": {"gt": 0}}},
+                    {"range": {"referenced_by_count": {"gt": 0}}},
+                    {"exists": {"field": "internal_references"}},
+                    {"exists": {"field": "referenced_by"}},
+                ],
+                "minimum_should_match": 1,
+            }
+        }
+        source_fields = [
+            "video_id",
+            "title",
+            "channel_id",
+            "channel_name",
+            "url",
+            "date",
+            "internal_references",
+            "referenced_by",
+        ]
+        nodes: Dict[str, Dict[str, Any]] = {}
+        links: List[Dict[str, Any]] = []
+        link_seen: Set[Tuple[str, str, str]] = set()
+        batch_size = min(500, max(50, max_nodes * 2))
+        truncated = False
+
+        def ensure_node(node_id: Optional[str], doc: Optional[Dict[str, Any]] = None) -> bool:
+            if not node_id:
+                return False
+            if node_id in nodes:
+                if doc:
+                    existing = nodes[node_id]
+                    if (not existing.get("title") or existing["title"] == node_id) and doc.get("title"):
+                        existing["title"] = doc["title"]
+                    if not existing.get("channel_id") and doc.get("channel_id"):
+                        existing["channel_id"] = doc["channel_id"]
+                    if (
+                        existing.get("channel_name") in {"Unknown", node_id, None}
+                        and (doc.get("channel_name") or doc.get("channel_id"))
+                    ):
+                        existing["channel_name"] = doc.get("channel_name") or doc.get("channel_id")
+                    if not existing.get("url") and doc.get("url"):
+                        existing["url"] = doc.get("url")
+                    if not existing.get("date") and doc.get("date"):
+                        existing["date"] = doc.get("date")
+                return True
+            if len(nodes) >= max_nodes:
+                return False
+            channel_name = None
+            channel_id = None
+            url = None
+            date_val = None
+            title = node_id
+            if doc:
+                title = doc.get("title") or title
+                channel_id = doc.get("channel_id")
+                channel_name = doc.get("channel_name") or channel_id
+                url = doc.get("url")
+                date_val = doc.get("date")
+            nodes[node_id] = {
+                "id": node_id,
+                "title": title,
+                "channel_id": channel_id,
+                "channel_name": channel_name or "Unknown",
+                "url": url,
+                "date": date_val,
+                "is_root": False,
+            }
+            return True
+
+        scroll_id: Optional[str] = None
+        try:
+            body = {"query": query, "_source": source_fields, "sort": ["_doc"]}
+            response = client.search(
+                index=index, body=body, size=batch_size, scroll="1m"
+            )
+            scroll_id = response.get("_scroll_id")
+            stop_fetch = False
+            while not stop_fetch:
+                hits = response.get("hits", {}).get("hits", [])
+                if not hits:
+                    break
+                for hit in hits:
+                    if len(nodes) >= max_nodes:
+                        truncated = True
+                        stop_fetch = True
+                        break
+                    source = hit.get("_source", {}) or {}
+                    video_id = source.get("video_id")
+                    if not video_id:
+                        continue
+                    if not ensure_node(video_id, source):
+                        continue
+                    for target in normalize_reference_list(source.get("internal_references")):
+                        if target == video_id:
+                            continue
+                        if not ensure_node(target):
+                            continue
+                        key = (video_id, target, "references")
+                        if key not in link_seen:
+                            links.append(
+                                {"source": video_id, "target": target, "relation": "references"}
+                            )
+                            link_seen.add(key)
+                    for origin in normalize_reference_list(source.get("referenced_by")):
+                        if origin == video_id:
+                            continue
+                        if not ensure_node(origin):
+                            continue
+                        key = (origin, video_id, "referenced_by")
+                        if key not in link_seen:
+                            links.append(
+                                {"source": origin, "target": video_id, "relation": "referenced_by"}
+                            )
+                            link_seen.add(key)
+                if stop_fetch or not scroll_id:
+                    break
+                response = client.scroll(scroll_id=scroll_id, scroll="1m")
+                scroll_id = response.get("_scroll_id")
+                if not scroll_id:
+                    break
+        finally:
+            if scroll_id:
+                try:
+                    client.clear_scroll(scroll_id=scroll_id)
+                except Exception:
+                    pass
+
+        if highlight_id and highlight_id in nodes:
+            nodes[highlight_id]["is_root"] = True
+
+        return {
+            "root": highlight_id or "",
+            "depth": 0,
+            "nodes": list(nodes.values()),
+            "links": links,
+            "meta": {
+                "node_count": len(nodes),
+                "link_count": len(links),
+                "mode": "full",
+                "truncated": truncated,
+            },
+        }
+
@app.route("/api/channels")
def channels():
include_external = request.args.get("external", default="0", type=str)
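Note for reviewers: `build_full_graph_payload` leans on the existing `normalize_reference_list` helper, which lives elsewhere in `search_app.py` and is not shown in this diff. The new code only needs it to turn whatever is stored in `internal_references` / `referenced_by` into a flat list of video-id strings. A hypothetical reading of that contract, purely for illustration (the real helper may differ), is sketched here:

```python
# Illustration only: the contract build_full_graph_payload appears to rely on.
# The real normalize_reference_list is defined elsewhere in search_app.py and may differ.
from typing import Any, Iterable, List


def normalize_reference_list(value: Any) -> List[str]:
    """Coerce a stored reference field into a flat list of video-id strings."""
    if not value:
        return []
    items: Iterable[Any] = value if isinstance(value, (list, tuple, set)) else [value]
    ids: List[str] = []
    for item in items:
        # References may be stored as plain ids or as objects carrying a video_id.
        if isinstance(item, dict):
            item = item.get("video_id") or item.get("id")
        if item:
            ids.append(str(item))
    return ids
```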
@@ -832,7 +985,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
@app.route("/api/graph")
def graph_api():
video_id = (request.args.get("video_id") or "").strip()
- if not video_id:
+ full_graph = request.args.get("full_graph", default="0", type=str)
+ full_graph = full_graph.lower() in {"1", "true", "yes"}
+ if not full_graph and not video_id:
return jsonify({"error": "video_id is required"}), 400
try:
@@ -845,14 +1000,17 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
             max_nodes = int(request.args.get("max_nodes", "200"))
         except ValueError:
             max_nodes = 200
-        max_nodes = max(10, min(max_nodes, 400))
+        max_nodes = max(10, min(max_nodes, 800 if full_graph else 400))
-        payload = build_graph_payload(video_id, depth, max_nodes)
-        if not payload["nodes"]:
-            return (
-                jsonify({"error": f"Video '{video_id}' was not found in the index."}),
-                404,
-            )
+        if full_graph:
+            payload = build_full_graph_payload(max_nodes, highlight_id=video_id or None)
+        else:
+            payload = build_graph_payload(video_id, depth, max_nodes)
+            if not payload["nodes"]:
+                return (
+                    jsonify({"error": f"Video '{video_id}' was not found in the index."}),
+                    404,
+                )
         payload["meta"]["max_nodes"] = max_nodes
         return jsonify(payload)
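For reviewers who want to exercise the new mode without the UI, the sketch below drives the endpoint through Flask's test client. It assumes the factory is importable as `from search_app import create_app` and that the configured index is reachable and holds reference data; both are assumptions about the local setup, not guarantees made by this diff.

```python
# Sketch: exercise the new full_graph mode through Flask's test client.
# Assumes search_app.create_app is importable and the configured index is reachable.
from search_app import create_app

app_client = create_app().test_client()

# Without full_graph, omitting video_id is still rejected (HTTP 400).
assert app_client.get("/api/graph").status_code == 400

# With full_graph=1, video_id is optional and only marks the highlighted node.
resp = app_client.get("/api/graph?full_graph=1&max_nodes=500")
data = resp.get_json()
print(data["meta"]["mode"], data["meta"]["node_count"], data["meta"]["truncated"])
```

The `meta.truncated` flag is worth checking here: with `max_nodes` capped at 800 in full-graph mode, large indexes will always come back truncated.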
diff --git a/static/graph.html b/static/graph.html
index 597b7dd..f8b03be 100644
--- a/static/graph.html
+++ b/static/graph.html
@@ -54,6 +54,17 @@
+
+
+
+ Includes every video that references another (ignores depth; may be slow). Max nodes still
+ applies.
+
+
+
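The hint about the full graph being slow ultimately depends on how many videos participate in reference relationships. As a rough sizing check, the same `should` clause used by `build_full_graph_payload` can be passed to the count API. The client construction and index name below are placeholders, not values taken from this diff; the app derives its own `client` and `index` from `AppConfig`.

```python
# Rough sizing sketch: count how many videos would enter the full graph.
# The connection URL and index name are assumptions; the app derives them from AppConfig.
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")
INDEX = "videos"

query = {
    "bool": {
        "should": [
            {"range": {"internal_references_count": {"gt": 0}}},
            {"range": {"referenced_by_count": {"gt": 0}}},
            {"exists": {"field": "internal_references"}},
            {"exists": {"field": "referenced_by"}},
        ],
        "minimum_should_match": 1,
    }
}

# 7.x-style body= call, matching the diff; newer clients also accept es.count(index=INDEX, query=query).
total = es.count(index=INDEX, body={"query": query})["count"]
print(f"{total} videos qualify; anything beyond max_nodes (at most 800) is truncated")
```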