Add graph traversal endpoints and sort metrics by channel name

This commit is contained in:
2026-01-08 14:22:01 -05:00
parent 9dd74111e7
commit d26edda029

View File

@@ -201,6 +201,15 @@ def elastic_metrics_payload(
"top_hits": { "top_hits": {
"size": 1, "size": 1,
"_source": {"includes": ["channel_name"]}, "_source": {"includes": ["channel_name"]},
"sort": [
{
"channel_name.keyword": {
"order": "asc",
"missing": "_last",
"unmapped_type": "keyword",
}
}
],
} }
} }
}, },
@@ -568,58 +577,14 @@ def build_query_payload(
return body return body
def create_app(config: AppConfig = CONFIG) -> Flask:
app = Flask(__name__, static_folder=str(Path(__file__).parent / "static"))
client = _ensure_client(config)
index = config.elastic.index
qdrant_url = config.qdrant_url
qdrant_collection = config.qdrant_collection
qdrant_vector_name = config.qdrant_vector_name
qdrant_vector_size = config.qdrant_vector_size
qdrant_embed_model = config.qdrant_embed_model
@app.route("/")
def index_page():
    """Serve ``index.html`` from the app's static folder for the root URL."""
    static_root = app.static_folder
    return send_from_directory(static_root, "index.html")
@app.route("/graph")
def graph_page():
    """Serve ``graph.html`` from the app's static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, "graph.html")
@app.route("/vector-search")
def vector_search_page():
    """Serve ``vector.html`` from the app's static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, "vector.html")
@app.route("/static/<path:filename>")
def static_files(filename: str):
    """Serve the requested ``filename`` from the app's static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, filename)
def normalize_reference_list(values: Any) -> List[str]:
    """Flatten a raw reference field into a list of clean ID strings.

    ``values`` may be ``None``, a single scalar or dict, or a
    list/tuple/set/frozenset of such items. Dict items contribute their
    ``"video_id"`` value, falling back to ``"id"``; any other item is used
    as-is. Each candidate is converted with ``str()`` and stripped.
    ``None`` candidates, blank strings and the placeholders ``"none"`` /
    ``"null"`` (case-insensitive) are dropped. Input iteration order is
    kept and duplicates are not removed.

    Returns:
        The cleaned strings; an empty list when ``values`` is ``None`` or
        nothing survives filtering.
    """
    if values is None:
        return []
    # A lone scalar or dict is treated as a one-element collection.
    # frozenset is accepted next to set so that it is iterated instead of
    # being stringified as a whole into a single bogus ID.
    if isinstance(values, (list, tuple, set, frozenset)):
        items = values
    else:
        items = [values]

    normalized: List[str] = []
    for item in items:
        if isinstance(item, dict):
            candidate = item.get("video_id") or item.get("id")
        else:
            candidate = item
        if candidate is None:
            continue
        text = str(candidate).strip()
        # Skip blanks and stringified null placeholders.
        if not text or text.lower() in {"none", "null"}:
            continue
        normalized.append(text)
    return normalized
def build_graph_payload( def build_graph_payload(
root_id: str, depth: int, max_nodes: int, *, include_external: bool = True client: "Elasticsearch",
index: str,
root_id: str,
depth: int,
max_nodes: int,
*,
include_external: bool = True,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
root_id = root_id.strip() root_id = root_id.strip()
if not root_id: if not root_id:
@@ -637,11 +602,7 @@ def build_graph_payload(
LOGGER.debug("Graph: failed to load %s: %s", video_id, exc) LOGGER.debug("Graph: failed to load %s: %s", video_id, exc)
doc_cache[video_id] = None doc_cache[video_id] = None
doc = doc_cache[video_id] doc = doc_cache[video_id]
if ( if doc is not None and not include_external and doc.get("external_reference"):
doc is not None
and not include_external
and doc.get("external_reference")
):
doc_cache[video_id] = None doc_cache[video_id] = None
return None return None
return doc_cache[video_id] return doc_cache[video_id]
@@ -760,7 +721,10 @@ def build_graph_payload(
}, },
} }
def build_full_graph_payload( def build_full_graph_payload(
client: "Elasticsearch",
index: str,
max_nodes: Optional[int], max_nodes: Optional[int],
*, *,
highlight_id: Optional[str] = None, highlight_id: Optional[str] = None,
@@ -811,7 +775,7 @@ def build_full_graph_payload(
if (not existing.get("title") or existing["title"] == node_id) and doc.get("title"): if (not existing.get("title") or existing["title"] == node_id) and doc.get("title"):
existing["title"] = doc["title"] existing["title"] = doc["title"]
if not existing.get("channel_id") and doc.get("channel_id"): if not existing.get("channel_id") and doc.get("channel_id"):
existing["channel_id"] = doc["channel_id"] existing["channel_id"] = doc.get("channel_id")
if ( if (
existing.get("channel_name") in {"Unknown", node_id, None} existing.get("channel_name") in {"Unknown", node_id, None}
and (doc.get("channel_name") or doc.get("channel_id")) and (doc.get("channel_name") or doc.get("channel_id"))
@@ -852,9 +816,7 @@ def build_full_graph_payload(
scroll_id: Optional[str] = None scroll_id: Optional[str] = None
try: try:
body = {"query": query, "_source": source_fields, "sort": ["_doc"]} body = {"query": query, "_source": source_fields, "sort": ["_doc"]}
response = client.search( response = client.search(index=index, body=body, size=batch_size, scroll="1m")
index=index, body=body, size=batch_size, scroll="1m"
)
scroll_id = response.get("_scroll_id") scroll_id = response.get("_scroll_id")
stop_fetch = False stop_fetch = False
while not stop_fetch: while not stop_fetch:
@@ -919,12 +881,6 @@ def build_full_graph_payload(
if link.get("source") in nodes and link.get("target") in nodes if link.get("source") in nodes and link.get("target") in nodes
] ]
links = [
link
for link in links
if link.get("source") in nodes and link.get("target") in nodes
]
return { return {
"root": highlight_id or "", "root": highlight_id or "",
"depth": 0, "depth": 0,
@@ -938,6 +894,58 @@ def build_full_graph_payload(
}, },
} }
def create_app(config: AppConfig = CONFIG) -> Flask:
app = Flask(__name__, static_folder=str(Path(__file__).parent / "static"))
client = _ensure_client(config)
index = config.elastic.index
qdrant_url = config.qdrant_url
qdrant_collection = config.qdrant_collection
qdrant_vector_name = config.qdrant_vector_name
qdrant_vector_size = config.qdrant_vector_size
qdrant_embed_model = config.qdrant_embed_model
@app.route("/")
def index_page():
    """Serve ``index.html`` from the app's static folder for the root URL."""
    static_root = app.static_folder
    return send_from_directory(static_root, "index.html")
@app.route("/graph")
def graph_page():
    """Serve ``graph.html`` from the app's static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, "graph.html")
@app.route("/vector-search")
def vector_search_page():
    """Serve ``vector.html`` from the app's static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, "vector.html")
@app.route("/static/<path:filename>")
def static_files(filename: str):
    """Serve the requested ``filename`` from the app's static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, filename)
def normalize_reference_list(values: Any) -> List[str]:
    """Flatten a raw reference field into a list of clean ID strings.

    ``values`` may be ``None``, a single scalar or dict, or a
    list/tuple/set/frozenset of such items. Dict items contribute their
    ``"video_id"`` value, falling back to ``"id"``; any other item is used
    as-is. Each candidate is converted with ``str()`` and stripped.
    ``None`` candidates, blank strings and the placeholders ``"none"`` /
    ``"null"`` (case-insensitive) are dropped. Input iteration order is
    kept and duplicates are not removed.

    Returns:
        The cleaned strings; an empty list when ``values`` is ``None`` or
        nothing survives filtering.
    """
    if values is None:
        return []
    # A lone scalar or dict is treated as a one-element collection.
    # frozenset is accepted next to set so that it is iterated instead of
    # being stringified as a whole into a single bogus ID.
    if isinstance(values, (list, tuple, set, frozenset)):
        items = values
    else:
        items = [values]

    normalized: List[str] = []
    for item in items:
        if isinstance(item, dict):
            candidate = item.get("video_id") or item.get("id")
        else:
            candidate = item
        if candidate is None:
            continue
        text = str(candidate).strip()
        # Skip blanks and stringified null placeholders.
        if not text or text.lower() in {"none", "null"}:
            continue
        normalized.append(text)
    return normalized
@app.route("/api/channels") @app.route("/api/channels")
def channels(): def channels():
include_external = request.args.get("external", default="0", type=str) include_external = request.args.get("external", default="0", type=str)
@@ -952,6 +960,15 @@ def build_full_graph_payload(
"top_hits": { "top_hits": {
"size": 1, "size": 1,
"_source": {"includes": ["channel_name"]}, "_source": {"includes": ["channel_name"]},
"sort": [
{
"channel_name.keyword": {
"order": "asc",
"missing": "_last",
"unmapped_type": "keyword",
}
}
],
} }
} }
}, },
@@ -1050,13 +1067,20 @@ def build_full_graph_payload(
if full_graph: if full_graph:
payload = build_full_graph_payload( payload = build_full_graph_payload(
client,
index,
None, None,
highlight_id=video_id or None, highlight_id=video_id or None,
include_external=include_external, include_external=include_external,
) )
else: else:
payload = build_graph_payload( payload = build_graph_payload(
video_id, depth, max_nodes, include_external=include_external client,
index,
video_id,
depth,
max_nodes,
include_external=include_external,
) )
if not payload["nodes"]: if not payload["nodes"]:
return ( return (
@@ -1367,6 +1391,15 @@ def build_full_graph_payload(
"top_hits": { "top_hits": {
"size": 1, "size": 1,
"_source": {"includes": ["channel_name"]}, "_source": {"includes": ["channel_name"]},
"sort": [
{
"channel_name.keyword": {
"order": "asc",
"missing": "_last",
"unmapped_type": "keyword",
}
}
],
} }
} }
}, },