Add graph traversal endpoints and sort metrics by channel name

This commit is contained in:
knight 2026-01-08 14:22:01 -05:00
parent 9dd74111e7
commit d26edda029

View File

@ -201,6 +201,15 @@ def elastic_metrics_payload(
"top_hits": {
"size": 1,
"_source": {"includes": ["channel_name"]},
"sort": [
{
"channel_name.keyword": {
"order": "asc",
"missing": "_last",
"unmapped_type": "keyword",
}
}
],
}
}
},
@ -568,58 +577,14 @@ def build_query_payload(
return body
def create_app(config: AppConfig = CONFIG) -> Flask:
app = Flask(__name__, static_folder=str(Path(__file__).parent / "static"))
client = _ensure_client(config)
index = config.elastic.index
qdrant_url = config.qdrant_url
qdrant_collection = config.qdrant_collection
qdrant_vector_name = config.qdrant_vector_name
qdrant_vector_size = config.qdrant_vector_size
qdrant_embed_model = config.qdrant_embed_model
@app.route("/")
def index_page():
return send_from_directory(app.static_folder, "index.html")
@app.route("/graph")
def graph_page():
return send_from_directory(app.static_folder, "graph.html")
@app.route("/vector-search")
def vector_search_page():
return send_from_directory(app.static_folder, "vector.html")
@app.route("/static/<path:filename>")
def static_files(filename: str):
return send_from_directory(app.static_folder, filename)
def normalize_reference_list(values: Any) -> List[str]:
    """Coerce a loosely-typed reference field into a clean list of ID strings.

    ``values`` may be ``None``, a single scalar or dict, or a collection
    (list, tuple, set, frozenset) of scalars and/or dicts.  Dict entries
    contribute their ``"video_id"`` value, falling back to ``"id"``.

    Each entry is stringified and stripped; ``None``, blank strings and the
    literal placeholders ``"none"``/``"null"`` (any case) are dropped.
    Entries are emitted in iteration order and duplicates are kept.
    """
    if values is None:
        return []
    # A lone scalar or dict is treated as a one-element collection.
    # frozenset is listed next to set so it is iterated instead of being
    # stringified whole into a single bogus ID.
    if isinstance(values, (list, tuple, set, frozenset)):
        iterable = values
    else:
        iterable = [values]

    normalized: List[str] = []
    for item in iterable:
        if isinstance(item, dict):
            candidate = item.get("video_id") or item.get("id")
        else:
            candidate = item
        if candidate is None:
            continue
        text = str(candidate).strip()
        # Skip blanks and stringified null placeholders.
        if not text or text.lower() in {"none", "null"}:
            continue
        normalized.append(text)
    return normalized
def build_graph_payload(
root_id: str, depth: int, max_nodes: int, *, include_external: bool = True
client: "Elasticsearch",
index: str,
root_id: str,
depth: int,
max_nodes: int,
*,
include_external: bool = True,
) -> Dict[str, Any]:
root_id = root_id.strip()
if not root_id:
@ -637,11 +602,7 @@ def build_graph_payload(
LOGGER.debug("Graph: failed to load %s: %s", video_id, exc)
doc_cache[video_id] = None
doc = doc_cache[video_id]
if (
doc is not None
and not include_external
and doc.get("external_reference")
):
if doc is not None and not include_external and doc.get("external_reference"):
doc_cache[video_id] = None
return None
return doc_cache[video_id]
@ -760,7 +721,10 @@ def build_graph_payload(
},
}
def build_full_graph_payload(
client: "Elasticsearch",
index: str,
max_nodes: Optional[int],
*,
highlight_id: Optional[str] = None,
@ -811,7 +775,7 @@ def build_full_graph_payload(
if (not existing.get("title") or existing["title"] == node_id) and doc.get("title"):
existing["title"] = doc["title"]
if not existing.get("channel_id") and doc.get("channel_id"):
existing["channel_id"] = doc["channel_id"]
existing["channel_id"] = doc.get("channel_id")
if (
existing.get("channel_name") in {"Unknown", node_id, None}
and (doc.get("channel_name") or doc.get("channel_id"))
@ -852,9 +816,7 @@ def build_full_graph_payload(
scroll_id: Optional[str] = None
try:
body = {"query": query, "_source": source_fields, "sort": ["_doc"]}
response = client.search(
index=index, body=body, size=batch_size, scroll="1m"
)
response = client.search(index=index, body=body, size=batch_size, scroll="1m")
scroll_id = response.get("_scroll_id")
stop_fetch = False
while not stop_fetch:
@ -919,12 +881,6 @@ def build_full_graph_payload(
if link.get("source") in nodes and link.get("target") in nodes
]
links = [
link
for link in links
if link.get("source") in nodes and link.get("target") in nodes
]
return {
"root": highlight_id or "",
"depth": 0,
@ -938,6 +894,58 @@ def build_full_graph_payload(
},
}
def create_app(config: AppConfig = CONFIG) -> Flask:
app = Flask(__name__, static_folder=str(Path(__file__).parent / "static"))
client = _ensure_client(config)
index = config.elastic.index
qdrant_url = config.qdrant_url
qdrant_collection = config.qdrant_collection
qdrant_vector_name = config.qdrant_vector_name
qdrant_vector_size = config.qdrant_vector_size
qdrant_embed_model = config.qdrant_embed_model
@app.route("/")
def index_page():
return send_from_directory(app.static_folder, "index.html")
@app.route("/graph")
def graph_page():
return send_from_directory(app.static_folder, "graph.html")
@app.route("/vector-search")
def vector_search_page():
return send_from_directory(app.static_folder, "vector.html")
@app.route("/static/<path:filename>")
def static_files(filename: str):
return send_from_directory(app.static_folder, filename)
def normalize_reference_list(values: Any) -> List[str]:
    """Coerce a loosely-typed reference field into a clean list of ID strings.

    ``values`` may be ``None``, a single scalar or dict, or a collection
    (list, tuple, set, frozenset) of scalars and/or dicts.  Dict entries
    contribute their ``"video_id"`` value, falling back to ``"id"``.

    Each entry is stringified and stripped; ``None``, blank strings and the
    literal placeholders ``"none"``/``"null"`` (any case) are dropped.
    Entries are emitted in iteration order and duplicates are kept.
    """
    if values is None:
        return []
    # A lone scalar or dict is treated as a one-element collection.
    # frozenset is listed next to set so it is iterated instead of being
    # stringified whole into a single bogus ID.
    if isinstance(values, (list, tuple, set, frozenset)):
        iterable = values
    else:
        iterable = [values]

    normalized: List[str] = []
    for item in iterable:
        if isinstance(item, dict):
            candidate = item.get("video_id") or item.get("id")
        else:
            candidate = item
        if candidate is None:
            continue
        text = str(candidate).strip()
        # Skip blanks and stringified null placeholders.
        if not text or text.lower() in {"none", "null"}:
            continue
        normalized.append(text)
    return normalized
@app.route("/api/channels")
def channels():
include_external = request.args.get("external", default="0", type=str)
@ -952,6 +960,15 @@ def build_full_graph_payload(
"top_hits": {
"size": 1,
"_source": {"includes": ["channel_name"]},
"sort": [
{
"channel_name.keyword": {
"order": "asc",
"missing": "_last",
"unmapped_type": "keyword",
}
}
],
}
}
},
@ -1050,13 +1067,20 @@ def build_full_graph_payload(
if full_graph:
payload = build_full_graph_payload(
client,
index,
None,
highlight_id=video_id or None,
include_external=include_external,
)
else:
payload = build_graph_payload(
video_id, depth, max_nodes, include_external=include_external
client,
index,
video_id,
depth,
max_nodes,
include_external=include_external,
)
if not payload["nodes"]:
return (
@ -1367,6 +1391,15 @@ def build_full_graph_payload(
"top_hits": {
"size": 1,
"_source": {"includes": ["channel_name"]},
"sort": [
{
"channel_name.keyword": {
"order": "asc",
"missing": "_last",
"unmapped_type": "keyword",
}
}
],
}
}
},