Add graph traversal endpoints and sort metrics by channel name
This commit is contained in:
167
search_app.py
167
search_app.py
@@ -201,6 +201,15 @@ def elastic_metrics_payload(
|
|||||||
"top_hits": {
|
"top_hits": {
|
||||||
"size": 1,
|
"size": 1,
|
||||||
"_source": {"includes": ["channel_name"]},
|
"_source": {"includes": ["channel_name"]},
|
||||||
|
"sort": [
|
||||||
|
{
|
||||||
|
"channel_name.keyword": {
|
||||||
|
"order": "asc",
|
||||||
|
"missing": "_last",
|
||||||
|
"unmapped_type": "keyword",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -568,58 +577,14 @@ def build_query_payload(
|
|||||||
return body
|
return body
|
||||||
|
|
||||||
|
|
||||||
def create_app(config: AppConfig = CONFIG) -> Flask:
|
|
||||||
app = Flask(__name__, static_folder=str(Path(__file__).parent / "static"))
|
|
||||||
client = _ensure_client(config)
|
|
||||||
index = config.elastic.index
|
|
||||||
qdrant_url = config.qdrant_url
|
|
||||||
qdrant_collection = config.qdrant_collection
|
|
||||||
qdrant_vector_name = config.qdrant_vector_name
|
|
||||||
qdrant_vector_size = config.qdrant_vector_size
|
|
||||||
qdrant_embed_model = config.qdrant_embed_model
|
|
||||||
|
|
||||||
@app.route("/")
|
|
||||||
def index_page():
|
|
||||||
return send_from_directory(app.static_folder, "index.html")
|
|
||||||
|
|
||||||
@app.route("/graph")
|
|
||||||
def graph_page():
|
|
||||||
return send_from_directory(app.static_folder, "graph.html")
|
|
||||||
|
|
||||||
@app.route("/vector-search")
|
|
||||||
def vector_search_page():
|
|
||||||
return send_from_directory(app.static_folder, "vector.html")
|
|
||||||
|
|
||||||
@app.route("/static/<path:filename>")
|
|
||||||
def static_files(filename: str):
|
|
||||||
return send_from_directory(app.static_folder, filename)
|
|
||||||
|
|
||||||
def normalize_reference_list(values: Any) -> List[str]:
|
|
||||||
if values is None:
|
|
||||||
return []
|
|
||||||
if isinstance(values, (list, tuple, set)):
|
|
||||||
iterable = values
|
|
||||||
else:
|
|
||||||
iterable = [values]
|
|
||||||
normalized: List[str] = []
|
|
||||||
for item in iterable:
|
|
||||||
candidate: Optional[str]
|
|
||||||
if isinstance(item, dict):
|
|
||||||
candidate = item.get("video_id") or item.get("id") # type: ignore[assignment]
|
|
||||||
else:
|
|
||||||
candidate = item # type: ignore[assignment]
|
|
||||||
if candidate is None:
|
|
||||||
continue
|
|
||||||
text = str(candidate).strip()
|
|
||||||
if not text:
|
|
||||||
continue
|
|
||||||
if text.lower() in {"none", "null"}:
|
|
||||||
continue
|
|
||||||
normalized.append(text)
|
|
||||||
return normalized
|
|
||||||
|
|
||||||
def build_graph_payload(
|
def build_graph_payload(
|
||||||
root_id: str, depth: int, max_nodes: int, *, include_external: bool = True
|
client: "Elasticsearch",
|
||||||
|
index: str,
|
||||||
|
root_id: str,
|
||||||
|
depth: int,
|
||||||
|
max_nodes: int,
|
||||||
|
*,
|
||||||
|
include_external: bool = True,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
root_id = root_id.strip()
|
root_id = root_id.strip()
|
||||||
if not root_id:
|
if not root_id:
|
||||||
@@ -637,11 +602,7 @@ def build_graph_payload(
|
|||||||
LOGGER.debug("Graph: failed to load %s: %s", video_id, exc)
|
LOGGER.debug("Graph: failed to load %s: %s", video_id, exc)
|
||||||
doc_cache[video_id] = None
|
doc_cache[video_id] = None
|
||||||
doc = doc_cache[video_id]
|
doc = doc_cache[video_id]
|
||||||
if (
|
if doc is not None and not include_external and doc.get("external_reference"):
|
||||||
doc is not None
|
|
||||||
and not include_external
|
|
||||||
and doc.get("external_reference")
|
|
||||||
):
|
|
||||||
doc_cache[video_id] = None
|
doc_cache[video_id] = None
|
||||||
return None
|
return None
|
||||||
return doc_cache[video_id]
|
return doc_cache[video_id]
|
||||||
@@ -760,7 +721,10 @@ def build_graph_payload(
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def build_full_graph_payload(
|
def build_full_graph_payload(
|
||||||
|
client: "Elasticsearch",
|
||||||
|
index: str,
|
||||||
max_nodes: Optional[int],
|
max_nodes: Optional[int],
|
||||||
*,
|
*,
|
||||||
highlight_id: Optional[str] = None,
|
highlight_id: Optional[str] = None,
|
||||||
@@ -811,7 +775,7 @@ def build_full_graph_payload(
|
|||||||
if (not existing.get("title") or existing["title"] == node_id) and doc.get("title"):
|
if (not existing.get("title") or existing["title"] == node_id) and doc.get("title"):
|
||||||
existing["title"] = doc["title"]
|
existing["title"] = doc["title"]
|
||||||
if not existing.get("channel_id") and doc.get("channel_id"):
|
if not existing.get("channel_id") and doc.get("channel_id"):
|
||||||
existing["channel_id"] = doc["channel_id"]
|
existing["channel_id"] = doc.get("channel_id")
|
||||||
if (
|
if (
|
||||||
existing.get("channel_name") in {"Unknown", node_id, None}
|
existing.get("channel_name") in {"Unknown", node_id, None}
|
||||||
and (doc.get("channel_name") or doc.get("channel_id"))
|
and (doc.get("channel_name") or doc.get("channel_id"))
|
||||||
@@ -852,9 +816,7 @@ def build_full_graph_payload(
|
|||||||
scroll_id: Optional[str] = None
|
scroll_id: Optional[str] = None
|
||||||
try:
|
try:
|
||||||
body = {"query": query, "_source": source_fields, "sort": ["_doc"]}
|
body = {"query": query, "_source": source_fields, "sort": ["_doc"]}
|
||||||
response = client.search(
|
response = client.search(index=index, body=body, size=batch_size, scroll="1m")
|
||||||
index=index, body=body, size=batch_size, scroll="1m"
|
|
||||||
)
|
|
||||||
scroll_id = response.get("_scroll_id")
|
scroll_id = response.get("_scroll_id")
|
||||||
stop_fetch = False
|
stop_fetch = False
|
||||||
while not stop_fetch:
|
while not stop_fetch:
|
||||||
@@ -919,12 +881,6 @@ def build_full_graph_payload(
|
|||||||
if link.get("source") in nodes and link.get("target") in nodes
|
if link.get("source") in nodes and link.get("target") in nodes
|
||||||
]
|
]
|
||||||
|
|
||||||
links = [
|
|
||||||
link
|
|
||||||
for link in links
|
|
||||||
if link.get("source") in nodes and link.get("target") in nodes
|
|
||||||
]
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"root": highlight_id or "",
|
"root": highlight_id or "",
|
||||||
"depth": 0,
|
"depth": 0,
|
||||||
@@ -938,6 +894,58 @@ def build_full_graph_payload(
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def create_app(config: AppConfig = CONFIG) -> Flask:
|
||||||
|
app = Flask(__name__, static_folder=str(Path(__file__).parent / "static"))
|
||||||
|
client = _ensure_client(config)
|
||||||
|
index = config.elastic.index
|
||||||
|
qdrant_url = config.qdrant_url
|
||||||
|
qdrant_collection = config.qdrant_collection
|
||||||
|
qdrant_vector_name = config.qdrant_vector_name
|
||||||
|
qdrant_vector_size = config.qdrant_vector_size
|
||||||
|
qdrant_embed_model = config.qdrant_embed_model
|
||||||
|
|
||||||
|
@app.route("/")
|
||||||
|
def index_page():
|
||||||
|
return send_from_directory(app.static_folder, "index.html")
|
||||||
|
|
||||||
|
@app.route("/graph")
|
||||||
|
def graph_page():
|
||||||
|
return send_from_directory(app.static_folder, "graph.html")
|
||||||
|
|
||||||
|
@app.route("/vector-search")
|
||||||
|
def vector_search_page():
|
||||||
|
return send_from_directory(app.static_folder, "vector.html")
|
||||||
|
|
||||||
|
@app.route("/static/<path:filename>")
|
||||||
|
def static_files(filename: str):
|
||||||
|
return send_from_directory(app.static_folder, filename)
|
||||||
|
|
||||||
|
def normalize_reference_list(values: Any) -> List[str]:
|
||||||
|
if values is None:
|
||||||
|
return []
|
||||||
|
if isinstance(values, (list, tuple, set)):
|
||||||
|
iterable = values
|
||||||
|
else:
|
||||||
|
iterable = [values]
|
||||||
|
normalized: List[str] = []
|
||||||
|
for item in iterable:
|
||||||
|
candidate: Optional[str]
|
||||||
|
if isinstance(item, dict):
|
||||||
|
candidate = item.get("video_id") or item.get("id") # type: ignore[assignment]
|
||||||
|
else:
|
||||||
|
candidate = item # type: ignore[assignment]
|
||||||
|
if candidate is None:
|
||||||
|
continue
|
||||||
|
text = str(candidate).strip()
|
||||||
|
if not text:
|
||||||
|
continue
|
||||||
|
if text.lower() in {"none", "null"}:
|
||||||
|
continue
|
||||||
|
normalized.append(text)
|
||||||
|
return normalized
|
||||||
|
|
||||||
|
|
||||||
@app.route("/api/channels")
|
@app.route("/api/channels")
|
||||||
def channels():
|
def channels():
|
||||||
include_external = request.args.get("external", default="0", type=str)
|
include_external = request.args.get("external", default="0", type=str)
|
||||||
@@ -952,6 +960,15 @@ def build_full_graph_payload(
|
|||||||
"top_hits": {
|
"top_hits": {
|
||||||
"size": 1,
|
"size": 1,
|
||||||
"_source": {"includes": ["channel_name"]},
|
"_source": {"includes": ["channel_name"]},
|
||||||
|
"sort": [
|
||||||
|
{
|
||||||
|
"channel_name.keyword": {
|
||||||
|
"order": "asc",
|
||||||
|
"missing": "_last",
|
||||||
|
"unmapped_type": "keyword",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -1050,13 +1067,20 @@ def build_full_graph_payload(
|
|||||||
|
|
||||||
if full_graph:
|
if full_graph:
|
||||||
payload = build_full_graph_payload(
|
payload = build_full_graph_payload(
|
||||||
|
client,
|
||||||
|
index,
|
||||||
None,
|
None,
|
||||||
highlight_id=video_id or None,
|
highlight_id=video_id or None,
|
||||||
include_external=include_external,
|
include_external=include_external,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
payload = build_graph_payload(
|
payload = build_graph_payload(
|
||||||
video_id, depth, max_nodes, include_external=include_external
|
client,
|
||||||
|
index,
|
||||||
|
video_id,
|
||||||
|
depth,
|
||||||
|
max_nodes,
|
||||||
|
include_external=include_external,
|
||||||
)
|
)
|
||||||
if not payload["nodes"]:
|
if not payload["nodes"]:
|
||||||
return (
|
return (
|
||||||
@@ -1367,6 +1391,15 @@ def build_full_graph_payload(
|
|||||||
"top_hits": {
|
"top_hits": {
|
||||||
"size": 1,
|
"size": 1,
|
||||||
"_source": {"includes": ["channel_name"]},
|
"_source": {"includes": ["channel_name"]},
|
||||||
|
"sort": [
|
||||||
|
{
|
||||||
|
"channel_name.keyword": {
|
||||||
|
"order": "asc",
|
||||||
|
"missing": "_last",
|
||||||
|
"unmapped_type": "keyword",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user