Add graph traversal endpoints and sort metrics by channel name

parent 9dd74111e7
commit d26edda029

search_app.py (167 changed lines)
@@ -201,6 +201,15 @@ def elastic_metrics_payload(
                "top_hits": {
                    "size": 1,
                    "_source": {"includes": ["channel_name"]},
                    "sort": [
                        {
                            "channel_name.keyword": {
                                "order": "asc",
                                "missing": "_last",
                                "unmapped_type": "keyword",
                            }
                        }
                    ],
                }
            }
        },
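This hunk only shows the top_hits portion of the metrics query. For orientation, a minimal sketch of the kind of terms aggregation such a block usually sits inside (the aggregation names by_channel and channel, the field channel_id.keyword, and the bucket size are assumptions, not taken from this diff):

agg_body = {
    "size": 0,
    "aggs": {
        "by_channel": {  # assumed bucket aggregation name
            "terms": {"field": "channel_id.keyword", "size": 1000},  # assumed field and size
            "aggs": {
                "channel": {  # assumed sub-aggregation name
                    "top_hits": {
                        "size": 1,
                        "_source": {"includes": ["channel_name"]},
                        "sort": [
                            {
                                "channel_name.keyword": {
                                    "order": "asc",
                                    "missing": "_last",
                                    "unmapped_type": "keyword",
                                }
                            }
                        ],
                    }
                }
            },
        }
    },
}
# response = client.search(index=index, body=agg_body)

With size 1 plus this sort, each bucket exposes one deterministic representative document, so the displayed channel_name no longer depends on shard ordering; unmapped_type and missing keep the sort from failing on indices that lack the keyword sub-field.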
@@ -568,58 +577,14 @@ def build_query_payload(
    return body


def create_app(config: AppConfig = CONFIG) -> Flask:
    app = Flask(__name__, static_folder=str(Path(__file__).parent / "static"))
    client = _ensure_client(config)
    index = config.elastic.index
    qdrant_url = config.qdrant_url
    qdrant_collection = config.qdrant_collection
    qdrant_vector_name = config.qdrant_vector_name
    qdrant_vector_size = config.qdrant_vector_size
    qdrant_embed_model = config.qdrant_embed_model

    @app.route("/")
    def index_page():
        return send_from_directory(app.static_folder, "index.html")

    @app.route("/graph")
    def graph_page():
        return send_from_directory(app.static_folder, "graph.html")

    @app.route("/vector-search")
    def vector_search_page():
        return send_from_directory(app.static_folder, "vector.html")

    @app.route("/static/<path:filename>")
    def static_files(filename: str):
        return send_from_directory(app.static_folder, filename)

    def normalize_reference_list(values: Any) -> List[str]:
        if values is None:
            return []
        if isinstance(values, (list, tuple, set)):
            iterable = values
        else:
            iterable = [values]
        normalized: List[str] = []
        for item in iterable:
            candidate: Optional[str]
            if isinstance(item, dict):
                candidate = item.get("video_id") or item.get("id")  # type: ignore[assignment]
            else:
                candidate = item  # type: ignore[assignment]
            if candidate is None:
                continue
            text = str(candidate).strip()
            if not text:
                continue
            if text.lower() in {"none", "null"}:
                continue
            normalized.append(text)
        return normalized

def build_graph_payload(
    root_id: str, depth: int, max_nodes: int, *, include_external: bool = True
    client: "Elasticsearch",
    index: str,
    root_id: str,
    depth: int,
    max_nodes: int,
    *,
    include_external: bool = True,
) -> Dict[str, Any]:
    root_id = root_id.strip()
    if not root_id:
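normalize_reference_list accepts a single value, an iterable, or dicts carrying video_id / id keys, trims whitespace, and drops empty strings and the literal placeholders "none" / "null". A small illustrative run, assuming the helper is in scope (in this revision it may live inside create_app rather than at module level):

refs = [
    {"video_id": "v1"},
    {"id": "v2"},
    {},        # no usable key -> skipped
    " v3 ",    # trimmed
    "None",    # placeholder -> skipped
    "",        # empty -> skipped
]
print(normalize_reference_list(refs))   # ['v1', 'v2', 'v3']
print(normalize_reference_list(None))   # []
print(normalize_reference_list("v4"))   # ['v4'] (a single value is wrapped)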
@@ -637,11 +602,7 @@ def build_graph_payload(
            LOGGER.debug("Graph: failed to load %s: %s", video_id, exc)
            doc_cache[video_id] = None
        doc = doc_cache[video_id]
        if (
            doc is not None
            and not include_external
            and doc.get("external_reference")
        ):
        if doc is not None and not include_external and doc.get("external_reference"):
            doc_cache[video_id] = None
            return None
        return doc_cache[video_id]
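The block above memoizes per-video lookups in a doc_cache dict and, when include_external is false, records externally referenced documents as None so repeat visits skip them. A self-contained sketch of that pattern (make_fetcher, fetch_doc and the client.get call are illustrative, not the exact helper in this file):

from typing import Any, Dict, Optional

def make_fetcher(client, index: str, include_external: bool):
    doc_cache: Dict[str, Optional[Dict[str, Any]]] = {}

    def fetch_doc(video_id: str) -> Optional[Dict[str, Any]]:
        if video_id not in doc_cache:
            try:
                doc_cache[video_id] = client.get(index=index, id=video_id)["_source"]
            except Exception:
                # the real code logs the failure via LOGGER.debug
                doc_cache[video_id] = None
        doc = doc_cache[video_id]
        if doc is not None and not include_external and doc.get("external_reference"):
            doc_cache[video_id] = None
            return None
        return doc_cache[video_id]

    return fetch_doc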
@@ -760,7 +721,10 @@ def build_graph_payload(
        },
    }


def build_full_graph_payload(
    client: "Elasticsearch",
    index: str,
    max_nodes: Optional[int],
    *,
    highlight_id: Optional[str] = None,
@@ -811,7 +775,7 @@ def build_full_graph_payload(
        if (not existing.get("title") or existing["title"] == node_id) and doc.get("title"):
            existing["title"] = doc["title"]
        if not existing.get("channel_id") and doc.get("channel_id"):
            existing["channel_id"] = doc["channel_id"]
            existing["channel_id"] = doc.get("channel_id")
        if (
            existing.get("channel_name") in {"Unknown", node_id, None}
            and (doc.get("channel_name") or doc.get("channel_id"))
@@ -852,9 +816,7 @@ def build_full_graph_payload(
    scroll_id: Optional[str] = None
    try:
        body = {"query": query, "_source": source_fields, "sort": ["_doc"]}
        response = client.search(
            index=index, body=body, size=batch_size, scroll="1m"
        )
        response = client.search(index=index, body=body, size=batch_size, scroll="1m")
        scroll_id = response.get("_scroll_id")
        stop_fetch = False
        while not stop_fetch:
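This hunk only opens the scroll. The full pattern, sketched here as a standalone generator with an assumed batch size and an explicit clear_scroll on exit (the real loop body and stop conditions are longer than what the hunk shows):

def iter_all_hits(client, index, query, source_fields, batch_size=500):
    # Hypothetical sketch of the scroll loop this block initiates.
    body = {"query": query, "_source": source_fields, "sort": ["_doc"]}
    response = client.search(index=index, body=body, size=batch_size, scroll="1m")
    scroll_id = response.get("_scroll_id")
    try:
        while True:
            hits = response.get("hits", {}).get("hits", [])
            if not hits:
                break
            yield from hits
            response = client.scroll(scroll_id=scroll_id, scroll="1m")
            scroll_id = response.get("_scroll_id")
    finally:
        if scroll_id:
            client.clear_scroll(scroll_id=scroll_id)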
@@ -919,12 +881,6 @@ def build_full_graph_payload(
            if link.get("source") in nodes and link.get("target") in nodes
        ]

    links = [
        link
        for link in links
        if link.get("source") in nodes and link.get("target") in nodes
    ]

    return {
        "root": highlight_id or "",
        "depth": 0,
@@ -938,6 +894,58 @@ def build_full_graph_payload(
        },
    }


def create_app(config: AppConfig = CONFIG) -> Flask:
    app = Flask(__name__, static_folder=str(Path(__file__).parent / "static"))
    client = _ensure_client(config)
    index = config.elastic.index
    qdrant_url = config.qdrant_url
    qdrant_collection = config.qdrant_collection
    qdrant_vector_name = config.qdrant_vector_name
    qdrant_vector_size = config.qdrant_vector_size
    qdrant_embed_model = config.qdrant_embed_model

    @app.route("/")
    def index_page():
        return send_from_directory(app.static_folder, "index.html")

    @app.route("/graph")
    def graph_page():
        return send_from_directory(app.static_folder, "graph.html")

    @app.route("/vector-search")
    def vector_search_page():
        return send_from_directory(app.static_folder, "vector.html")

    @app.route("/static/<path:filename>")
    def static_files(filename: str):
        return send_from_directory(app.static_folder, filename)

    def normalize_reference_list(values: Any) -> List[str]:
        if values is None:
            return []
        if isinstance(values, (list, tuple, set)):
            iterable = values
        else:
            iterable = [values]
        normalized: List[str] = []
        for item in iterable:
            candidate: Optional[str]
            if isinstance(item, dict):
                candidate = item.get("video_id") or item.get("id")  # type: ignore[assignment]
            else:
                candidate = item  # type: ignore[assignment]
            if candidate is None:
                continue
            text = str(candidate).strip()
            if not text:
                continue
            if text.lower() in {"none", "null"}:
                continue
            normalized.append(text)
        return normalized


    @app.route("/api/channels")
    def channels():
        include_external = request.args.get("external", default="0", type=str)
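A quick smoke test for the page routes registered above, using Flask's built-in test client (assumes the default CONFIG points at a reachable Elasticsearch so create_app can build its client, and that the static HTML files exist):

from search_app import create_app

app = create_app()
with app.test_client() as http:
    for path in ("/", "/graph", "/vector-search"):
        response = http.get(path)
        print(path, response.status_code)  # 200 expected when the static pages are present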
@@ -952,6 +960,15 @@ def build_full_graph_payload(
                "top_hits": {
                    "size": 1,
                    "_source": {"includes": ["channel_name"]},
                    "sort": [
                        {
                            "channel_name.keyword": {
                                "order": "asc",
                                "missing": "_last",
                                "unmapped_type": "keyword",
                            }
                        }
                    ],
                }
            }
        },
@@ -1050,13 +1067,20 @@ def build_full_graph_payload(

        if full_graph:
            payload = build_full_graph_payload(
                client,
                index,
                None,
                highlight_id=video_id or None,
                include_external=include_external,
            )
        else:
            payload = build_graph_payload(
                video_id, depth, max_nodes, include_external=include_external
                client,
                index,
                video_id,
                depth,
                max_nodes,
                include_external=include_external,
            )
        if not payload["nodes"]:
            return (
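The handler above serves either a rooted traversal (build_graph_payload) or the whole graph (build_full_graph_payload) depending on a full_graph flag. A hypothetical client-side call, assuming the endpoint is exposed under /api/graph with video_id, depth, max_nodes, full, and external query parameters (the exact route path and parameter names are not visible in this hunk):

import requests

params = {"video_id": "abc123", "depth": 2, "max_nodes": 200, "full": "0", "external": "0"}
resp = requests.get("http://localhost:5000/api/graph", params=params, timeout=30)
payload = resp.json()
print(len(payload["nodes"]), "nodes and", len(payload["links"]), "links")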
@@ -1367,6 +1391,15 @@ def build_full_graph_payload(
                "top_hits": {
                    "size": 1,
                    "_source": {"includes": ["channel_name"]},
                    "sort": [
                        {
                            "channel_name.keyword": {
                                "order": "asc",
                                "missing": "_last",
                                "unmapped_type": "keyword",
                            }
                        }
                    ],
                }
            }
        },
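The identical sort clause now appears in three separate top_hits blocks. If that duplication becomes a maintenance concern, one option (a sketch, not something this commit does) is to hoist it into a module-level constant and reuse it:

# Hypothetical refactor: define the clause once and reference it in each aggregation.
CHANNEL_NAME_SORT = [
    {
        "channel_name.keyword": {
            "order": "asc",
            "missing": "_last",
            "unmapped_type": "keyword",
        }
    }
]

top_hits = {
    "size": 1,
    "_source": {"includes": ["channel_name"]},
    "sort": CHANNEL_NAME_SORT,
}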