From 93774c025f80c002ae26b8e33c755b45b3a82028 Mon Sep 17 00:00:00 2001 From: knight Date: Thu, 20 Nov 2025 09:54:41 -0500 Subject: [PATCH] Respect external filter in metrics and graph --- search_app.py | 111 +++++++++++++++++++++++++++++++++++------------- static/app.js | 23 ++++++++-- static/graph.js | 67 ++++++++++++++++++++++++++--- 3 files changed, 161 insertions(+), 40 deletions(-) diff --git a/search_app.py b/search_app.py index fb30dfa..1d0e1ec 100644 --- a/search_app.py +++ b/search_app.py @@ -94,7 +94,7 @@ def _ensure_client(config: AppConfig) -> "Elasticsearch": return Elasticsearch(config.elastic.url, **kwargs) -def metrics_payload(data_root: Path) -> Dict[str, Any]: +def metrics_payload(data_root: Path, include_external: bool = True) -> Dict[str, Any]: total_items = 0 channel_counter: Counter = Counter() channel_name_map: Dict[str, str] = {} @@ -117,7 +117,8 @@ def metrics_payload(data_root: Path) -> Dict[str, Any]: doc = json.load(handle) except Exception: continue - + if not include_external and doc.get("external_reference"): + continue total_items += 1 channel_id = doc.get("channel_id") @@ -180,6 +181,7 @@ def elastic_metrics_payload( *, channel_field_candidates: Optional[List[str]] = None, debug: bool = False, + include_external: bool = True, ) -> Dict[str, Any]: if channel_field_candidates is None: channel_field_candidates = ["channel_id.keyword", "channel_id"] @@ -220,6 +222,8 @@ def elastic_metrics_payload( }, }, } + if not include_external: + base_body["query"] = {"bool": {"must_not": [{"term": {"external_reference": True}}]}} last_error: Optional[Exception] = None response: Optional[Dict[str, Any]] = None @@ -614,9 +618,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask: normalized.append(text) return normalized - def build_graph_payload( - root_id: str, depth: int, max_nodes: int - ) -> Dict[str, Any]: +def build_graph_payload( + root_id: str, depth: int, max_nodes: int, *, include_external: bool = True +) -> Dict[str, Any]: root_id = root_id.strip() if not root_id: return {"nodes": [], "links": [], "root": root_id, "depth": depth, "meta": {}} @@ -632,6 +636,14 @@ def create_app(config: AppConfig = CONFIG) -> Flask: except Exception as exc: # pragma: no cover - elasticsearch handles errors LOGGER.debug("Graph: failed to load %s: %s", video_id, exc) doc_cache[video_id] = None + doc = doc_cache[video_id] + if ( + doc is not None + and not include_external + and doc.get("external_reference") + ): + doc_cache[video_id] = None + return None return doc_cache[video_id] nodes: Dict[str, Dict[str, Any]] = {} @@ -662,6 +674,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask: "url": doc.get("url"), "date": doc.get("date"), "is_root": current_id == root_id, + "external_reference": bool(doc.get("external_reference")), } if level >= depth: @@ -707,25 +720,28 @@ def create_app(config: AppConfig = CONFIG) -> Flask: continue doc = fetch_document(node_id) if doc is None: - nodes[node_id] = { - "id": node_id, - "title": node_id, - "channel_id": None, - "channel_name": "Unknown", - "url": None, - "date": None, - "is_root": node_id == root_id, - } - else: - nodes[node_id] = { - "id": node_id, - "title": doc.get("title") or node_id, - "channel_id": doc.get("channel_id"), - "channel_name": doc.get("channel_name") or doc.get("channel_id") or "Unknown", - "url": doc.get("url"), - "date": doc.get("date"), - "is_root": node_id == root_id, - } + if include_external: + nodes[node_id] = { + "id": node_id, + "title": node_id, + "channel_id": None, + "channel_name": "Unknown", + "url": None, + "date": None, + "is_root": node_id == root_id, + "external_reference": False, + } + continue + nodes[node_id] = { + "id": node_id, + "title": doc.get("title") or node_id, + "channel_id": doc.get("channel_id"), + "channel_name": doc.get("channel_name") or doc.get("channel_id") or "Unknown", + "url": doc.get("url"), + "date": doc.get("date"), + "is_root": node_id == root_id, + "external_reference": bool(doc.get("external_reference")), + } links = [ link @@ -744,9 +760,12 @@ def create_app(config: AppConfig = CONFIG) -> Flask: }, } - def build_full_graph_payload( - max_nodes: Optional[int], *, highlight_id: Optional[str] = None - ) -> Dict[str, Any]: +def build_full_graph_payload( + max_nodes: Optional[int], + *, + highlight_id: Optional[str] = None, + include_external: bool = True, +) -> Dict[str, Any]: """ Attempt to render the entire reference graph by gathering every video that references another (or is referenced). @@ -785,6 +804,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask: return False if node_id in nodes: if doc: + if not include_external and doc.get("external_reference"): + nodes.pop(node_id, None) + return False existing = nodes[node_id] if (not existing.get("title") or existing["title"] == node_id) and doc.get("title"): existing["title"] = doc["title"] @@ -808,6 +830,8 @@ def create_app(config: AppConfig = CONFIG) -> Flask: date_val = None title = node_id if doc: + if not include_external and doc.get("external_reference"): + return False title = doc.get("title") or title channel_id = doc.get("channel_id") channel_name = doc.get("channel_name") or channel_id @@ -821,6 +845,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask: "url": url, "date": date_val, "is_root": False, + "external_reference": bool(doc.get("external_reference")) if doc else False, } return True @@ -845,6 +870,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask: video_id = source.get("video_id") if not video_id: continue + if not include_external and source.get("external_reference"): + nodes.pop(video_id, None) + continue if not ensure_node(video_id, source): continue for target in normalize_reference_list(source.get("internal_references")): @@ -885,6 +913,18 @@ def create_app(config: AppConfig = CONFIG) -> Flask: if highlight_id and highlight_id in nodes: nodes[highlight_id]["is_root"] = True + links = [ + link + for link in links + if link.get("source") in nodes and link.get("target") in nodes + ] + + links = [ + link + for link in links + if link.get("source") in nodes and link.get("target") in nodes + ] + return { "root": highlight_id or "", "depth": 0, @@ -990,6 +1030,8 @@ def create_app(config: AppConfig = CONFIG) -> Flask: full_graph = full_graph.lower() in {"1", "true", "yes"} if not full_graph and not video_id: return jsonify({"error": "video_id is required"}), 400 + include_external = request.args.get("external", default="1", type=str) + include_external = include_external.lower() not in {"0", "false", "no"} try: depth = int(request.args.get("depth", "1")) @@ -1007,9 +1049,15 @@ def create_app(config: AppConfig = CONFIG) -> Flask: max_nodes = max(10, min(max_nodes, 400)) if full_graph: - payload = build_full_graph_payload(None, highlight_id=video_id or None) + payload = build_full_graph_payload( + None, + highlight_id=video_id or None, + include_external=include_external, + ) else: - payload = build_graph_payload(video_id, depth, max_nodes) + payload = build_graph_payload( + video_id, depth, max_nodes, include_external=include_external + ) if not payload["nodes"]: return ( jsonify({"error": f"Video '{video_id}' was not found in the index."}), @@ -1196,19 +1244,22 @@ def create_app(config: AppConfig = CONFIG) -> Flask: @app.route("/api/metrics") def metrics(): + include_external = request.args.get("external", default="1", type=str) + include_external = include_external.lower() not in {"0", "false", "no"} try: data = elastic_metrics_payload( client, index, channel_field_candidates=["channel_id.keyword", "channel_id"], debug=config.elastic.debug, + include_external=include_external, ) except Exception: LOGGER.exception( "Falling back to local metrics payload due to Elasticsearch error.", exc_info=True, ) - data = metrics_payload(config.data.root) + data = metrics_payload(config.data.root, include_external=include_external) return jsonify(data) @app.route("/api/frequency") diff --git a/static/app.js b/static/app.js index a04c6d1..8e738cb 100644 --- a/static/app.js +++ b/static/app.js @@ -267,6 +267,10 @@ if (!graphOverlay || !graphUiAvailable()) { return; } + const includeExternal = externalToggle ? !!externalToggle.checked : false; + if (graphUiAvailable() && typeof window.GraphUI.setIncludeExternal === "function") { + window.GraphUI.setIncludeExternal(includeExternal); + } lastFocusBeforeModal = document.activeElement instanceof HTMLElement ? document.activeElement : null; graphOverlay.classList.add("active"); @@ -282,7 +286,10 @@ graphVideoField.value = videoId; } if (videoId) { - window.GraphUI.load(videoId, undefined, undefined, { updateInputs: true }); + window.GraphUI.load(videoId, undefined, undefined, { + updateInputs: true, + includeExternal, + }); } window.GraphUI.focusInput(); }); @@ -1020,7 +1027,7 @@ } } - function renderMetrics(data) { +function renderMetrics(data) { if (!metricsContent) return; metricsContent.innerHTML = ""; if (!data) return; @@ -1058,7 +1065,8 @@ async function loadMetrics() { metricsStatus.textContent = "Loading metrics…"; } try { - const res = await fetch("/api/metrics"); + const includeExternal = externalToggle ? !!externalToggle.checked : false; + const res = await fetch(`/api/metrics?external=${includeExternal ? "1" : "0"}`); const data = await res.json(); renderMetrics(data); metricsContainer.dataset.loaded = "1"; @@ -1325,6 +1333,11 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = { } of ${payload.totalPages}`; (payload.items || []).forEach((item) => { + const isExternal = !!item.external_reference; + const hasTitle = typeof item.title === "string" && item.title.trim().length > 0; + if (isExternal && !hasTitle) { + return; + } const el = document.createElement("div"); el.className = "item"; const titleHtml = @@ -1685,6 +1698,10 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = { settings.external = !!externalToggle.checked; persistSettings(); loadChannels().then(() => runSearch(0)); + loadMetrics(); + if (graphUiAvailable()) { + window.GraphUI.setIncludeExternal(settings.external); + } }); } if (queryToggle) { diff --git a/static/graph.js b/static/graph.js index daf1c24..5018a81 100644 --- a/static/graph.js +++ b/static/graph.js @@ -135,6 +135,8 @@ let currentMaxNodes = sanitizeMaxNodes(maxNodesInput.value); let currentSimulation = null; let currentFullGraph = false; + let currentIncludeExternal = true; + let previousMaxNodesValue = maxNodesInput ? maxNodesInput.value : "200"; let previousMaxNodesValue = maxNodesInput ? maxNodesInput.value : "200"; function setStatus(message, isError = false) { @@ -187,7 +189,13 @@ } } - async function fetchGraph(videoId, depth, maxNodes, fullGraphMode = false) { + async function fetchGraph( + videoId, + depth, + maxNodes, + fullGraphMode = false, + includeExternal = true + ) { const params = new URLSearchParams(); if (videoId) { params.set("video_id", videoId); @@ -199,6 +207,7 @@ params.set("depth", String(depth)); params.set("max_nodes", String(maxNodes)); } + params.set("external", includeExternal ? "1" : "0"); const response = await fetch(`/api/graph?${params.toString()}`); if (!response.ok) { const errorPayload = await response.json().catch(() => ({})); @@ -366,7 +375,10 @@ }) .on("contextmenu", (event, d) => { event.preventDefault(); - loadGraph(d.id, currentDepth, currentMaxNodes, { updateInputs: true }); + loadGraph(d.id, currentDepth, currentMaxNodes, { + updateInputs: true, + includeExternal: currentIncludeExternal, + }); }); nodeSelection @@ -449,11 +461,14 @@ videoId, depth, maxNodes, - { updateInputs = false, fullGraph } = {} + { updateInputs = false, fullGraph, includeExternal } = {} ) { const wantsFull = isFullGraphMode( typeof fullGraph === "boolean" ? fullGraph : undefined ); + const includeFlag = + typeof includeExternal === "boolean" ? includeExternal : currentIncludeExternal; + currentIncludeExternal = includeFlag; const sanitizedId = sanitizeId(videoId); if (!wantsFull && !sanitizedId) { setStatus("Please enter a video ID.", true); @@ -477,7 +492,8 @@ sanitizedId, safeDepth, safeMaxNodes, - wantsFull + wantsFull, + includeFlag ); if (!data.nodes || data.nodes.length === 0) { setStatus("No nodes returned for this video.", true); @@ -503,7 +519,8 @@ safeDepth, safeMaxNodes, getLabelSize(), - wantsFull + wantsFull, + includeFlag ); } catch (err) { console.error(err); @@ -520,6 +537,7 @@ await loadGraph(videoInput.value, depthInput.value, maxNodesInput.value, { updateInputs: true, fullGraph: isFullGraphMode(), + includeExternal: currentIncludeExternal, }); } @@ -631,7 +649,14 @@ } } - function updateUrlState(videoId, depth, maxNodes, labelSize, fullGraphMode) { + function updateUrlState( + videoId, + depth, + maxNodes, + labelSize, + fullGraphMode, + includeExternal + ) { if (isEmbedded) { return; } @@ -650,6 +675,11 @@ next.searchParams.delete("full_graph"); next.searchParams.set("max_nodes", String(maxNodes)); } + if (!includeExternal) { + next.searchParams.set("external", "0"); + } else { + next.searchParams.delete("external"); + } if (labelSize && labelSize !== "normal") { next.searchParams.set("label_size", labelSize); } else { @@ -671,6 +701,11 @@ const fullGraphParam = params.get("full_graph"); const viewFull = fullGraphParam && ["1", "true", "yes"].includes(fullGraphParam.toLowerCase()); + const externalParam = params.get("external"); + const includeExternal = + !externalParam || + !["0", "false", "no"].includes(externalParam.toLowerCase()); + currentIncludeExternal = includeExternal; if (videoId) { videoInput.value = videoId; } @@ -691,6 +726,7 @@ loadGraph(videoId, depth, maxNodes, { updateInputs: false, fullGraph: viewFull, + includeExternal, }); } @@ -713,7 +749,8 @@ currentDepth, currentMaxNodes, size, - currentFullGraph + currentFullGraph, + currentIncludeExternal ); }); initFromQuery(); @@ -736,9 +773,20 @@ typeof explicitFull === "boolean" ? explicitFull : isFullGraphMode(); + const explicitInclude = + typeof options.includeExternal === "boolean" + ? options.includeExternal + : undefined; + if (typeof explicitInclude === "boolean") { + currentIncludeExternal = explicitInclude; + } return loadGraph(videoId, targetDepth, targetMax, { updateInputs: options.updateInputs !== false, fullGraph: fullFlag, + includeExternal: + typeof explicitInclude === "boolean" + ? explicitInclude + : currentIncludeExternal, }); }, setLabelSize(size) { @@ -778,8 +826,13 @@ nodes: currentGraphData ? currentGraphData.nodes.slice() : [], links: currentGraphData ? currentGraphData.links.slice() : [], fullGraph: currentFullGraph, + includeExternal: currentIncludeExternal, }; }, + setIncludeExternal(value) { + if (typeof value !== "boolean") return; + currentIncludeExternal = value; + }, isEmbedded, }); GraphUI.ready = true;