Respect external filter in metrics and graph
Some checks failed
docker-build / build (push) Has been cancelled

This commit is contained in:
knight 2025-11-20 09:54:41 -05:00
parent b0c9d319ef
commit 93774c025f
3 changed files with 161 additions and 40 deletions

View File

@ -94,7 +94,7 @@ def _ensure_client(config: AppConfig) -> "Elasticsearch":
return Elasticsearch(config.elastic.url, **kwargs) return Elasticsearch(config.elastic.url, **kwargs)
def metrics_payload(data_root: Path) -> Dict[str, Any]: def metrics_payload(data_root: Path, include_external: bool = True) -> Dict[str, Any]:
total_items = 0 total_items = 0
channel_counter: Counter = Counter() channel_counter: Counter = Counter()
channel_name_map: Dict[str, str] = {} channel_name_map: Dict[str, str] = {}
@ -117,7 +117,8 @@ def metrics_payload(data_root: Path) -> Dict[str, Any]:
doc = json.load(handle) doc = json.load(handle)
except Exception: except Exception:
continue continue
if not include_external and doc.get("external_reference"):
continue
total_items += 1 total_items += 1
channel_id = doc.get("channel_id") channel_id = doc.get("channel_id")
@ -180,6 +181,7 @@ def elastic_metrics_payload(
*, *,
channel_field_candidates: Optional[List[str]] = None, channel_field_candidates: Optional[List[str]] = None,
debug: bool = False, debug: bool = False,
include_external: bool = True,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
if channel_field_candidates is None: if channel_field_candidates is None:
channel_field_candidates = ["channel_id.keyword", "channel_id"] channel_field_candidates = ["channel_id.keyword", "channel_id"]
@ -220,6 +222,8 @@ def elastic_metrics_payload(
}, },
}, },
} }
if not include_external:
base_body["query"] = {"bool": {"must_not": [{"term": {"external_reference": True}}]}}
last_error: Optional[Exception] = None last_error: Optional[Exception] = None
response: Optional[Dict[str, Any]] = None response: Optional[Dict[str, Any]] = None
@ -614,9 +618,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
normalized.append(text) normalized.append(text)
return normalized return normalized
def build_graph_payload( def build_graph_payload(
root_id: str, depth: int, max_nodes: int root_id: str, depth: int, max_nodes: int, *, include_external: bool = True
) -> Dict[str, Any]: ) -> Dict[str, Any]:
root_id = root_id.strip() root_id = root_id.strip()
if not root_id: if not root_id:
return {"nodes": [], "links": [], "root": root_id, "depth": depth, "meta": {}} return {"nodes": [], "links": [], "root": root_id, "depth": depth, "meta": {}}
@ -632,6 +636,14 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
except Exception as exc: # pragma: no cover - elasticsearch handles errors except Exception as exc: # pragma: no cover - elasticsearch handles errors
LOGGER.debug("Graph: failed to load %s: %s", video_id, exc) LOGGER.debug("Graph: failed to load %s: %s", video_id, exc)
doc_cache[video_id] = None doc_cache[video_id] = None
doc = doc_cache[video_id]
if (
doc is not None
and not include_external
and doc.get("external_reference")
):
doc_cache[video_id] = None
return None
return doc_cache[video_id] return doc_cache[video_id]
nodes: Dict[str, Dict[str, Any]] = {} nodes: Dict[str, Dict[str, Any]] = {}
@ -662,6 +674,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
"url": doc.get("url"), "url": doc.get("url"),
"date": doc.get("date"), "date": doc.get("date"),
"is_root": current_id == root_id, "is_root": current_id == root_id,
"external_reference": bool(doc.get("external_reference")),
} }
if level >= depth: if level >= depth:
@ -707,6 +720,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
continue continue
doc = fetch_document(node_id) doc = fetch_document(node_id)
if doc is None: if doc is None:
if include_external:
nodes[node_id] = { nodes[node_id] = {
"id": node_id, "id": node_id,
"title": node_id, "title": node_id,
@ -715,8 +729,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
"url": None, "url": None,
"date": None, "date": None,
"is_root": node_id == root_id, "is_root": node_id == root_id,
"external_reference": False,
} }
else: continue
nodes[node_id] = { nodes[node_id] = {
"id": node_id, "id": node_id,
"title": doc.get("title") or node_id, "title": doc.get("title") or node_id,
@ -725,6 +740,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
"url": doc.get("url"), "url": doc.get("url"),
"date": doc.get("date"), "date": doc.get("date"),
"is_root": node_id == root_id, "is_root": node_id == root_id,
"external_reference": bool(doc.get("external_reference")),
} }
links = [ links = [
@ -744,9 +760,12 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
}, },
} }
def build_full_graph_payload( def build_full_graph_payload(
max_nodes: Optional[int], *, highlight_id: Optional[str] = None max_nodes: Optional[int],
) -> Dict[str, Any]: *,
highlight_id: Optional[str] = None,
include_external: bool = True,
) -> Dict[str, Any]:
""" """
Attempt to render the entire reference graph by gathering every video that Attempt to render the entire reference graph by gathering every video that
references another (or is referenced). references another (or is referenced).
@ -785,6 +804,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
return False return False
if node_id in nodes: if node_id in nodes:
if doc: if doc:
if not include_external and doc.get("external_reference"):
nodes.pop(node_id, None)
return False
existing = nodes[node_id] existing = nodes[node_id]
if (not existing.get("title") or existing["title"] == node_id) and doc.get("title"): if (not existing.get("title") or existing["title"] == node_id) and doc.get("title"):
existing["title"] = doc["title"] existing["title"] = doc["title"]
@ -808,6 +830,8 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
date_val = None date_val = None
title = node_id title = node_id
if doc: if doc:
if not include_external and doc.get("external_reference"):
return False
title = doc.get("title") or title title = doc.get("title") or title
channel_id = doc.get("channel_id") channel_id = doc.get("channel_id")
channel_name = doc.get("channel_name") or channel_id channel_name = doc.get("channel_name") or channel_id
@ -821,6 +845,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
"url": url, "url": url,
"date": date_val, "date": date_val,
"is_root": False, "is_root": False,
"external_reference": bool(doc.get("external_reference")) if doc else False,
} }
return True return True
@ -845,6 +870,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
video_id = source.get("video_id") video_id = source.get("video_id")
if not video_id: if not video_id:
continue continue
if not include_external and source.get("external_reference"):
nodes.pop(video_id, None)
continue
if not ensure_node(video_id, source): if not ensure_node(video_id, source):
continue continue
for target in normalize_reference_list(source.get("internal_references")): for target in normalize_reference_list(source.get("internal_references")):
@ -885,6 +913,18 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
if highlight_id and highlight_id in nodes: if highlight_id and highlight_id in nodes:
nodes[highlight_id]["is_root"] = True nodes[highlight_id]["is_root"] = True
# Drop every edge whose source or target is no longer present in ``nodes``
# (nodes can be popped when external references are excluded), so the
# payload never contains a link that points at a missing node.
# One pass is sufficient: ``nodes`` is not modified between passes, so
# repeating the identical filter would only rescan the list.
links = [
    link
    for link in links
    if link.get("source") in nodes and link.get("target") in nodes
]
return { return {
"root": highlight_id or "", "root": highlight_id or "",
"depth": 0, "depth": 0,
@ -990,6 +1030,8 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
full_graph = full_graph.lower() in {"1", "true", "yes"} full_graph = full_graph.lower() in {"1", "true", "yes"}
if not full_graph and not video_id: if not full_graph and not video_id:
return jsonify({"error": "video_id is required"}), 400 return jsonify({"error": "video_id is required"}), 400
include_external = request.args.get("external", default="1", type=str)
include_external = include_external.lower() not in {"0", "false", "no"}
try: try:
depth = int(request.args.get("depth", "1")) depth = int(request.args.get("depth", "1"))
@ -1007,9 +1049,15 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
max_nodes = max(10, min(max_nodes, 400)) max_nodes = max(10, min(max_nodes, 400))
if full_graph: if full_graph:
payload = build_full_graph_payload(None, highlight_id=video_id or None) payload = build_full_graph_payload(
None,
highlight_id=video_id or None,
include_external=include_external,
)
else: else:
payload = build_graph_payload(video_id, depth, max_nodes) payload = build_graph_payload(
video_id, depth, max_nodes, include_external=include_external
)
if not payload["nodes"]: if not payload["nodes"]:
return ( return (
jsonify({"error": f"Video '{video_id}' was not found in the index."}), jsonify({"error": f"Video '{video_id}' was not found in the index."}),
@ -1196,19 +1244,22 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
@app.route("/api/metrics")
def metrics():
    """Serve the metrics payload as JSON, honouring the ``external`` flag.

    The ``external`` query parameter defaults to ``"1"``. The values ``0``,
    ``false`` and ``no`` (compared case-insensitively) exclude external
    references; any other value keeps them.
    """
    raw_flag = request.args.get("external", default="1", type=str)
    include_external = raw_flag.lower() not in {"0", "false", "no"}

    try:
        data = elastic_metrics_payload(
            client,
            index,
            channel_field_candidates=["channel_id.keyword", "channel_id"],
            debug=config.elastic.debug,
            include_external=include_external,
        )
    except Exception:
        # Any failure on the Elasticsearch path degrades to the local payload
        # built from ``config.data.root``, with the same external filter.
        LOGGER.exception(
            "Falling back to local metrics payload due to Elasticsearch error.",
            exc_info=True,
        )
        data = metrics_payload(
            config.data.root,
            include_external=include_external,
        )

    return jsonify(data)
@app.route("/api/frequency") @app.route("/api/frequency")

View File

@ -267,6 +267,10 @@
if (!graphOverlay || !graphUiAvailable()) { if (!graphOverlay || !graphUiAvailable()) {
return; return;
} }
const includeExternal = externalToggle ? !!externalToggle.checked : false;
if (graphUiAvailable() && typeof window.GraphUI.setIncludeExternal === "function") {
window.GraphUI.setIncludeExternal(includeExternal);
}
lastFocusBeforeModal = lastFocusBeforeModal =
document.activeElement instanceof HTMLElement ? document.activeElement : null; document.activeElement instanceof HTMLElement ? document.activeElement : null;
graphOverlay.classList.add("active"); graphOverlay.classList.add("active");
@ -282,7 +286,10 @@
graphVideoField.value = videoId; graphVideoField.value = videoId;
} }
if (videoId) { if (videoId) {
window.GraphUI.load(videoId, undefined, undefined, { updateInputs: true }); window.GraphUI.load(videoId, undefined, undefined, {
updateInputs: true,
includeExternal,
});
} }
window.GraphUI.focusInput(); window.GraphUI.focusInput();
}); });
@ -1020,7 +1027,7 @@
} }
} }
function renderMetrics(data) { function renderMetrics(data) {
if (!metricsContent) return; if (!metricsContent) return;
metricsContent.innerHTML = ""; metricsContent.innerHTML = "";
if (!data) return; if (!data) return;
@ -1058,7 +1065,8 @@ async function loadMetrics() {
metricsStatus.textContent = "Loading metrics…"; metricsStatus.textContent = "Loading metrics…";
} }
try { try {
const res = await fetch("/api/metrics"); const includeExternal = externalToggle ? !!externalToggle.checked : false;
const res = await fetch(`/api/metrics?external=${includeExternal ? "1" : "0"}`);
const data = await res.json(); const data = await res.json();
renderMetrics(data); renderMetrics(data);
metricsContainer.dataset.loaded = "1"; metricsContainer.dataset.loaded = "1";
@ -1325,6 +1333,11 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = {
} of ${payload.totalPages}`; } of ${payload.totalPages}`;
(payload.items || []).forEach((item) => { (payload.items || []).forEach((item) => {
const isExternal = !!item.external_reference;
const hasTitle = typeof item.title === "string" && item.title.trim().length > 0;
if (isExternal && !hasTitle) {
return;
}
const el = document.createElement("div"); const el = document.createElement("div");
el.className = "item"; el.className = "item";
const titleHtml = const titleHtml =
@ -1685,6 +1698,10 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = {
settings.external = !!externalToggle.checked; settings.external = !!externalToggle.checked;
persistSettings(); persistSettings();
loadChannels().then(() => runSearch(0)); loadChannels().then(() => runSearch(0));
loadMetrics();
if (graphUiAvailable()) {
window.GraphUI.setIncludeExternal(settings.external);
}
}); });
} }
if (queryToggle) { if (queryToggle) {

View File

@ -135,6 +135,8 @@
let currentMaxNodes = sanitizeMaxNodes(maxNodesInput.value); let currentMaxNodes = sanitizeMaxNodes(maxNodesInput.value);
let currentSimulation = null; let currentSimulation = null;
let currentFullGraph = false; let currentFullGraph = false;
let currentIncludeExternal = true;
// Declared exactly once: a second `let` for the same name in this scope is a
// SyntaxError and would stop the whole script from loading.
// Falls back to "200" when the max-nodes input element is absent.
let previousMaxNodesValue = maxNodesInput ? maxNodesInput.value : "200";
function setStatus(message, isError = false) { function setStatus(message, isError = false) {
@ -187,7 +189,13 @@
} }
} }
async function fetchGraph(videoId, depth, maxNodes, fullGraphMode = false) { async function fetchGraph(
videoId,
depth,
maxNodes,
fullGraphMode = false,
includeExternal = true
) {
const params = new URLSearchParams(); const params = new URLSearchParams();
if (videoId) { if (videoId) {
params.set("video_id", videoId); params.set("video_id", videoId);
@ -199,6 +207,7 @@
params.set("depth", String(depth)); params.set("depth", String(depth));
params.set("max_nodes", String(maxNodes)); params.set("max_nodes", String(maxNodes));
} }
params.set("external", includeExternal ? "1" : "0");
const response = await fetch(`/api/graph?${params.toString()}`); const response = await fetch(`/api/graph?${params.toString()}`);
if (!response.ok) { if (!response.ok) {
const errorPayload = await response.json().catch(() => ({})); const errorPayload = await response.json().catch(() => ({}));
@ -366,7 +375,10 @@
}) })
.on("contextmenu", (event, d) => { .on("contextmenu", (event, d) => {
event.preventDefault(); event.preventDefault();
loadGraph(d.id, currentDepth, currentMaxNodes, { updateInputs: true }); loadGraph(d.id, currentDepth, currentMaxNodes, {
updateInputs: true,
includeExternal: currentIncludeExternal,
});
}); });
nodeSelection nodeSelection
@ -449,11 +461,14 @@
videoId, videoId,
depth, depth,
maxNodes, maxNodes,
{ updateInputs = false, fullGraph } = {} { updateInputs = false, fullGraph, includeExternal } = {}
) { ) {
const wantsFull = isFullGraphMode( const wantsFull = isFullGraphMode(
typeof fullGraph === "boolean" ? fullGraph : undefined typeof fullGraph === "boolean" ? fullGraph : undefined
); );
const includeFlag =
typeof includeExternal === "boolean" ? includeExternal : currentIncludeExternal;
currentIncludeExternal = includeFlag;
const sanitizedId = sanitizeId(videoId); const sanitizedId = sanitizeId(videoId);
if (!wantsFull && !sanitizedId) { if (!wantsFull && !sanitizedId) {
setStatus("Please enter a video ID.", true); setStatus("Please enter a video ID.", true);
@ -477,7 +492,8 @@
sanitizedId, sanitizedId,
safeDepth, safeDepth,
safeMaxNodes, safeMaxNodes,
wantsFull wantsFull,
includeFlag
); );
if (!data.nodes || data.nodes.length === 0) { if (!data.nodes || data.nodes.length === 0) {
setStatus("No nodes returned for this video.", true); setStatus("No nodes returned for this video.", true);
@ -503,7 +519,8 @@
safeDepth, safeDepth,
safeMaxNodes, safeMaxNodes,
getLabelSize(), getLabelSize(),
wantsFull wantsFull,
includeFlag
); );
} catch (err) { } catch (err) {
console.error(err); console.error(err);
@ -520,6 +537,7 @@
await loadGraph(videoInput.value, depthInput.value, maxNodesInput.value, { await loadGraph(videoInput.value, depthInput.value, maxNodesInput.value, {
updateInputs: true, updateInputs: true,
fullGraph: isFullGraphMode(), fullGraph: isFullGraphMode(),
includeExternal: currentIncludeExternal,
}); });
} }
@ -631,7 +649,14 @@
} }
} }
function updateUrlState(videoId, depth, maxNodes, labelSize, fullGraphMode) { function updateUrlState(
videoId,
depth,
maxNodes,
labelSize,
fullGraphMode,
includeExternal
) {
if (isEmbedded) { if (isEmbedded) {
return; return;
} }
@ -650,6 +675,11 @@
next.searchParams.delete("full_graph"); next.searchParams.delete("full_graph");
next.searchParams.set("max_nodes", String(maxNodes)); next.searchParams.set("max_nodes", String(maxNodes));
} }
if (!includeExternal) {
next.searchParams.set("external", "0");
} else {
next.searchParams.delete("external");
}
if (labelSize && labelSize !== "normal") { if (labelSize && labelSize !== "normal") {
next.searchParams.set("label_size", labelSize); next.searchParams.set("label_size", labelSize);
} else { } else {
@ -671,6 +701,11 @@
const fullGraphParam = params.get("full_graph"); const fullGraphParam = params.get("full_graph");
const viewFull = const viewFull =
fullGraphParam && ["1", "true", "yes"].includes(fullGraphParam.toLowerCase()); fullGraphParam && ["1", "true", "yes"].includes(fullGraphParam.toLowerCase());
const externalParam = params.get("external");
const includeExternal =
!externalParam ||
!["0", "false", "no"].includes(externalParam.toLowerCase());
currentIncludeExternal = includeExternal;
if (videoId) { if (videoId) {
videoInput.value = videoId; videoInput.value = videoId;
} }
@ -691,6 +726,7 @@
loadGraph(videoId, depth, maxNodes, { loadGraph(videoId, depth, maxNodes, {
updateInputs: false, updateInputs: false,
fullGraph: viewFull, fullGraph: viewFull,
includeExternal,
}); });
} }
@ -713,7 +749,8 @@
currentDepth, currentDepth,
currentMaxNodes, currentMaxNodes,
size, size,
currentFullGraph currentFullGraph,
currentIncludeExternal
); );
}); });
initFromQuery(); initFromQuery();
@ -736,9 +773,20 @@
typeof explicitFull === "boolean" typeof explicitFull === "boolean"
? explicitFull ? explicitFull
: isFullGraphMode(); : isFullGraphMode();
const explicitInclude =
typeof options.includeExternal === "boolean"
? options.includeExternal
: undefined;
if (typeof explicitInclude === "boolean") {
currentIncludeExternal = explicitInclude;
}
return loadGraph(videoId, targetDepth, targetMax, { return loadGraph(videoId, targetDepth, targetMax, {
updateInputs: options.updateInputs !== false, updateInputs: options.updateInputs !== false,
fullGraph: fullFlag, fullGraph: fullFlag,
includeExternal:
typeof explicitInclude === "boolean"
? explicitInclude
: currentIncludeExternal,
}); });
}, },
setLabelSize(size) { setLabelSize(size) {
@ -778,8 +826,13 @@
nodes: currentGraphData ? currentGraphData.nodes.slice() : [], nodes: currentGraphData ? currentGraphData.nodes.slice() : [],
links: currentGraphData ? currentGraphData.links.slice() : [], links: currentGraphData ? currentGraphData.links.slice() : [],
fullGraph: currentFullGraph, fullGraph: currentFullGraph,
includeExternal: currentIncludeExternal,
}; };
}, },
setIncludeExternal(value) {
if (typeof value !== "boolean") return;
currentIncludeExternal = value;
},
isEmbedded, isEmbedded,
}); });
GraphUI.ready = true; GraphUI.ready = true;