Respect external filter in metrics and graph
Some checks failed
docker-build / build (push) Has been cancelled

This commit is contained in:
knight 2025-11-20 09:54:41 -05:00
parent b0c9d319ef
commit 93774c025f
3 changed files with 161 additions and 40 deletions

View File

@ -94,7 +94,7 @@ def _ensure_client(config: AppConfig) -> "Elasticsearch":
return Elasticsearch(config.elastic.url, **kwargs) return Elasticsearch(config.elastic.url, **kwargs)
def metrics_payload(data_root: Path) -> Dict[str, Any]: def metrics_payload(data_root: Path, include_external: bool = True) -> Dict[str, Any]:
total_items = 0 total_items = 0
channel_counter: Counter = Counter() channel_counter: Counter = Counter()
channel_name_map: Dict[str, str] = {} channel_name_map: Dict[str, str] = {}
@ -117,7 +117,8 @@ def metrics_payload(data_root: Path) -> Dict[str, Any]:
doc = json.load(handle) doc = json.load(handle)
except Exception: except Exception:
continue continue
if not include_external and doc.get("external_reference"):
continue
total_items += 1 total_items += 1
channel_id = doc.get("channel_id") channel_id = doc.get("channel_id")
@ -180,6 +181,7 @@ def elastic_metrics_payload(
*, *,
channel_field_candidates: Optional[List[str]] = None, channel_field_candidates: Optional[List[str]] = None,
debug: bool = False, debug: bool = False,
include_external: bool = True,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
if channel_field_candidates is None: if channel_field_candidates is None:
channel_field_candidates = ["channel_id.keyword", "channel_id"] channel_field_candidates = ["channel_id.keyword", "channel_id"]
@ -220,6 +222,8 @@ def elastic_metrics_payload(
}, },
}, },
} }
if not include_external:
base_body["query"] = {"bool": {"must_not": [{"term": {"external_reference": True}}]}}
last_error: Optional[Exception] = None last_error: Optional[Exception] = None
response: Optional[Dict[str, Any]] = None response: Optional[Dict[str, Any]] = None
@ -615,7 +619,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
return normalized return normalized
def build_graph_payload( def build_graph_payload(
root_id: str, depth: int, max_nodes: int root_id: str, depth: int, max_nodes: int, *, include_external: bool = True
) -> Dict[str, Any]: ) -> Dict[str, Any]:
root_id = root_id.strip() root_id = root_id.strip()
if not root_id: if not root_id:
@ -632,6 +636,14 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
except Exception as exc: # pragma: no cover - elasticsearch handles errors except Exception as exc: # pragma: no cover - elasticsearch handles errors
LOGGER.debug("Graph: failed to load %s: %s", video_id, exc) LOGGER.debug("Graph: failed to load %s: %s", video_id, exc)
doc_cache[video_id] = None doc_cache[video_id] = None
doc = doc_cache[video_id]
if (
doc is not None
and not include_external
and doc.get("external_reference")
):
doc_cache[video_id] = None
return None
return doc_cache[video_id] return doc_cache[video_id]
nodes: Dict[str, Dict[str, Any]] = {} nodes: Dict[str, Dict[str, Any]] = {}
@ -662,6 +674,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
"url": doc.get("url"), "url": doc.get("url"),
"date": doc.get("date"), "date": doc.get("date"),
"is_root": current_id == root_id, "is_root": current_id == root_id,
"external_reference": bool(doc.get("external_reference")),
} }
if level >= depth: if level >= depth:
@ -707,6 +720,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
continue continue
doc = fetch_document(node_id) doc = fetch_document(node_id)
if doc is None: if doc is None:
if include_external:
nodes[node_id] = { nodes[node_id] = {
"id": node_id, "id": node_id,
"title": node_id, "title": node_id,
@ -715,8 +729,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
"url": None, "url": None,
"date": None, "date": None,
"is_root": node_id == root_id, "is_root": node_id == root_id,
"external_reference": False,
} }
else: continue
nodes[node_id] = { nodes[node_id] = {
"id": node_id, "id": node_id,
"title": doc.get("title") or node_id, "title": doc.get("title") or node_id,
@ -725,6 +740,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
"url": doc.get("url"), "url": doc.get("url"),
"date": doc.get("date"), "date": doc.get("date"),
"is_root": node_id == root_id, "is_root": node_id == root_id,
"external_reference": bool(doc.get("external_reference")),
} }
links = [ links = [
@ -745,7 +761,10 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
} }
def build_full_graph_payload( def build_full_graph_payload(
max_nodes: Optional[int], *, highlight_id: Optional[str] = None max_nodes: Optional[int],
*,
highlight_id: Optional[str] = None,
include_external: bool = True,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
Attempt to render the entire reference graph by gathering every video that Attempt to render the entire reference graph by gathering every video that
@ -785,6 +804,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
return False return False
if node_id in nodes: if node_id in nodes:
if doc: if doc:
if not include_external and doc.get("external_reference"):
nodes.pop(node_id, None)
return False
existing = nodes[node_id] existing = nodes[node_id]
if (not existing.get("title") or existing["title"] == node_id) and doc.get("title"): if (not existing.get("title") or existing["title"] == node_id) and doc.get("title"):
existing["title"] = doc["title"] existing["title"] = doc["title"]
@ -808,6 +830,8 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
date_val = None date_val = None
title = node_id title = node_id
if doc: if doc:
if not include_external and doc.get("external_reference"):
return False
title = doc.get("title") or title title = doc.get("title") or title
channel_id = doc.get("channel_id") channel_id = doc.get("channel_id")
channel_name = doc.get("channel_name") or channel_id channel_name = doc.get("channel_name") or channel_id
@ -821,6 +845,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
"url": url, "url": url,
"date": date_val, "date": date_val,
"is_root": False, "is_root": False,
"external_reference": bool(doc.get("external_reference")) if doc else False,
} }
return True return True
@ -845,6 +870,9 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
video_id = source.get("video_id") video_id = source.get("video_id")
if not video_id: if not video_id:
continue continue
if not include_external and source.get("external_reference"):
nodes.pop(video_id, None)
continue
if not ensure_node(video_id, source): if not ensure_node(video_id, source):
continue continue
for target in normalize_reference_list(source.get("internal_references")): for target in normalize_reference_list(source.get("internal_references")):
@ -885,6 +913,18 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
if highlight_id and highlight_id in nodes: if highlight_id and highlight_id in nodes:
nodes[highlight_id]["is_root"] = True nodes[highlight_id]["is_root"] = True
# Keep only edges whose two endpoints are still present in `nodes`; nodes can
# be popped earlier (e.g. when external references are excluded), which would
# otherwise leave links pointing at missing nodes. One pass is sufficient: the
# filter is idempotent, so the previously duplicated second pass was a no-op.
links = [
    link
    for link in links
    if link.get("source") in nodes and link.get("target") in nodes
]
return { return {
"root": highlight_id or "", "root": highlight_id or "",
"depth": 0, "depth": 0,
@ -990,6 +1030,8 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
full_graph = full_graph.lower() in {"1", "true", "yes"} full_graph = full_graph.lower() in {"1", "true", "yes"}
if not full_graph and not video_id: if not full_graph and not video_id:
return jsonify({"error": "video_id is required"}), 400 return jsonify({"error": "video_id is required"}), 400
include_external = request.args.get("external", default="1", type=str)
include_external = include_external.lower() not in {"0", "false", "no"}
try: try:
depth = int(request.args.get("depth", "1")) depth = int(request.args.get("depth", "1"))
@ -1007,9 +1049,15 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
max_nodes = max(10, min(max_nodes, 400)) max_nodes = max(10, min(max_nodes, 400))
if full_graph: if full_graph:
payload = build_full_graph_payload(None, highlight_id=video_id or None) payload = build_full_graph_payload(
None,
highlight_id=video_id or None,
include_external=include_external,
)
else: else:
payload = build_graph_payload(video_id, depth, max_nodes) payload = build_graph_payload(
video_id, depth, max_nodes, include_external=include_external
)
if not payload["nodes"]: if not payload["nodes"]:
return ( return (
jsonify({"error": f"Video '{video_id}' was not found in the index."}), jsonify({"error": f"Video '{video_id}' was not found in the index."}),
@ -1196,19 +1244,22 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
@app.route("/api/metrics") @app.route("/api/metrics")
def metrics(): def metrics():
include_external = request.args.get("external", default="1", type=str)
include_external = include_external.lower() not in {"0", "false", "no"}
try: try:
data = elastic_metrics_payload( data = elastic_metrics_payload(
client, client,
index, index,
channel_field_candidates=["channel_id.keyword", "channel_id"], channel_field_candidates=["channel_id.keyword", "channel_id"],
debug=config.elastic.debug, debug=config.elastic.debug,
include_external=include_external,
) )
except Exception: except Exception:
LOGGER.exception( LOGGER.exception(
"Falling back to local metrics payload due to Elasticsearch error.", "Falling back to local metrics payload due to Elasticsearch error.",
exc_info=True, exc_info=True,
) )
data = metrics_payload(config.data.root) data = metrics_payload(config.data.root, include_external=include_external)
return jsonify(data) return jsonify(data)
@app.route("/api/frequency") @app.route("/api/frequency")

View File

@ -267,6 +267,10 @@
if (!graphOverlay || !graphUiAvailable()) { if (!graphOverlay || !graphUiAvailable()) {
return; return;
} }
const includeExternal = externalToggle ? !!externalToggle.checked : false;
if (graphUiAvailable() && typeof window.GraphUI.setIncludeExternal === "function") {
window.GraphUI.setIncludeExternal(includeExternal);
}
lastFocusBeforeModal = lastFocusBeforeModal =
document.activeElement instanceof HTMLElement ? document.activeElement : null; document.activeElement instanceof HTMLElement ? document.activeElement : null;
graphOverlay.classList.add("active"); graphOverlay.classList.add("active");
@ -282,7 +286,10 @@
graphVideoField.value = videoId; graphVideoField.value = videoId;
} }
if (videoId) { if (videoId) {
window.GraphUI.load(videoId, undefined, undefined, { updateInputs: true }); window.GraphUI.load(videoId, undefined, undefined, {
updateInputs: true,
includeExternal,
});
} }
window.GraphUI.focusInput(); window.GraphUI.focusInput();
}); });
@ -1058,7 +1065,8 @@ async function loadMetrics() {
metricsStatus.textContent = "Loading metrics…"; metricsStatus.textContent = "Loading metrics…";
} }
try { try {
const res = await fetch("/api/metrics"); const includeExternal = externalToggle ? !!externalToggle.checked : false;
const res = await fetch(`/api/metrics?external=${includeExternal ? "1" : "0"}`);
const data = await res.json(); const data = await res.json();
renderMetrics(data); renderMetrics(data);
metricsContainer.dataset.loaded = "1"; metricsContainer.dataset.loaded = "1";
@ -1325,6 +1333,11 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = {
} of ${payload.totalPages}`; } of ${payload.totalPages}`;
(payload.items || []).forEach((item) => { (payload.items || []).forEach((item) => {
const isExternal = !!item.external_reference;
const hasTitle = typeof item.title === "string" && item.title.trim().length > 0;
if (isExternal && !hasTitle) {
return;
}
const el = document.createElement("div"); const el = document.createElement("div");
el.className = "item"; el.className = "item";
const titleHtml = const titleHtml =
@ -1685,6 +1698,10 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = {
settings.external = !!externalToggle.checked; settings.external = !!externalToggle.checked;
persistSettings(); persistSettings();
loadChannels().then(() => runSearch(0)); loadChannels().then(() => runSearch(0));
loadMetrics();
if (graphUiAvailable()) {
window.GraphUI.setIncludeExternal(settings.external);
}
}); });
} }
if (queryToggle) { if (queryToggle) {

View File

@ -135,6 +135,8 @@
let currentMaxNodes = sanitizeMaxNodes(maxNodesInput.value); let currentMaxNodes = sanitizeMaxNodes(maxNodesInput.value);
let currentSimulation = null; let currentSimulation = null;
let currentFullGraph = false; let currentFullGraph = false;
// Most recently requested value of the `external` flag sent to /api/graph.
// Updated by loadGraph(), initFromQuery() and GraphUI.setIncludeExternal().
// NOTE: previousMaxNodesValue is already declared on the next line; declaring
// it again here with `let` would be a SyntaxError (duplicate declaration in
// the same scope) and would prevent the whole script from loading.
let currentIncludeExternal = true;
let previousMaxNodesValue = maxNodesInput ? maxNodesInput.value : "200"; let previousMaxNodesValue = maxNodesInput ? maxNodesInput.value : "200";
function setStatus(message, isError = false) { function setStatus(message, isError = false) {
@ -187,7 +189,13 @@
} }
} }
async function fetchGraph(videoId, depth, maxNodes, fullGraphMode = false) { async function fetchGraph(
videoId,
depth,
maxNodes,
fullGraphMode = false,
includeExternal = true
) {
const params = new URLSearchParams(); const params = new URLSearchParams();
if (videoId) { if (videoId) {
params.set("video_id", videoId); params.set("video_id", videoId);
@ -199,6 +207,7 @@
params.set("depth", String(depth)); params.set("depth", String(depth));
params.set("max_nodes", String(maxNodes)); params.set("max_nodes", String(maxNodes));
} }
params.set("external", includeExternal ? "1" : "0");
const response = await fetch(`/api/graph?${params.toString()}`); const response = await fetch(`/api/graph?${params.toString()}`);
if (!response.ok) { if (!response.ok) {
const errorPayload = await response.json().catch(() => ({})); const errorPayload = await response.json().catch(() => ({}));
@ -366,7 +375,10 @@
}) })
.on("contextmenu", (event, d) => { .on("contextmenu", (event, d) => {
event.preventDefault(); event.preventDefault();
loadGraph(d.id, currentDepth, currentMaxNodes, { updateInputs: true }); loadGraph(d.id, currentDepth, currentMaxNodes, {
updateInputs: true,
includeExternal: currentIncludeExternal,
});
}); });
nodeSelection nodeSelection
@ -449,11 +461,14 @@
videoId, videoId,
depth, depth,
maxNodes, maxNodes,
{ updateInputs = false, fullGraph } = {} { updateInputs = false, fullGraph, includeExternal } = {}
) { ) {
const wantsFull = isFullGraphMode( const wantsFull = isFullGraphMode(
typeof fullGraph === "boolean" ? fullGraph : undefined typeof fullGraph === "boolean" ? fullGraph : undefined
); );
const includeFlag =
typeof includeExternal === "boolean" ? includeExternal : currentIncludeExternal;
currentIncludeExternal = includeFlag;
const sanitizedId = sanitizeId(videoId); const sanitizedId = sanitizeId(videoId);
if (!wantsFull && !sanitizedId) { if (!wantsFull && !sanitizedId) {
setStatus("Please enter a video ID.", true); setStatus("Please enter a video ID.", true);
@ -477,7 +492,8 @@
sanitizedId, sanitizedId,
safeDepth, safeDepth,
safeMaxNodes, safeMaxNodes,
wantsFull wantsFull,
includeFlag
); );
if (!data.nodes || data.nodes.length === 0) { if (!data.nodes || data.nodes.length === 0) {
setStatus("No nodes returned for this video.", true); setStatus("No nodes returned for this video.", true);
@ -503,7 +519,8 @@
safeDepth, safeDepth,
safeMaxNodes, safeMaxNodes,
getLabelSize(), getLabelSize(),
wantsFull wantsFull,
includeFlag
); );
} catch (err) { } catch (err) {
console.error(err); console.error(err);
@ -520,6 +537,7 @@
await loadGraph(videoInput.value, depthInput.value, maxNodesInput.value, { await loadGraph(videoInput.value, depthInput.value, maxNodesInput.value, {
updateInputs: true, updateInputs: true,
fullGraph: isFullGraphMode(), fullGraph: isFullGraphMode(),
includeExternal: currentIncludeExternal,
}); });
} }
@ -631,7 +649,14 @@
} }
} }
function updateUrlState(videoId, depth, maxNodes, labelSize, fullGraphMode) { function updateUrlState(
videoId,
depth,
maxNodes,
labelSize,
fullGraphMode,
includeExternal
) {
if (isEmbedded) { if (isEmbedded) {
return; return;
} }
@ -650,6 +675,11 @@
next.searchParams.delete("full_graph"); next.searchParams.delete("full_graph");
next.searchParams.set("max_nodes", String(maxNodes)); next.searchParams.set("max_nodes", String(maxNodes));
} }
if (!includeExternal) {
next.searchParams.set("external", "0");
} else {
next.searchParams.delete("external");
}
if (labelSize && labelSize !== "normal") { if (labelSize && labelSize !== "normal") {
next.searchParams.set("label_size", labelSize); next.searchParams.set("label_size", labelSize);
} else { } else {
@ -671,6 +701,11 @@
const fullGraphParam = params.get("full_graph"); const fullGraphParam = params.get("full_graph");
const viewFull = const viewFull =
fullGraphParam && ["1", "true", "yes"].includes(fullGraphParam.toLowerCase()); fullGraphParam && ["1", "true", "yes"].includes(fullGraphParam.toLowerCase());
const externalParam = params.get("external");
const includeExternal =
!externalParam ||
!["0", "false", "no"].includes(externalParam.toLowerCase());
currentIncludeExternal = includeExternal;
if (videoId) { if (videoId) {
videoInput.value = videoId; videoInput.value = videoId;
} }
@ -691,6 +726,7 @@
loadGraph(videoId, depth, maxNodes, { loadGraph(videoId, depth, maxNodes, {
updateInputs: false, updateInputs: false,
fullGraph: viewFull, fullGraph: viewFull,
includeExternal,
}); });
} }
@ -713,7 +749,8 @@
currentDepth, currentDepth,
currentMaxNodes, currentMaxNodes,
size, size,
currentFullGraph currentFullGraph,
currentIncludeExternal
); );
}); });
initFromQuery(); initFromQuery();
@ -736,9 +773,20 @@
typeof explicitFull === "boolean" typeof explicitFull === "boolean"
? explicitFull ? explicitFull
: isFullGraphMode(); : isFullGraphMode();
const explicitInclude =
typeof options.includeExternal === "boolean"
? options.includeExternal
: undefined;
if (typeof explicitInclude === "boolean") {
currentIncludeExternal = explicitInclude;
}
return loadGraph(videoId, targetDepth, targetMax, { return loadGraph(videoId, targetDepth, targetMax, {
updateInputs: options.updateInputs !== false, updateInputs: options.updateInputs !== false,
fullGraph: fullFlag, fullGraph: fullFlag,
includeExternal:
typeof explicitInclude === "boolean"
? explicitInclude
: currentIncludeExternal,
}); });
}, },
setLabelSize(size) { setLabelSize(size) {
@ -778,8 +826,13 @@
nodes: currentGraphData ? currentGraphData.nodes.slice() : [], nodes: currentGraphData ? currentGraphData.nodes.slice() : [],
links: currentGraphData ? currentGraphData.links.slice() : [], links: currentGraphData ? currentGraphData.links.slice() : [],
fullGraph: currentFullGraph, fullGraph: currentFullGraph,
includeExternal: currentIncludeExternal,
}; };
}, },
setIncludeExternal(value) {
if (typeof value !== "boolean") return;
currentIncludeExternal = value;
},
isEmbedded, isEmbedded,
}); });
GraphUI.ready = true; GraphUI.ready = true;