Add graph and vector search features
This commit is contained in:
parent
14d37f23e4
commit
40d4f41f6e
31
AGENTS.md
Normal file
31
AGENTS.md
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# Repository Guidelines
|
||||||
|
|
||||||
|
## Project Structure & Module Organization
|
||||||
|
- Core modules live under `python_app/`: `config.py` centralizes settings, `transcript_collector.py` gathers transcripts, `ingest.py` handles Elasticsearch bulk loads, and `search_app.py` exposes the Flask UI.
|
||||||
|
- Static assets belong in `static/` (`index.html`, `frequency.html`, `graph.html`, `vector.html`, companion JS/CSS). Keep HTML here and wire it up through Flask routes.
|
||||||
|
- Runtime artifacts land in `data/` (`raw/` for downloads, `video_metadata/` for cleaned payloads). Preserve the JSON schema emitted by the collector.
|
||||||
|
- When adding utilities, place them in `python_app/` and use package-relative imports so scripts continue to run via `python -m`.
|
||||||
|
|
||||||
|
## Build, Test, and Development Commands
|
||||||
|
- `python -m venv .venv && source .venv/bin/activate`: bootstrap the virtualenv used by all scripts.
|
||||||
|
- `pip install -r requirements.txt`: install Flask, Elasticsearch tooling, Google API clients, and dotenv support.
|
||||||
|
- `python -m python_app.transcript_collector --channel UC... --output data/raw`: fetch transcript JSON for a channel; rerun to refresh cached data.
|
||||||
|
- `python -m python_app.ingest --source data/video_metadata --index this_little_corner_py`: index prepared metadata and auto-create mappings when needed.
|
||||||
|
- `python -m python_app.search_app`: launch the Flask server on port 8080 for UI smoke tests.
|
||||||
|
|
||||||
|
## Coding Style & Naming Conventions
|
||||||
|
- Follow PEP 8 with 4-space indentation, `snake_case` for functions/modules, and `CamelCase` for classes; reserve UPPER_SNAKE_CASE for configuration constants.
|
||||||
|
- Keep Elasticsearch payload keys lower-case with underscores, and centralize shared values in `config.py` rather than scattering literals.
|
||||||
|
|
||||||
|
## Testing Guidelines
|
||||||
|
- No automated suite is committed yet; when adding coverage, create `tests/` modules using `pytest` with files named `test_*.py`.
|
||||||
|
- Focus tests on collector pagination, ingest transformations, and Flask route helpers, and run `python -m pytest` locally before opening a PR.
|
||||||
|
- Manually verify by ingesting a small sample into a local Elasticsearch node and checking facets, highlights, and transcript retrieval via the UI.
|
||||||
|
|
||||||
|
## Commit & Pull Request Guidelines
|
||||||
|
- Mirror the existing history: short, imperative commit subjects (e.g. “Fix results overflow”, “Add video reference tracking”).
|
||||||
|
- PRs should describe scope, list environment variables or indices touched, link issues, and attach before/after screenshots whenever UI output changes. Highlight Elasticsearch mapping or data migration impacts for both search and frontend reviewers.
|
||||||
|
|
||||||
|
## Configuration & Security Tips
|
||||||
|
- Load credentials through environment variables (`ELASTIC_URL`, `ELASTIC_USERNAME`, `ELASTIC_PASSWORD`, `ELASTIC_API_KEY`, `YOUTUBE_API_KEY`) or a `.env` file, and keep secrets out of version control.
|
||||||
|
- Adjust `ELASTIC_VERIFY_CERTS`, `ELASTIC_CA_CERT`, and `ELASTIC_DEBUG` only while debugging, and prefer branch-specific indices (`this_little_corner_py_<initials>`) to avoid clobbering shared data.
|
||||||
22
config.py
22
config.py
@ -20,13 +20,13 @@ from typing import Optional
|
|||||||
try:
|
try:
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import logging
|
import logging
|
||||||
_logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
_logger = logging.getLogger(__name__)
|
||||||
_env_path = Path(__file__).parent / ".env"
|
_env_path = Path(__file__).parent / ".env"
|
||||||
if _env_path.exists():
|
if _env_path.exists():
|
||||||
_logger.info(f"Loading .env from: {_env_path}")
|
_logger.info("Loading .env from: %s", _env_path)
|
||||||
result = load_dotenv(_env_path, override=True)
|
result = load_dotenv(_env_path, override=True)
|
||||||
_logger.info(f"load_dotenv result: {result}")
|
_logger.info("load_dotenv result: %s", result)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass # python-dotenv not installed
|
pass # python-dotenv not installed
|
||||||
|
|
||||||
@ -58,6 +58,11 @@ class AppConfig:
|
|||||||
elastic: ElasticSettings
|
elastic: ElasticSettings
|
||||||
data: DataSettings
|
data: DataSettings
|
||||||
youtube: YoutubeSettings
|
youtube: YoutubeSettings
|
||||||
|
qdrant_url: str
|
||||||
|
qdrant_collection: str
|
||||||
|
qdrant_vector_name: Optional[str]
|
||||||
|
qdrant_vector_size: int
|
||||||
|
qdrant_embed_model: str
|
||||||
|
|
||||||
|
|
||||||
def _env(name: str, default: Optional[str] = None) -> Optional[str]:
|
def _env(name: str, default: Optional[str] = None) -> Optional[str]:
|
||||||
@ -89,7 +94,16 @@ def load_config() -> AppConfig:
|
|||||||
)
|
)
|
||||||
data = DataSettings(root=data_root)
|
data = DataSettings(root=data_root)
|
||||||
youtube = YoutubeSettings(api_key=_env("YOUTUBE_API_KEY"))
|
youtube = YoutubeSettings(api_key=_env("YOUTUBE_API_KEY"))
|
||||||
return AppConfig(elastic=elastic, data=data, youtube=youtube)
|
return AppConfig(
|
||||||
|
elastic=elastic,
|
||||||
|
data=data,
|
||||||
|
youtube=youtube,
|
||||||
|
qdrant_url=_env("QDRANT_URL", "http://localhost:6333"),
|
||||||
|
qdrant_collection=_env("QDRANT_COLLECTION", "tlc_embeddings"),
|
||||||
|
qdrant_vector_name=_env("QDRANT_VECTOR_NAME"),
|
||||||
|
qdrant_vector_size=int(_env("QDRANT_VECTOR_SIZE", "1024")),
|
||||||
|
qdrant_embed_model=_env("QDRANT_EMBED_MODEL", "BAAI/bge-large-en-v1.5"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
CONFIG = load_config()
|
CONFIG = load_config()
|
||||||
|
|||||||
@ -3,3 +3,5 @@ elasticsearch>=7.0.0,<9.0.0
|
|||||||
youtube-transcript-api>=0.6
|
youtube-transcript-api>=0.6
|
||||||
google-api-python-client>=2.0.0
|
google-api-python-client>=2.0.0
|
||||||
python-dotenv>=0.19.0
|
python-dotenv>=0.19.0
|
||||||
|
requests>=2.31.0
|
||||||
|
sentence-transformers>=2.7.0
|
||||||
|
|||||||
461
search_app.py
461
search_app.py
@ -1,11 +1,15 @@
|
|||||||
"""
|
"""
|
||||||
Flask application exposing a minimal search API backed by Elasticsearch.
|
Flask application exposing search, graph, and transcript endpoints for TLC.
|
||||||
|
|
||||||
Routes:
|
Routes:
|
||||||
GET / -> Static HTML search page.
|
GET / -> static HTML search page.
|
||||||
GET /api/channels -> List available channels (via terms aggregation).
|
GET /graph -> static reference graph UI.
|
||||||
GET /api/search -> Search index with pagination and simple highlighting.
|
GET /vector-search -> experimental Qdrant vector search UI.
|
||||||
GET /api/transcript -> Return full transcript for a given video_id.
|
GET /api/channels -> channels aggregation.
|
||||||
|
GET /api/search -> Elasticsearch keyword search.
|
||||||
|
POST /api/vector-search -> Qdrant vector similarity query.
|
||||||
|
GET /api/graph -> reference graph API.
|
||||||
|
GET /api/transcript -> transcript JSON payload.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@ -15,13 +19,20 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set
|
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
||||||
|
|
||||||
from collections import Counter
|
from collections import Counter, deque
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from flask import Flask, jsonify, request, send_from_directory
|
from flask import Flask, jsonify, request, send_from_directory
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
try:
|
||||||
|
from sentence_transformers import SentenceTransformer # type: ignore
|
||||||
|
except ImportError: # pragma: no cover - optional dependency
|
||||||
|
SentenceTransformer = None
|
||||||
|
|
||||||
from .config import CONFIG, AppConfig
|
from .config import CONFIG, AppConfig
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -32,6 +43,35 @@ except ImportError: # pragma: no cover - dependency optional
|
|||||||
BadRequestError = Exception # type: ignore
|
BadRequestError = Exception # type: ignore
|
||||||
|
|
||||||
LOGGER = logging.getLogger(__name__)
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
_EMBED_MODEL = None
|
||||||
|
_EMBED_MODEL_NAME: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_embedder(model_name: str) -> "SentenceTransformer":
    """Return the module-level embedding model, loading it when needed.

    The model is kept in the ``_EMBED_MODEL`` global together with the name it
    was loaded under (``_EMBED_MODEL_NAME``). A new ``SentenceTransformer`` is
    constructed only when nothing is cached yet or when ``model_name`` differs
    from the cached name.

    Raises:
        RuntimeError: if the optional ``sentence_transformers`` import failed
            (``SentenceTransformer`` is ``None``).
    """
    global _EMBED_MODEL, _EMBED_MODEL_NAME

    if SentenceTransformer is None:  # pragma: no cover - optional dependency
        raise RuntimeError(
            "sentence-transformers is required for vector search. Install via pip install sentence-transformers."
        )

    cache_hit = _EMBED_MODEL is not None and _EMBED_MODEL_NAME == model_name
    if cache_hit:
        return _EMBED_MODEL

    # Cache miss (first use, or a different model was requested): (re)load.
    LOGGER.info("Loading embedding model: %s", model_name)
    _EMBED_MODEL = SentenceTransformer(model_name)
    _EMBED_MODEL_NAME = model_name
    return _EMBED_MODEL
|
||||||
|
|
||||||
|
|
||||||
|
def embed_query(text: str, *, model_name: str, expected_dim: int) -> List[float]:
    """Encode ``text`` into a single embedding vector.

    The text is sent to the embedder as a one-element batch, prefixed with
    ``"query: "``, with ``normalize_embeddings=True`` and the progress bar
    disabled. The first (only) row of the result is converted with
    ``.tolist()``.

    Args:
        text: Query text to embed.
        model_name: Model identifier handed to ``_ensure_embedder``.
        expected_dim: Required length of the resulting vector.

    Returns:
        The embedding as a list.

    Raises:
        RuntimeError: if the vector length differs from ``expected_dim``.
    """
    model = _ensure_embedder(model_name)
    prompt = f"query: {text}"
    encoded_batch = model.encode(
        [prompt],
        show_progress_bar=False,
        normalize_embeddings=True,
    )
    vector = encoded_batch[0].tolist()

    if len(vector) == expected_dim:
        return vector
    raise RuntimeError(
        f"Embedding dimension mismatch (expected {expected_dim}, got {len(vector)})"
    )
|
||||||
|
|
||||||
|
|
||||||
def _ensure_client(config: AppConfig) -> "Elasticsearch":
|
def _ensure_client(config: AppConfig) -> "Elasticsearch":
|
||||||
@ -428,6 +468,17 @@ def build_query_payload(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
should.append(
|
||||||
|
{
|
||||||
|
"match_phrase": {
|
||||||
|
"title": {
|
||||||
|
"query": query,
|
||||||
|
"slop": 0,
|
||||||
|
"boost": 50.0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
if use_fuzzy:
|
if use_fuzzy:
|
||||||
should.append(
|
should.append(
|
||||||
{
|
{
|
||||||
@ -513,15 +564,182 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
|
|||||||
app = Flask(__name__, static_folder=str(Path(__file__).parent / "static"))
|
app = Flask(__name__, static_folder=str(Path(__file__).parent / "static"))
|
||||||
client = _ensure_client(config)
|
client = _ensure_client(config)
|
||||||
index = config.elastic.index
|
index = config.elastic.index
|
||||||
|
qdrant_url = config.qdrant_url
|
||||||
|
qdrant_collection = config.qdrant_collection
|
||||||
|
qdrant_vector_name = config.qdrant_vector_name
|
||||||
|
qdrant_vector_size = config.qdrant_vector_size
|
||||||
|
qdrant_embed_model = config.qdrant_embed_model
|
||||||
|
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
def index_page():
|
def index_page():
|
||||||
return send_from_directory(app.static_folder, "index.html")
|
return send_from_directory(app.static_folder, "index.html")
|
||||||
|
|
||||||
|
@app.route("/graph")
def graph_page():
    """Serve ``graph.html`` from the application's static folder."""
    page_name = "graph.html"
    return send_from_directory(app.static_folder, page_name)
|
||||||
|
|
||||||
|
@app.route("/vector-search")
def vector_search_page():
    """Serve ``vector.html`` from the application's static folder."""
    page_name = "vector.html"
    return send_from_directory(app.static_folder, page_name)
|
||||||
|
|
||||||
@app.route("/static/<path:filename>")
|
@app.route("/static/<path:filename>")
|
||||||
def static_files(filename: str):
|
def static_files(filename: str):
|
||||||
return send_from_directory(app.static_folder, filename)
|
return send_from_directory(app.static_folder, filename)
|
||||||
|
|
||||||
|
def normalize_reference_list(values: Any) -> List[str]:
    """Flatten a reference field into a list of non-empty id strings.

    ``values`` may be ``None`` (yields ``[]``), a list/tuple/set of entries, or
    a single entry (treated as a one-element list). Each entry is either a
    dict, from which ``"video_id"`` or else ``"id"`` is taken, or any other
    value used as-is. Entries are stringified and stripped; ``None`` entries,
    empty strings and the literal texts ``"none"`` / ``"null"`` (any case) are
    dropped. Order is preserved and duplicates are kept.
    """
    if values is None:
        return []

    entries = values if isinstance(values, (list, tuple, set)) else [values]

    def _to_text(entry: Any) -> Optional[str]:
        """Return the cleaned id for one entry, or None when it must be skipped."""
        raw = (entry.get("video_id") or entry.get("id")) if isinstance(entry, dict) else entry
        if raw is None:
            return None
        cleaned = str(raw).strip()
        if not cleaned or cleaned.lower() in {"none", "null"}:
            return None
        return cleaned

    return [text for text in map(_to_text, entries) if text is not None]
|
||||||
|
|
||||||
|
def build_graph_payload(
    root_id: str, depth: int, max_nodes: int
) -> Dict[str, Any]:
    """Build the node/link payload for the reference graph around ``root_id``.

    Starting at ``root_id``, documents are loaded from Elasticsearch and walked
    breadth-first through their ``internal_references`` and ``referenced_by``
    fields. A document at ``depth`` hops is still added as a node but is not
    expanded further.

    Args:
        root_id: Video id to start from (surrounding whitespace is stripped).
        depth: Maximum number of hops to expand from the root.
        max_nodes: Upper bound on the number of nodes in the payload.

    Returns:
        A dict with ``root``, ``depth``, ``nodes`` (list of node dicts),
        ``links`` (list of ``{"source", "target", "relation"}`` dicts) and
        ``meta`` (``node_count`` / ``link_count``). When ``root_id`` is blank,
        or the root document cannot be loaded, ``nodes`` and ``links`` are
        empty.
    """
    root_id = root_id.strip()
    if not root_id:
        return {"nodes": [], "links": [], "root": root_id, "depth": depth, "meta": {}}

    doc_cache: Dict[str, Optional[Dict[str, Any]]] = {}

    def fetch_document(video_id: str) -> Optional[Dict[str, Any]]:
        """Return the ``_source`` of ``video_id`` (memoised); None if the lookup fails."""
        if video_id in doc_cache:
            return doc_cache[video_id]
        try:
            result = client.get(index=index, id=video_id)
            doc_cache[video_id] = result.get("_source")
        except Exception as exc:  # pragma: no cover - elasticsearch handles errors
            LOGGER.debug("Graph: failed to load %s: %s", video_id, exc)
            doc_cache[video_id] = None
        return doc_cache[video_id]

    def make_node(node_id: str, doc: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Build a node dict; a missing document yields a placeholder node."""
        source = doc if doc is not None else {}
        return {
            "id": node_id,
            "title": source.get("title") or node_id,
            "channel_id": source.get("channel_id"),
            "channel_name": source.get("channel_name") or source.get("channel_id") or "Unknown",
            "url": source.get("url"),
            "date": source.get("date"),
            "is_root": node_id == root_id,
        }

    nodes: Dict[str, Dict[str, Any]] = {}
    links: List[Dict[str, Any]] = []
    link_seen: Set[Tuple[str, str, str]] = set()
    queue: deque[Tuple[str, int]] = deque([(root_id, 0)])
    queued: Set[str] = {root_id}
    visited: Set[str] = set()

    def add_link(source_id: str, target_id: str, relation: str) -> None:
        """Record a directed link once per (source, target, relation)."""
        key = (source_id, target_id, relation)
        if key not in link_seen:
            links.append({"source": source_id, "target": target_id, "relation": relation})
            link_seen.add(key)

    while queue and len(nodes) < max_nodes:
        current_id, level = queue.popleft()
        queued.discard(current_id)
        if current_id in visited:
            continue
        doc = fetch_document(current_id)
        if doc is None:
            if current_id == root_id:
                # Without a root document there is no graph to return.
                break
            visited.add(current_id)
            continue

        visited.add(current_id)
        nodes[current_id] = make_node(current_id, doc)

        if level >= depth:
            # Deepest level: keep the node but do not expand its references.
            continue

        neighbor_ids: List[str] = []

        for ref_id in normalize_reference_list(doc.get("internal_references")):
            if ref_id == current_id:
                continue
            add_link(current_id, ref_id, "references")
            neighbor_ids.append(ref_id)

        for ref_id in normalize_reference_list(doc.get("referenced_by")):
            if ref_id == current_id:
                continue
            add_link(ref_id, current_id, "referenced_by")
            neighbor_ids.append(ref_id)

        for neighbor in neighbor_ids:
            if neighbor in visited or neighbor in queued:
                continue
            if len(nodes) + len(queue) >= max_nodes:
                break
            queue.append((neighbor, level + 1))
            queued.add(neighbor)

    # Backfill nodes for link endpoints that were never dequeued, but only
    # while there is room under max_nodes. Previously every endpoint was added
    # (one Elasticsearch GET each), so a heavily referenced video could return
    # far more than max_nodes nodes and the link filter below never removed
    # anything.
    for link in links:
        for endpoint in ("source", "target"):
            node_id = link[endpoint]
            if node_id in nodes:
                continue
            if len(nodes) >= max_nodes:
                continue
            nodes[node_id] = make_node(node_id, fetch_document(node_id))

    # Drop links whose endpoints did not make it into the capped node set.
    links = [
        link
        for link in links
        if link.get("source") in nodes and link.get("target") in nodes
    ]

    return {
        "root": root_id,
        "depth": depth,
        "nodes": list(nodes.values()),
        "links": links,
        "meta": {
            "node_count": len(nodes),
            "link_count": len(links),
        },
    }
|
||||||
|
|
||||||
@app.route("/api/channels")
|
@app.route("/api/channels")
|
||||||
def channels():
|
def channels():
|
||||||
base_channels_body = {
|
base_channels_body = {
|
||||||
@ -580,23 +798,54 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
|
|||||||
.get("channels", {})
|
.get("channels", {})
|
||||||
.get("buckets", [])
|
.get("buckets", [])
|
||||||
)
|
)
|
||||||
data = [
|
data = []
|
||||||
{
|
for bucket in buckets:
|
||||||
"Id": bucket.get("key"),
|
key = bucket.get("key")
|
||||||
"Name": (
|
name_hit = (
|
||||||
bucket.get("name", {})
|
bucket.get("name", {})
|
||||||
.get("hits", {})
|
.get("hits", {})
|
||||||
.get("hits", [{}])[0]
|
.get("hits", [{}])[0]
|
||||||
.get("_source", {})
|
.get("_source", {})
|
||||||
.get("channel_name", bucket.get("key"))
|
.get("channel_name")
|
||||||
),
|
)
|
||||||
|
display_name = name_hit or key or "Unknown"
|
||||||
|
data.append(
|
||||||
|
{
|
||||||
|
"Id": key,
|
||||||
|
"Name": display_name,
|
||||||
"Count": bucket.get("doc_count", 0),
|
"Count": bucket.get("doc_count", 0),
|
||||||
}
|
}
|
||||||
for bucket in buckets
|
)
|
||||||
]
|
|
||||||
data.sort(key=lambda item: item["Name"].lower())
|
data.sort(key=lambda item: item["Name"].lower())
|
||||||
return jsonify(data)
|
return jsonify(data)
|
||||||
|
|
||||||
|
@app.route("/api/graph")
def graph_api():
    """Return the reference graph for the ``video_id`` query parameter as JSON.

    Query parameters:
        video_id: required; a blank value yields a 400 response.
        depth: integer, default 1, clamped to 0..3; unparsable values fall
            back to 1.
        max_nodes: integer, default 200, clamped to 10..400; unparsable values
            fall back to 200.

    Responds with 404 when the built payload contains no nodes; otherwise the
    payload is returned with ``meta.max_nodes`` set to the effective cap.
    """

    def _int_arg(name: str, fallback: int) -> int:
        """Read an integer query parameter, using ``fallback`` if it does not parse."""
        try:
            return int(request.args.get(name, str(fallback)))
        except ValueError:
            return fallback

    video_id = (request.args.get("video_id") or "").strip()
    if not video_id:
        return jsonify({"error": "video_id is required"}), 400

    depth = min(max(_int_arg("depth", 1), 0), 3)
    max_nodes = min(max(_int_arg("max_nodes", 200), 10), 400)

    payload = build_graph_payload(video_id, depth, max_nodes)
    if payload["nodes"]:
        payload["meta"]["max_nodes"] = max_nodes
        return jsonify(payload)

    not_found = {"error": f"Video '{video_id}' was not found in the index."}
    return jsonify(not_found), 404
|
||||||
|
|
||||||
@app.route("/api/years")
|
@app.route("/api/years")
|
||||||
def years():
|
def years():
|
||||||
body = {
|
body = {
|
||||||
@ -718,10 +967,13 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
|
|||||||
for hit in hits.get("hits", []):
|
for hit in hits.get("hits", []):
|
||||||
source = hit.get("_source", {})
|
source = hit.get("_source", {})
|
||||||
highlight_map = hit.get("highlight", {})
|
highlight_map = hit.get("highlight", {})
|
||||||
transcript_highlight = (
|
transcript_highlight = [
|
||||||
(highlight_map.get("transcript_full", []) or [])
|
{"html": value, "source": "primary"}
|
||||||
+ (highlight_map.get("transcript_secondary_full", []) or [])
|
for value in (highlight_map.get("transcript_full", []) or [])
|
||||||
)
|
] + [
|
||||||
|
{"html": value, "source": "secondary"}
|
||||||
|
for value in (highlight_map.get("transcript_secondary_full", []) or [])
|
||||||
|
]
|
||||||
|
|
||||||
title_html = (
|
title_html = (
|
||||||
highlight_map.get("title")
|
highlight_map.get("title")
|
||||||
@ -741,6 +993,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
|
|||||||
"description": source.get("description"),
|
"description": source.get("description"),
|
||||||
"descriptionHtml": description_html,
|
"descriptionHtml": description_html,
|
||||||
"date": source.get("date"),
|
"date": source.get("date"),
|
||||||
|
"duration": source.get("duration"),
|
||||||
"url": source.get("url"),
|
"url": source.get("url"),
|
||||||
"toHighlight": transcript_highlight,
|
"toHighlight": transcript_highlight,
|
||||||
"highlightSource": {
|
"highlightSource": {
|
||||||
@ -751,6 +1004,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
|
|||||||
"internal_references": source.get("internal_references", []),
|
"internal_references": source.get("internal_references", []),
|
||||||
"referenced_by_count": source.get("referenced_by_count", 0),
|
"referenced_by_count": source.get("referenced_by_count", 0),
|
||||||
"referenced_by": source.get("referenced_by", []),
|
"referenced_by": source.get("referenced_by", []),
|
||||||
|
"video_status": source.get("video_status"),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -877,12 +1131,20 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
|
|||||||
"field": "channel_id.keyword",
|
"field": "channel_id.keyword",
|
||||||
"size": channel_terms_size,
|
"size": channel_terms_size,
|
||||||
"order": {"_count": "desc"},
|
"order": {"_count": "desc"},
|
||||||
|
},
|
||||||
|
"aggs": {
|
||||||
|
"channel_name_hit": {
|
||||||
|
"top_hits": {
|
||||||
|
"size": 1,
|
||||||
|
"_source": {"includes": ["channel_name"]},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
if config.elastic.debug:
|
if config.elastic.debug:
|
||||||
LOGGER.info(
|
LOGGER.info(
|
||||||
@ -916,7 +1178,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
|
|||||||
.get("buckets", [])
|
.get("buckets", [])
|
||||||
)
|
)
|
||||||
|
|
||||||
channel_totals: Dict[str, int] = {}
|
channel_totals: Dict[str, Dict[str, Any]] = {}
|
||||||
buckets: List[Dict[str, Any]] = []
|
buckets: List[Dict[str, Any]] = []
|
||||||
for bucket in raw_buckets:
|
for bucket in raw_buckets:
|
||||||
date_str = bucket.get("key_as_string")
|
date_str = bucket.get("key_as_string")
|
||||||
@ -926,14 +1188,28 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
|
|||||||
cid = ch_bucket.get("key")
|
cid = ch_bucket.get("key")
|
||||||
count = ch_bucket.get("doc_count", 0)
|
count = ch_bucket.get("doc_count", 0)
|
||||||
if cid:
|
if cid:
|
||||||
channel_entries.append({"id": cid, "count": count})
|
hit_source = (
|
||||||
channel_totals[cid] = channel_totals.get(cid, 0) + count
|
ch_bucket.get("channel_name_hit", {})
|
||||||
|
.get("hits", {})
|
||||||
|
.get("hits", [{}])[0]
|
||||||
|
.get("_source", {})
|
||||||
|
)
|
||||||
|
channel_name = hit_source.get("channel_name") if isinstance(hit_source, dict) else None
|
||||||
|
channel_entries.append({"id": cid, "count": count, "name": channel_name})
|
||||||
|
if cid not in channel_totals:
|
||||||
|
channel_totals[cid] = {"total": 0, "name": channel_name}
|
||||||
|
channel_totals[cid]["total"] += count
|
||||||
|
if channel_name and not channel_totals[cid].get("name"):
|
||||||
|
channel_totals[cid]["name"] = channel_name
|
||||||
buckets.append(
|
buckets.append(
|
||||||
{"date": date_str, "total": total, "channels": channel_entries}
|
{"date": date_str, "total": total, "channels": channel_entries}
|
||||||
)
|
)
|
||||||
|
|
||||||
ranked_channels = sorted(
|
ranked_channels = sorted(
|
||||||
[{"id": cid, "total": total} for cid, total in channel_totals.items()],
|
[
|
||||||
|
{"id": cid, "total": info.get("total", 0), "name": info.get("name")}
|
||||||
|
for cid, info in channel_totals.items()
|
||||||
|
],
|
||||||
key=lambda item: item["total"],
|
key=lambda item: item["total"],
|
||||||
reverse=True,
|
reverse=True,
|
||||||
)
|
)
|
||||||
@ -953,6 +1229,145 @@ def create_app(config: AppConfig = CONFIG) -> Flask:
|
|||||||
def frequency_page():
|
def frequency_page():
|
||||||
return send_from_directory(app.static_folder, "frequency.html")
|
return send_from_directory(app.static_folder, "frequency.html")
|
||||||
|
|
||||||
|
@app.route("/api/vector-search", methods=["POST"])
def api_vector_search():
    """Embed the posted query and return the nearest Qdrant points as search items.

    Request JSON keys read: ``query`` (text to embed), ``filters`` (passed to
    Qdrant as ``filter`` when non-empty), ``size`` (default 10, minimum 1) and
    ``offset`` (default 0, minimum 0). Values of ``size`` / ``offset`` that do
    not convert to an integer fall back to their defaults, and a JSON body
    that is not an object is treated as empty.

    Responses:
        200 with ``items`` / ``totalResults`` / ``offset``; an empty query also
            returns 200 with ``"error": "empty_query"``.
        500 ``embedding_unavailable`` when embedding the query fails.
        502 ``vector_search_unavailable`` when the Qdrant request fails.

    Items whose Qdrant payload carries a ``channel_id`` but no
    ``channel_name`` / ``channel_title`` get their name from a best-effort
    Elasticsearch lookup; if that finds nothing they show the channel id.
    """

    def _coerce_int(value: Any, fallback: int) -> int:
        """Convert ``value`` to int, returning ``fallback`` when it cannot be parsed."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return fallback

    request_payload = request.get_json(silent=True) or {}
    if not isinstance(request_payload, dict):
        # A JSON array or scalar has no keys to read; treat it as an empty request.
        request_payload = {}
    query_text = str(request_payload.get("query") or "").strip()
    filters = request_payload.get("filters") or {}
    limit = max(_coerce_int(request_payload.get("size", 10), 10), 1)
    offset = max(_coerce_int(request_payload.get("offset", 0), 0), 0)

    if not query_text:
        return jsonify(
            {"items": [], "totalResults": 0, "offset": offset, "error": "empty_query"}
        )

    try:
        query_vector = embed_query(
            query_text, model_name=qdrant_embed_model, expected_dim=qdrant_vector_size
        )
    except Exception as exc:  # pragma: no cover - runtime dependency
        LOGGER.error("Embedding failed: %s", exc, exc_info=config.elastic.debug)
        return jsonify({"error": "embedding_unavailable"}), 500

    qdrant_vector_payload: Any
    if qdrant_vector_name:
        qdrant_vector_payload = {qdrant_vector_name: query_vector}
    else:
        qdrant_vector_payload = query_vector

    qdrant_body: Dict[str, Any] = {
        "vector": qdrant_vector_payload,
        "limit": limit,
        "offset": offset,
        "with_payload": True,
        "with_vectors": False,
    }
    if filters:
        qdrant_body["filter"] = filters

    try:
        response = requests.post(
            f"{qdrant_url}/collections/{qdrant_collection}/points/search",
            json=qdrant_body,
            timeout=20,
        )
        response.raise_for_status()
        data = response.json()
    except Exception as exc:
        LOGGER.error("Vector search failed: %s", exc, exc_info=config.elastic.debug)
        return jsonify({"error": "vector_search_unavailable"}), 502

    points = (data.get("result") or []) if isinstance(data, dict) else []
    items: List[Dict[str, Any]] = []
    # Items that only have a channel id so far; their names are resolved below.
    unnamed_items: List[Dict[str, Any]] = []
    missing_channel_ids: Set[str] = set()
    for point in points:
        if not isinstance(point, dict):
            continue
        point_payload = point.get("payload", {}) or {}
        raw_highlights = point_payload.get("highlights") or []
        highlight_entries: List[Dict[str, str]] = []
        for entry in raw_highlights:
            if isinstance(entry, dict):
                html_value = entry.get("html") or entry.get("text")
            else:
                html_value = str(entry)
            if not html_value:
                continue
            highlight_entries.append({"html": html_value, "source": "primary"})

        channel_id = point_payload.get("channel_id")
        channel_name = point_payload.get("channel_name") or point_payload.get("channel_title")
        item = {
            "video_id": point_payload.get("video_id"),
            "channel_id": channel_id,
            # Fall back to the id for display; replaced below if a name is found.
            "channel_name": channel_name or channel_id,
            "title": point_payload.get("title"),
            "titleHtml": point_payload.get("title"),
            "description": point_payload.get("description"),
            "descriptionHtml": point_payload.get("description"),
            "date": point_payload.get("date"),
            "url": point_payload.get("url"),
            "chunkText": point_payload.get("text")
            or point_payload.get("chunk_text")
            or point_payload.get("chunk")
            or point_payload.get("content"),
            "chunkTimestamp": point_payload.get("timestamp")
            or point_payload.get("start_seconds")
            or point_payload.get("start"),
            "toHighlight": highlight_entries,
            "highlightSource": {
                "primary": bool(highlight_entries),
                "secondary": False,
            },
            "distance": point.get("score"),
            "internal_references_count": point_payload.get("internal_references_count", 0),
            "internal_references": point_payload.get("internal_references", []),
            "referenced_by_count": point_payload.get("referenced_by_count", 0),
            "referenced_by": point_payload.get("referenced_by", []),
            "video_status": point_payload.get("video_status"),
            "duration": point_payload.get("duration"),
        }
        items.append(item)
        # Decide on the *name* alone. The previous check tested the display
        # label, which already fell back to channel_id, so the lookup below
        # could never run.
        if not channel_name and channel_id:
            missing_channel_ids.add(str(channel_id))
            unnamed_items.append(item)

    if missing_channel_ids:
        try:
            # NOTE(review): hits are not collapsed per channel, so a channel
            # with many documents may crowd others out of the 2x window.
            es_lookup = client.search(
                index=index,
                body={
                    "size": len(missing_channel_ids) * 2,
                    "_source": ["channel_id", "channel_name"],
                    "query": {"terms": {"channel_id.keyword": list(missing_channel_ids)}},
                },
            )
            hits = es_lookup.get("hits", {}).get("hits", [])
            channel_lookup: Dict[str, Any] = {}
            for hit in hits:
                src = hit.get("_source", {}) or {}
                cid = src.get("channel_id")
                cname = src.get("channel_name")
                if cid and cname and str(cid) not in channel_lookup:
                    channel_lookup[str(cid)] = cname
            for item in unnamed_items:
                resolved = channel_lookup.get(str(item.get("channel_id")))
                if resolved:
                    item["channel_name"] = resolved
        except Exception as exc:
            # Best effort: on failure the items keep the channel id as their label.
            LOGGER.debug("Vector channel lookup failed: %s", exc)

    return jsonify(
        {
            "items": items,
            "totalResults": len(items),
            "offset": offset,
        }
    )
|
||||||
|
|
||||||
@app.route("/api/transcript")
|
@app.route("/api/transcript")
|
||||||
def transcript():
|
def transcript():
|
||||||
video_id = request.args.get("video_id", type=str)
|
video_id = request.args.get("video_id", type=str)
|
||||||
|
|||||||
691
static/app.js
691
static/app.js
@ -32,9 +32,7 @@
|
|||||||
|
|
||||||
let qs = new URLSearchParams(window.location.search);
|
let qs = new URLSearchParams(window.location.search);
|
||||||
const qInput = document.getElementById("q");
|
const qInput = document.getElementById("q");
|
||||||
const channelDropdown = document.getElementById("channelDropdown");
|
const channelSelect = document.getElementById("channel");
|
||||||
const channelSummary = document.getElementById("channelSummary");
|
|
||||||
const channelOptions = document.getElementById("channelOptions");
|
|
||||||
const yearSel = document.getElementById("year");
|
const yearSel = document.getElementById("year");
|
||||||
const sortSel = document.getElementById("sort");
|
const sortSel = document.getElementById("sort");
|
||||||
const sizeSel = document.getElementById("size");
|
const sizeSel = document.getElementById("size");
|
||||||
@ -43,6 +41,9 @@
|
|||||||
const phraseToggle = document.getElementById("phraseToggle");
|
const phraseToggle = document.getElementById("phraseToggle");
|
||||||
const queryToggle = document.getElementById("queryStringToggle");
|
const queryToggle = document.getElementById("queryStringToggle");
|
||||||
const searchBtn = document.getElementById("searchBtn");
|
const searchBtn = document.getElementById("searchBtn");
|
||||||
|
const aboutBtn = document.getElementById("aboutBtn");
|
||||||
|
const aboutPanel = document.getElementById("aboutPanel");
|
||||||
|
const aboutCloseBtn = document.getElementById("aboutCloseBtn");
|
||||||
const resultsDiv = document.getElementById("results");
|
const resultsDiv = document.getElementById("results");
|
||||||
const metaDiv = document.getElementById("meta");
|
const metaDiv = document.getElementById("meta");
|
||||||
const metricsContainer = document.getElementById("metrics");
|
const metricsContainer = document.getElementById("metrics");
|
||||||
@ -50,17 +51,27 @@
|
|||||||
const metricsContent = document.getElementById("metricsContent");
|
const metricsContent = document.getElementById("metricsContent");
|
||||||
const freqSummary = document.getElementById("frequencySummary");
|
const freqSummary = document.getElementById("frequencySummary");
|
||||||
const freqChart = document.getElementById("frequencyChart");
|
const freqChart = document.getElementById("frequencyChart");
|
||||||
|
const graphOverlay = document.getElementById("graphModalOverlay");
|
||||||
|
const graphModalClose = document.getElementById("graphModalClose");
|
||||||
const channelMap = new Map();
|
const channelMap = new Map();
|
||||||
const selectedChannels = new Set();
|
const transcriptCache = new Map();
|
||||||
let pendingChannelSelection = [];
|
let lastFocusBeforeModal = null;
|
||||||
|
let pendingChannelSelection = "";
|
||||||
let channelsReady = false;
|
let channelsReady = false;
|
||||||
let suppressChannelChange = false;
|
|
||||||
let allChannelsCheckbox = null;
|
|
||||||
let previousToggleState = { exact: true, fuzzy: true, phrase: true };
|
let previousToggleState = { exact: true, fuzzy: true, phrase: true };
|
||||||
let currentPage =
|
let currentPage =
|
||||||
parseInt(qs.get("page") || "0", 10) ||
|
parseInt(qs.get("page") || "0", 10) ||
|
||||||
0;
|
0;
|
||||||
|
|
||||||
|
/**
 * Show or hide the "About" panel by adding or removing its `hidden` attribute.
 * Does nothing when the panel element was not found on the page.
 *
 * @param {boolean} show - Truthy to reveal the panel, falsy to hide it.
 */
function toggleAboutPanel(show) {
  if (!aboutPanel) {
    return;
  }
  if (!show) {
    aboutPanel.setAttribute("hidden", "hidden");
    return;
  }
  aboutPanel.removeAttribute("hidden");
}
|
||||||
|
|
||||||
function parseBoolParam(name, defaultValue) {
|
function parseBoolParam(name, defaultValue) {
|
||||||
const raw = qs.get(name);
|
const raw = qs.get(name);
|
||||||
if (raw === null) return defaultValue;
|
if (raw === null) return defaultValue;
|
||||||
@ -68,9 +79,8 @@
|
|||||||
return !["0", "false", "no"].includes(lowered);
|
return !["0", "false", "no"].includes(lowered);
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseChannelParams(params) {
|
function parseChannelParam(params) {
|
||||||
const collected = [];
|
if (!params) return "";
|
||||||
if (!params) return collected;
|
|
||||||
const seen = new Set();
|
const seen = new Set();
|
||||||
const rawValues = params.getAll("channel_id");
|
const rawValues = params.getAll("channel_id");
|
||||||
const legacy = params.get("channel");
|
const legacy = params.get("channel");
|
||||||
@ -84,61 +94,17 @@
|
|||||||
.forEach((part) => {
|
.forEach((part) => {
|
||||||
if (!seen.has(part)) {
|
if (!seen.has(part)) {
|
||||||
seen.add(part);
|
seen.add(part);
|
||||||
collected.push(part);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
return collected;
|
const first = Array.from(seen)[0];
|
||||||
|
return first || "";
|
||||||
}
|
}
|
||||||
|
|
||||||
function getSelectedChannels() {
|
function getSelectedChannels() {
|
||||||
return Array.from(selectedChannels);
|
if (!channelSelect) return [];
|
||||||
}
|
const value = channelSelect.value;
|
||||||
|
return value ? [value] : [];
|
||||||
function ensureAllCheckboxState() {
|
|
||||||
if (allChannelsCheckbox) {
|
|
||||||
allChannelsCheckbox.checked = selectedChannels.size === 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function updateChannelSummary() {
|
|
||||||
if (!channelSummary) return;
|
|
||||||
if (!selectedChannels.size) {
|
|
||||||
channelSummary.textContent = "All Channels";
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const names = Array.from(selectedChannels).map(
|
|
||||||
(id) => channelMap.get(id) || id
|
|
||||||
);
|
|
||||||
if (names.length > 1) {
|
|
||||||
names.sort((a, b) => a.localeCompare(b, undefined, { sensitivity: "base" }));
|
|
||||||
}
|
|
||||||
let label = names.slice(0, 3).join(", ");
|
|
||||||
if (names.length > 3) {
|
|
||||||
label += ` +${names.length - 3} more`;
|
|
||||||
}
|
|
||||||
channelSummary.textContent = label;
|
|
||||||
}
|
|
||||||
|
|
||||||
function applyChannelSelection(ids, { silent = false } = {}) {
|
|
||||||
selectedChannels.clear();
|
|
||||||
ids.forEach((id) => selectedChannels.add(id));
|
|
||||||
pendingChannelSelection = getSelectedChannels();
|
|
||||||
ensureAllCheckboxState();
|
|
||||||
if (channelOptions) {
|
|
||||||
suppressChannelChange = true;
|
|
||||||
const checkboxes = channelOptions.querySelectorAll(
|
|
||||||
'input[type="checkbox"][data-channel="1"]'
|
|
||||||
);
|
|
||||||
checkboxes.forEach((checkbox) => {
|
|
||||||
checkbox.checked = selectedChannels.has(checkbox.value);
|
|
||||||
});
|
|
||||||
suppressChannelChange = false;
|
|
||||||
}
|
|
||||||
updateChannelSummary();
|
|
||||||
if (!silent && channelsReady) {
|
|
||||||
runSearch(0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function loadYears() {
|
async function loadYears() {
|
||||||
@ -166,8 +132,10 @@
|
|||||||
yearSel.value = qs.get("year") || "";
|
yearSel.value = qs.get("year") || "";
|
||||||
sortSel.value = qs.get("sort") || "relevant";
|
sortSel.value = qs.get("sort") || "relevant";
|
||||||
sizeSel.value = qs.get("size") || "10";
|
sizeSel.value = qs.get("size") || "10";
|
||||||
pendingChannelSelection = parseChannelParams(qs);
|
pendingChannelSelection = parseChannelParam(qs);
|
||||||
applyChannelSelection(pendingChannelSelection, { silent: true });
|
if (channelSelect) {
|
||||||
|
channelSelect.value = pendingChannelSelection || "";
|
||||||
|
}
|
||||||
exactToggle.checked = parseBoolParam("exact", true);
|
exactToggle.checked = parseBoolParam("exact", true);
|
||||||
fuzzyToggle.checked = parseBoolParam("fuzzy", true);
|
fuzzyToggle.checked = parseBoolParam("fuzzy", true);
|
||||||
phraseToggle.checked = parseBoolParam("phrase", true);
|
phraseToggle.checked = parseBoolParam("phrase", true);
|
||||||
@ -212,6 +180,76 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Report whether the graph UI has been attached to `window` and flagged
 * itself as ready.
 *
 * @returns {boolean} True only when `window.GraphUI` exists and its `ready`
 *   property is truthy.
 */
function graphUiAvailable() {
  const ui = window.GraphUI;
  if (!ui) {
    return false;
  }
  return Boolean(ui.ready);
}
|
||||||
|
|
||||||
|
/**
 * Open the reference-graph modal and, when a video id is given, load its graph.
 *
 * Returns silently if the overlay element is missing or the graph UI is not
 * ready. The element focused at call time is remembered in
 * `lastFocusBeforeModal` so it can be restored when the modal closes.
 *
 * @param {string} [videoId] - Video whose graph should be loaded.
 */
function openGraphModal(videoId) {
  if (!(graphOverlay && graphUiAvailable())) {
    return;
  }

  const focused = document.activeElement;
  lastFocusBeforeModal = focused instanceof HTMLElement ? focused : null;

  graphOverlay.classList.add("active");
  graphOverlay.setAttribute("aria-hidden", "false");
  document.body.classList.add("modal-open");

  // Configure and load on the next frame, after the overlay classes are applied.
  window.requestAnimationFrame(() => {
    const ui = window.GraphUI;
    ui.setDepth(1);
    ui.setMaxNodes(200);
    ui.setLabelSize("tiny");

    const videoField = document.getElementById("graphVideoId");
    if (videoId) {
      if (videoField) {
        videoField.value = videoId;
      }
      ui.load(videoId, undefined, undefined, { updateInputs: true });
    }
    ui.focusInput();
  });
}
|
||||||
|
|
||||||
|
/**
 * Close the reference-graph modal: hide the overlay, stop the graph UI if it
 * is available, and return focus to the element that had it before the modal
 * was opened. Does nothing when the overlay element is missing.
 */
function closeGraphModal() {
  if (graphOverlay) {
    graphOverlay.classList.remove("active");
    graphOverlay.setAttribute("aria-hidden", "true");
    document.body.classList.remove("modal-open");

    if (graphUiAvailable()) {
      window.GraphUI.stop();
    }

    const canRestoreFocus =
      lastFocusBeforeModal && typeof lastFocusBeforeModal.focus === "function";
    if (canRestoreFocus) {
      lastFocusBeforeModal.focus();
    }
    lastFocusBeforeModal = null;
  }
}
|
||||||
|
|
||||||
|
// --- Graph modal wiring -----------------------------------------------------

// Close button inside the modal.
if (graphModalClose) {
  graphModalClose.addEventListener("click", closeGraphModal);
}

// Clicking the backdrop itself (not content inside it) dismisses the modal.
if (graphOverlay) {
  graphOverlay.addEventListener("click", ({ target }) => {
    if (target !== graphOverlay) {
      return;
    }
    closeGraphModal();
  });
}

// Escape closes the modal, but only while it is open.
document.addEventListener("keydown", (event) => {
  if (event.key !== "Escape") {
    return;
  }
  if (!graphOverlay || !graphOverlay.classList.contains("active")) {
    return;
  }
  closeGraphModal();
});

// Once the graph UI announces readiness, enable any launch buttons that were
// rendered disabled while waiting for it.
window.addEventListener("graph-ui-ready", () => {
  const waitingButtons = document.querySelectorAll(
    '.graph-launch-btn[data-await-graph-ready="1"]'
  );
  for (const launchBtn of waitingButtons) {
    launchBtn.removeAttribute("disabled");
    launchBtn.removeAttribute("data-await-graph-ready");
    launchBtn.title = "Open reference graph";
  }
});
|
||||||
|
|
||||||
function ensureQueryStringMode() {
|
function ensureQueryStringMode() {
|
||||||
if (!queryToggle) return;
|
if (!queryToggle) return;
|
||||||
if (!queryToggle.checked) {
|
if (!queryToggle.checked) {
|
||||||
@ -242,60 +280,8 @@
|
|||||||
return `${field}:(${escaped.join(" OR ")})`;
|
return `${field}:(${escaped.join(" OR ")})`;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (channelOptions) {
|
|
||||||
channelOptions.addEventListener("change", (event) => {
|
|
||||||
const target = event.target;
|
|
||||||
if (!(target instanceof HTMLInputElement) || target.type !== "checkbox") {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (suppressChannelChange) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (target.dataset.all === "1") {
|
|
||||||
if (!target.checked && !selectedChannels.size) {
|
|
||||||
suppressChannelChange = true;
|
|
||||||
target.checked = true;
|
|
||||||
suppressChannelChange = false;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (target.checked) {
|
|
||||||
selectedChannels.clear();
|
|
||||||
pendingChannelSelection = [];
|
|
||||||
suppressChannelChange = true;
|
|
||||||
const others = channelOptions.querySelectorAll(
|
|
||||||
'input[type="checkbox"][data-channel="1"]'
|
|
||||||
);
|
|
||||||
others.forEach((checkbox) => {
|
|
||||||
checkbox.checked = false;
|
|
||||||
});
|
|
||||||
suppressChannelChange = false;
|
|
||||||
ensureAllCheckboxState();
|
|
||||||
updateChannelSummary();
|
|
||||||
if (channelsReady) {
|
|
||||||
runSearch(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const id = target.value;
|
|
||||||
if (!id) return;
|
|
||||||
if (target.checked) {
|
|
||||||
selectedChannels.add(id);
|
|
||||||
} else {
|
|
||||||
selectedChannels.delete(id);
|
|
||||||
}
|
|
||||||
pendingChannelSelection = getSelectedChannels();
|
|
||||||
ensureAllCheckboxState();
|
|
||||||
updateChannelSummary();
|
|
||||||
if (channelsReady) {
|
|
||||||
runSearch(0);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
async function loadChannels() {
|
async function loadChannels() {
|
||||||
if (!channelOptions) {
|
if (!channelSelect) {
|
||||||
channelsReady = true;
|
channelsReady = true;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -303,57 +289,27 @@
|
|||||||
const res = await fetch("/api/channels");
|
const res = await fetch("/api/channels");
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
channelMap.clear();
|
channelMap.clear();
|
||||||
channelOptions.innerHTML = "";
|
channelSelect.innerHTML = '<option value="">All Channels</option>';
|
||||||
|
|
||||||
const listFragment = document.createDocumentFragment();
|
|
||||||
|
|
||||||
const allLabel = document.createElement("label");
|
|
||||||
allLabel.className = "channel-option";
|
|
||||||
allChannelsCheckbox = document.createElement("input");
|
|
||||||
allChannelsCheckbox.type = "checkbox";
|
|
||||||
allChannelsCheckbox.dataset.all = "1";
|
|
||||||
allChannelsCheckbox.checked = selectedChannels.size === 0;
|
|
||||||
const allText = document.createElement("span");
|
|
||||||
allText.textContent = "All Channels";
|
|
||||||
allLabel.appendChild(allChannelsCheckbox);
|
|
||||||
allLabel.appendChild(allText);
|
|
||||||
listFragment.appendChild(allLabel);
|
|
||||||
|
|
||||||
data.forEach((item) => {
|
data.forEach((item) => {
|
||||||
const label = document.createElement("label");
|
const option = document.createElement("option");
|
||||||
label.className = "channel-option";
|
option.value = item.Id;
|
||||||
const checkbox = document.createElement("input");
|
option.textContent = `${item.Name} (${item.Count})`;
|
||||||
checkbox.type = "checkbox";
|
channelSelect.appendChild(option);
|
||||||
checkbox.value = item.Id;
|
|
||||||
checkbox.dataset.channel = "1";
|
|
||||||
const text = document.createElement("span");
|
|
||||||
text.textContent = `${item.Name} (${item.Count})`;
|
|
||||||
label.appendChild(checkbox);
|
|
||||||
label.appendChild(text);
|
|
||||||
listFragment.appendChild(label);
|
|
||||||
channelMap.set(item.Id, item.Name);
|
channelMap.set(item.Id, item.Name);
|
||||||
});
|
});
|
||||||
|
|
||||||
channelOptions.appendChild(listFragment);
|
if (pendingChannelSelection && channelMap.has(pendingChannelSelection)) {
|
||||||
|
channelSelect.value = pendingChannelSelection;
|
||||||
if (!data.length) {
|
} else {
|
||||||
const empty = document.createElement("div");
|
channelSelect.value = "";
|
||||||
empty.textContent = "No channels available.";
|
|
||||||
channelOptions.appendChild(empty);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const initialSelection = pendingChannelSelection.length
|
|
||||||
? pendingChannelSelection
|
|
||||||
: Array.from(selectedChannels);
|
|
||||||
applyChannelSelection(initialSelection, { silent: true });
|
|
||||||
channelsReady = true;
|
channelsReady = true;
|
||||||
updateChannelSummary();
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("Failed to load channels", err);
|
console.error("Failed to load channels", err);
|
||||||
channelOptions.innerHTML = "<div>Failed to load channels.</div>";
|
channelSelect.innerHTML = '<option value="">All Channels</option>';
|
||||||
channelsReady = true;
|
channelsReady = true;
|
||||||
ensureAllCheckboxState();
|
|
||||||
updateChannelSummary();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -391,6 +347,188 @@
|
|||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetch the transcript payload for a video, caching successful responses in
 * `transcriptCache` so each video is requested at most once per successful load.
 *
 * @param {string} videoId - Video identifier sent as the `video_id` query parameter.
 * @returns {Promise<*>} The parsed JSON payload, or `null` when `videoId` is falsy.
 * @throws {Error} When the server responds with a non-OK status.
 */
async function getTranscriptData(videoId) {
  if (!videoId) {
    return null;
  }
  if (transcriptCache.has(videoId)) {
    return transcriptCache.get(videoId);
  }

  const endpoint = `/api/transcript?video_id=${encodeURIComponent(videoId)}`;
  const response = await fetch(endpoint);
  if (response.ok) {
    const payload = await response.json();
    transcriptCache.set(videoId, payload);
    return payload;
  }
  throw new Error(`Transcript fetch failed (${response.status})`);
}
|
||||||
|
|
||||||
|
/**
 * Format a date in MLA style, e.g. "5 Sept. 2023".
 *
 * @param {string|number|Date} value - Anything accepted by the `Date` constructor.
 * @returns {string} The formatted date; an empty string for falsy input; the
 *   original `value` unchanged when it cannot be parsed as a date.
 */
function formatMlaDate(value) {
  if (!value) return "";
  const parsed = new Date(value);
  if (Number.isNaN(parsed.valueOf())) {
    return value;
  }
  const months = [
    "Jan.", "Feb.", "Mar.", "Apr.", "May", "June",
    "July", "Aug.", "Sept.", "Oct.", "Nov.", "Dec.",
  ];
  // A date-only ISO string ("YYYY-MM-DD") is parsed as UTC midnight. Reading it
  // back with local getters shifts the calendar day in time zones west of UTC,
  // so use the UTC getters for that form and keep local getters for all others.
  const isDateOnly =
    typeof value === "string" && /^\d{4}-\d{2}-\d{2}$/.test(value);
  const day = isDateOnly ? parsed.getUTCDate() : parsed.getDate();
  const month = isDateOnly ? parsed.getUTCMonth() : parsed.getMonth();
  const year = isDateOnly ? parsed.getUTCFullYear() : parsed.getFullYear();
  return `${day} ${months[month]} ${year}`;
}
|
||||||
|
|
||||||
|
/**
 * Build an MLA-style citation string for a search result.
 *
 * @param {Object} item - Result record; reads `channel_name`, `channel_id`,
 *   `title`, `url` and `date`.
 * @returns {string} Citation of the form
 *   `Channel. "Title." YouTube, uploaded by Channel, date, url. Accessed date.`
 */
function buildMlaCitation(item) {
  const channel = String(item.channel_name || item.channel_id || "Unknown channel").trim();
  const title = String(item.title || "Untitled").trim();
  const url = item.url || "";
  const publishDate = formatMlaDate(item.date) || "n.d.";
  // Pass the Date object directly so the access date is the reader's local
  // calendar day; going through a UTC "YYYY-MM-DD" string can be off by a day.
  const today = formatMlaDate(new Date());
  // Leave the URL segment out entirely when there is no URL, instead of
  // emitting a dangling ", ." in the citation.
  const urlPart = url ? `, ${url}` : "";
  return `${channel}. "${title}." YouTube, uploaded by ${channel}, ${publishDate}${urlPart}. Accessed ${today}.`;
}
|
||||||
|
|
||||||
|
/**
 * Produce a display timestamp for a transcript segment.
 *
 * A pre-formatted `timestamp` property wins. Otherwise the first of
 * `start_seconds`, `start`, `offset`, `time` that parses as a number is
 * formatted with `formatTimestamp`.
 *
 * @param {Object|null|undefined} segment - Transcript segment record.
 * @returns {string} The timestamp, or an empty string when none can be derived.
 */
function formatSegmentTimestamp(segment) {
  if (!segment) {
    return "";
  }
  if (segment.timestamp) {
    return segment.timestamp;
  }
  const secondFields = ["start_seconds", "start", "offset", "time"];
  for (const field of secondFields) {
    const raw = segment[field];
    if (raw == null) {
      continue;
    }
    const asSeconds = parseFloat(raw);
    if (Number.isNaN(asSeconds)) {
      continue;
    }
    return formatTimestamp(asSeconds);
  }
  return "";
}
|
||||||
|
|
||||||
|
/**
 * Render one transcript section as plain text for the download file.
 *
 * Non-blank `fullText` is preferred. Otherwise `parts` is rendered one segment
 * per line, each prefixed with `[timestamp]` when one is available.
 *
 * @param {string} label - Heading placed on the first line of the section.
 * @param {Array<Object>} parts - Transcript segments (reads `text` plus timing fields).
 * @param {string} fullText - Complete transcript text, if available.
 * @returns {string} `label`, newline, content, newline; or an empty string
 *   when there is no content.
 */
function serializeTranscriptSection(label, parts, fullText) {
  const hasFullText = typeof fullText === "string" && Boolean(fullText.trim());

  let body = "";
  if (hasFullText) {
    body = fullText.trim();
  } else if (Array.isArray(parts) && parts.length) {
    const rendered = [];
    for (const part of parts) {
      const stamp = formatSegmentTimestamp(part);
      const line = part && part.text ? part.text : "";
      rendered.push(stamp ? `[${stamp}] ${line}` : line);
    }
    body = rendered.join("\n").trim();
  }

  return body ? `${label}\n${body}\n` : "";
}
|
||||||
|
|
||||||
|
/**
 * Assemble the plain-text transcript file offered for download.
 *
 * The output starts with a header (title, then channel / published date / URL
 * when present) and a blank line, followed by the primary and secondary
 * transcript sections, or a placeholder line when neither has content.
 *
 * @param {Object} item - Result record; reads `title`, `channel_name`, `date`, `url`.
 * @param {Object} transcriptData - Payload with `transcript_parts`,
 *   `transcript_full`, `transcript_secondary_parts`, `transcript_secondary_full`.
 * @returns {string} The file contents, trimmed and ending in a single newline.
 */
function buildTranscriptDownloadText(item, transcriptData) {
  const output = [`Title: ${item.title || "Untitled"}`];

  const optionalHeaders = [
    ["Channel", item.channel_name],
    ["Published", item.date],
    ["URL", item.url],
  ];
  for (const [heading, fieldValue] of optionalHeaders) {
    if (fieldValue) {
      output.push(`${heading}: ${fieldValue}`);
    }
  }
  output.push("");

  const primary = serializeTranscriptSection(
    "Primary Transcript",
    transcriptData.transcript_parts,
    transcriptData.transcript_full
  );
  const secondary = serializeTranscriptSection(
    "Secondary Transcript",
    transcriptData.transcript_secondary_parts,
    transcriptData.transcript_secondary_full
  );

  const sections = [primary, secondary].filter(Boolean);
  if (sections.length) {
    output.push(...sections);
  } else {
    output.push("No transcript available.");
  }

  return `${output.join("\n").trim()}\n`;
}
|
||||||
|
|
||||||
|
/**
 * Temporarily replace a button's label with a status message, then restore it.
 *
 * The first label seen is stored in `data-original-label` so repeated flashes
 * restore the real label rather than an earlier status message.
 *
 * @param {HTMLElement|null} button - Button to update; ignored when falsy.
 * @param {string} message - Text to show for the duration.
 * @param {number} [duration=1800] - Milliseconds before the label is restored.
 */
function flashButtonMessage(button, message, duration = 1800) {
  if (button) {
    const savedLabel = button.dataset.originalLabel || button.textContent;
    button.dataset.originalLabel = savedLabel;
    button.textContent = message;

    const restoreLabel = () => {
      button.textContent = button.dataset.originalLabel || savedLabel;
    };
    setTimeout(restoreLabel, duration);
  }
}
|
||||||
|
|
||||||
|
/**
 * Download a video's transcript as a `.txt` file named after its video id.
 *
 * The button is disabled while the transcript is fetched and re-enabled
 * afterwards. On failure the error is logged and the user is alerted.
 *
 * @param {Object} item - Result record; must carry `video_id`.
 * @param {HTMLButtonElement|null} button - Button that triggered the download.
 * @returns {Promise<void>}
 */
async function handleTranscriptDownload(item, button) {
  if (!item || !item.video_id) return;
  if (button) {
    button.disabled = true;
  }
  try {
    const data = await getTranscriptData(item.video_id);
    if (!data) {
      throw new Error("Transcript unavailable");
    }
    const text = buildTranscriptDownloadText(item, data);
    const blob = new Blob([text], { type: "text/plain" });
    const url = URL.createObjectURL(blob);
    const link = document.createElement("a");
    link.href = url;
    link.download = `${item.video_id}.txt`;
    document.body.appendChild(link);
    try {
      link.click();
    } finally {
      // Always detach the temporary link and release the object URL, even if
      // the synthetic click throws.
      document.body.removeChild(link);
      URL.revokeObjectURL(url);
    }
    flashButtonMessage(button, "Downloaded");
  } catch (err) {
    console.error("Download failed", err);
    alert("Unable to download transcript right now.");
  } finally {
    if (button) {
      button.disabled = false;
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Copy an MLA citation for the given result to the clipboard.
 *
 * Uses the async Clipboard API in secure contexts and falls back to a hidden
 * textarea with `document.execCommand("copy")` otherwise. If copying fails by
 * either route, the citation is shown in an alert so it can be copied by hand.
 *
 * @param {Object} item - Result record passed to `buildMlaCitation`.
 * @param {HTMLElement|null} button - Button that receives the "Copied!" flash.
 * @returns {Promise<void>}
 */
async function handleCopyCitation(item, button) {
  const citation = buildMlaCitation(item);
  try {
    if (navigator.clipboard && window.isSecureContext) {
      await navigator.clipboard.writeText(citation);
    } else {
      const textarea = document.createElement("textarea");
      textarea.value = citation;
      textarea.style.position = "fixed";
      textarea.style.opacity = "0";
      document.body.appendChild(textarea);
      let copied = false;
      try {
        textarea.focus();
        textarea.select();
        // execCommand reports failure through its return value, not by throwing.
        copied = document.execCommand("copy");
      } finally {
        // Never leave the helper textarea behind, even if selection or copy throws.
        document.body.removeChild(textarea);
      }
      if (!copied) {
        throw new Error("Copy command was rejected");
      }
    }
    flashButtonMessage(button, "Copied!");
  } catch (err) {
    console.error("Citation copy failed", err);
    alert(citation);
  }
}
|
||||||
|
|
||||||
|
/**
 * Read a result's `video_status` as a lower-cased string.
 *
 * @param {Object|null|undefined} item - Result record.
 * @returns {string} The lower-cased status, or an empty string when the item
 *   or its `video_status` is missing or falsy.
 */
function getVideoStatus(item) {
  const rawStatus = item ? item.video_status : null;
  if (!rawStatus) {
    return "";
  }
  return String(rawStatus).toLowerCase();
}
|
||||||
|
|
||||||
|
/**
 * Tell whether a result's normalized video status is "deleted".
 *
 * @param {Object|null|undefined} item - Result record.
 * @returns {boolean} True when `getVideoStatus(item)` equals "deleted".
 */
function isLikelyDeleted(item) {
  const status = getVideoStatus(item);
  return status === "deleted";
}
|
||||||
|
|
||||||
function formatTimestamp(seconds) {
|
function formatTimestamp(seconds) {
|
||||||
if (!seconds && seconds !== 0) return "00:00";
|
if (!seconds && seconds !== 0) return "00:00";
|
||||||
const hours = Math.floor(seconds / 3600);
|
const hours = Math.floor(seconds / 3600);
|
||||||
@ -621,7 +759,65 @@
|
|||||||
}, 3000);
|
}, 3000);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchAndDisplayTranscript(videoId, videoUrl, containerElement, button, highlightText = null) {
|
const COMMON_STOP_WORDS = new Set([
|
||||||
|
"the","and","that","this","with","for","are","but","not","you","your","they","their",
|
||||||
|
"have","from","was","been","has","had","were","about","what","when","where","which",
|
||||||
|
"will","would","there","here","into","them","then","than","also","more","some","just",
|
||||||
|
"like","said","because","make","made","could","should","might"
|
||||||
|
]);
|
||||||
|
|
||||||
|
const tokenizeContent = (text) => {
|
||||||
|
if (!text) return [];
|
||||||
|
return text
|
||||||
|
.toLowerCase()
|
||||||
|
.split(/[^a-z0-9]+/g)
|
||||||
|
.filter((token) => token.length > 2 && !COMMON_STOP_WORDS.has(token))
|
||||||
|
.slice(0, 20);
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Gather highlight tokens from a list of highlight entries.
 *
 * Each entry is either a string or an object with `html` / `text`. When
 * `extractMarkedText` returns text for an entry, only that text is tokenized;
 * otherwise the entry is tokenized after `stripHtmlAndNormalize`.
 *
 * @param {Array<string|Object>} entries - Highlight snippets.
 * @returns {string[]} Tokens in entry order (duplicates kept); an empty array
 *   when `entries` is not an array.
 */
function collectHighlightTokens(entries) {
  const tokens = [];
  if (!Array.isArray(entries)) {
    return tokens;
  }
  for (const entry of entries) {
    let raw;
    if (typeof entry === "string") {
      raw = entry;
    } else {
      raw = entry?.html || entry?.text || "";
    }
    if (!raw) {
      continue;
    }
    const marked = extractMarkedText(raw);
    const source = marked ? marked : stripHtmlAndNormalize(raw);
    tokens.push(...tokenizeContent(source));
  }
  return tokens;
}
|
||||||
|
|
||||||
|
/**
 * Tokenize the user's search query for transcript highlighting.
 *
 * @param {string} query - Raw search query; falsy values are treated as empty.
 * @returns {string[]} At most 20 tokens from `tokenizeContent`.
 */
function buildQueryTokens(query) {
  const source = query || "";
  const tokens = tokenizeContent(source);
  return tokens.slice(0, 20);
}
|
||||||
|
|
||||||
|
/**
 * Mark transcript segments that contain any highlight or query token.
 *
 * Tokens come from the highlight entries and the search query. Each
 * `.transcript-segment` inside `transcriptDiv` has the
 * `transcript-segment--matched` class switched on or off according to whether
 * its `data-text` contains at least one token. Nothing is changed when there
 * are no tokens.
 *
 * @param {Element|null} transcriptDiv - Container holding the transcript segments.
 * @param {Array<string|Object>} entries - Highlight snippets for the result.
 * @param {string} searchQuery - The user's search query.
 */
function highlightTranscriptMatches(transcriptDiv, entries, searchQuery) {
  if (!transcriptDiv) return;
  const tokens = new Set([
    ...collectHighlightTokens(entries),
    ...buildQueryTokens(searchQuery),
  ]);
  if (!tokens.size) return;
  // Materialize the token list once rather than once per segment.
  const needles = Array.from(tokens);
  const segments = transcriptDiv.querySelectorAll(".transcript-segment");
  segments.forEach((segment) => {
    // Tokens are always lower-case, so lower-case the segment text as well;
    // otherwise capitalized words in the transcript would never match.
    const haystack = (segment.dataset.text || "").toLowerCase();
    const matched = needles.some((token) => haystack.includes(token));
    segment.classList.toggle("transcript-segment--matched", matched);
  });
}
|
||||||
|
|
||||||
|
async function fetchAndDisplayTranscript(
|
||||||
|
videoId,
|
||||||
|
videoUrl,
|
||||||
|
containerElement,
|
||||||
|
button,
|
||||||
|
highlightText = null,
|
||||||
|
allHighlights = null,
|
||||||
|
searchQuery = ""
|
||||||
|
) {
|
||||||
const existingTranscript = containerElement.querySelector('.full-transcript');
|
const existingTranscript = containerElement.querySelector('.full-transcript');
|
||||||
if (existingTranscript && !highlightText) {
|
if (existingTranscript && !highlightText) {
|
||||||
existingTranscript.remove();
|
existingTranscript.remove();
|
||||||
@ -631,6 +827,7 @@
|
|||||||
|
|
||||||
// If transcript exists and we have highlight text, just scroll to it
|
// If transcript exists and we have highlight text, just scroll to it
|
||||||
if (existingTranscript && highlightText) {
|
if (existingTranscript && highlightText) {
|
||||||
|
highlightTranscriptMatches(existingTranscript, allHighlights, searchQuery);
|
||||||
const segment = findMatchingSegment(existingTranscript, highlightText);
|
const segment = findMatchingSegment(existingTranscript, highlightText);
|
||||||
if (segment) {
|
if (segment) {
|
||||||
scrollToSegment(segment);
|
scrollToSegment(segment);
|
||||||
@ -728,6 +925,7 @@
|
|||||||
}
|
}
|
||||||
}, 100);
|
}, 100);
|
||||||
}
|
}
|
||||||
|
highlightTranscriptMatches(transcriptDiv, allHighlights, searchQuery);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('Error fetching transcript:', err);
|
console.error('Error fetching transcript:', err);
|
||||||
button.textContent = 'View Full Transcript';
|
button.textContent = 'View Full Transcript';
|
||||||
@ -797,7 +995,8 @@ function clearFrequency(message) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function renderFrequencyChart(buckets, channelTotals) {
|
|
||||||
|
function renderFrequencyChart(buckets, channelTotals) {
|
||||||
if (!freqChart || typeof d3 === "undefined") {
|
if (!freqChart || typeof d3 === "undefined") {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -807,6 +1006,26 @@ function renderFrequencyChart(buckets, channelTotals) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const channelNameFallback = new Map();
|
||||||
|
(channelTotals || []).forEach((entry) => {
|
||||||
|
if (!entry || !entry.id) return;
|
||||||
|
if (entry.name) {
|
||||||
|
channelNameFallback.set(entry.id, entry.name);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
buckets.forEach((bucket) => {
|
||||||
|
(bucket.channels || []).forEach((entry) => {
|
||||||
|
if (entry && entry.id && entry.name && !channelNameFallback.has(entry.id)) {
|
||||||
|
channelNameFallback.set(entry.id, entry.name);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
const getChannelLabel = (id) => {
|
||||||
|
if (!id) return "";
|
||||||
|
return channelMap.get(id) || channelNameFallback.get(id) || id;
|
||||||
|
};
|
||||||
|
|
||||||
let channelsOrder =
|
let channelsOrder =
|
||||||
(channelTotals && channelTotals.length
|
(channelTotals && channelTotals.length
|
||||||
? channelTotals.map((entry) => entry.id)
|
? channelTotals.map((entry) => entry.id)
|
||||||
@ -929,7 +1148,7 @@ function renderFrequencyChart(buckets, channelTotals) {
|
|||||||
.text(function (d) {
|
.text(function (d) {
|
||||||
const group = this.parentNode ? this.parentNode.parentNode : null;
|
const group = this.parentNode ? this.parentNode.parentNode : null;
|
||||||
const key = group ? d3.select(group).datum().key : undefined;
|
const key = group ? d3.select(group).datum().key : undefined;
|
||||||
const label = key ? channelMap.get(key) || key : key || '';
|
const label = key ? getChannelLabel(key) : key || '';
|
||||||
return `${dateKeyFormat(d.data.date)}: ${d[1] - d[0]}${label ? " (" + label + ")" : ''}`;
|
return `${dateKeyFormat(d.data.date)}: ${d[1] - d[0]}${label ? " (" + label + ")" : ''}`;
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -942,7 +1161,7 @@ function renderFrequencyChart(buckets, channelTotals) {
|
|||||||
swatch.className = "freq-legend-swatch";
|
swatch.className = "freq-legend-swatch";
|
||||||
swatch.style.backgroundColor = color(key);
|
swatch.style.backgroundColor = color(key);
|
||||||
const label = document.createElement("span");
|
const label = document.createElement("span");
|
||||||
label.textContent = channelMap.get(key) || key;
|
label.textContent = getChannelLabel(key) || key;
|
||||||
item.appendChild(swatch);
|
item.appendChild(swatch);
|
||||||
item.appendChild(label);
|
item.appendChild(label);
|
||||||
legend.appendChild(item);
|
legend.appendChild(item);
|
||||||
@ -1027,12 +1246,15 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = {
|
|||||||
item.descriptionHtml || escapeHtml(item.description || "");
|
item.descriptionHtml || escapeHtml(item.description || "");
|
||||||
|
|
||||||
const header = document.createElement("div");
|
const header = document.createElement("div");
|
||||||
|
header.className = "result-header";
|
||||||
|
const headerMain = document.createElement("div");
|
||||||
|
headerMain.className = "result-header-main";
|
||||||
const badgeDefs = [];
|
const badgeDefs = [];
|
||||||
if (item.highlightSource && item.highlightSource.primary) {
|
if (item.highlightSource && item.highlightSource.primary) {
|
||||||
badgeDefs.push({ label: "primary transcript" });
|
badgeDefs.push({ label: "primary transcript", badgeType: "transcript-primary" });
|
||||||
}
|
}
|
||||||
if (item.highlightSource && item.highlightSource.secondary) {
|
if (item.highlightSource && item.highlightSource.secondary) {
|
||||||
badgeDefs.push({ label: "secondary transcript" });
|
badgeDefs.push({ label: "secondary transcript", badgeType: "transcript-secondary" });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add reference count badges
|
// Add reference count badges
|
||||||
@ -1068,13 +1290,47 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
header.innerHTML = `
|
const titleEl = document.createElement("strong");
|
||||||
<strong>${titleHtml}</strong>
|
titleEl.innerHTML = titleHtml;
|
||||||
<div class="muted">${escapeHtml(item.channel_name || "")} • ${fmtDate(
|
headerMain.appendChild(titleEl);
|
||||||
item.date
|
|
||||||
)}</div>
|
const metaLine = document.createElement("div");
|
||||||
<div class="muted"><a href="${item.url}" target="_blank" rel="noopener">Open on YouTube</a></div>
|
metaLine.className = "muted result-meta";
|
||||||
`;
|
const channelLabel = item.channel_name || "";
|
||||||
|
const dateLabel = fmtDate(item.date);
|
||||||
|
let durationSeconds = null;
|
||||||
|
if (typeof item.duration === "number") {
|
||||||
|
durationSeconds = item.duration;
|
||||||
|
} else if (typeof item.duration === "string" && item.duration.trim()) {
|
||||||
|
const parsed = parseFloat(item.duration);
|
||||||
|
if (!Number.isNaN(parsed)) {
|
||||||
|
durationSeconds = parsed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const durationLabel = durationSeconds != null ? ` • ${formatTimestamp(durationSeconds)}` : "";
|
||||||
|
metaLine.textContent = channelLabel
|
||||||
|
? `${channelLabel} • ${dateLabel}${durationLabel}`
|
||||||
|
: `${dateLabel}${durationLabel}`;
|
||||||
|
if (isLikelyDeleted(item)) {
|
||||||
|
metaLine.appendChild(document.createTextNode(" "));
|
||||||
|
const statusEl = document.createElement("span");
|
||||||
|
statusEl.className = "result-status result-status--deleted";
|
||||||
|
statusEl.textContent = "Likely deleted";
|
||||||
|
statusEl.title = "YouTube reported this video as unavailable when we last checked.";
|
||||||
|
metaLine.appendChild(statusEl);
|
||||||
|
}
|
||||||
|
headerMain.appendChild(metaLine);
|
||||||
|
|
||||||
|
const linkLine = document.createElement("div");
|
||||||
|
linkLine.className = "muted";
|
||||||
|
const openLink = document.createElement("a");
|
||||||
|
openLink.href = item.url;
|
||||||
|
openLink.target = "_blank";
|
||||||
|
openLink.rel = "noopener";
|
||||||
|
openLink.textContent = "Open on YouTube";
|
||||||
|
linkLine.appendChild(openLink);
|
||||||
|
headerMain.appendChild(linkLine);
|
||||||
|
header.appendChild(headerMain);
|
||||||
if (badgeDefs.length) {
|
if (badgeDefs.length) {
|
||||||
const badgeRow = document.createElement("div");
|
const badgeRow = document.createElement("div");
|
||||||
badgeRow.className = "badge-row";
|
badgeRow.className = "badge-row";
|
||||||
@ -1086,6 +1342,9 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = {
|
|||||||
if (badge.title) {
|
if (badge.title) {
|
||||||
badgeEl.title = badge.title;
|
badgeEl.title = badge.title;
|
||||||
}
|
}
|
||||||
|
if (badge.badgeType) {
|
||||||
|
badgeEl.classList.add(`badge--${badge.badgeType}`);
|
||||||
|
}
|
||||||
if (badge.query) {
|
if (badge.query) {
|
||||||
badgeEl.classList.add("badge-clickable");
|
badgeEl.classList.add("badge-clickable");
|
||||||
badgeEl.setAttribute("role", "button");
|
badgeEl.setAttribute("role", "button");
|
||||||
@ -1110,7 +1369,45 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = {
|
|||||||
badgeRow.appendChild(badgeEl);
|
badgeRow.appendChild(badgeEl);
|
||||||
});
|
});
|
||||||
if (badgeRow.childElementCount) {
|
if (badgeRow.childElementCount) {
|
||||||
header.appendChild(badgeRow);
|
headerMain.appendChild(badgeRow);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (item.video_id) {
|
||||||
|
const actions = document.createElement("div");
|
||||||
|
actions.className = "result-actions";
|
||||||
|
|
||||||
|
const downloadBtn = document.createElement("button");
|
||||||
|
downloadBtn.type = "button";
|
||||||
|
downloadBtn.className = "result-action-btn";
|
||||||
|
downloadBtn.textContent = "Download transcript";
|
||||||
|
downloadBtn.addEventListener("click", () => handleTranscriptDownload(item, downloadBtn));
|
||||||
|
actions.appendChild(downloadBtn);
|
||||||
|
|
||||||
|
const citationBtn = document.createElement("button");
|
||||||
|
citationBtn.type = "button";
|
||||||
|
citationBtn.className = "result-action-btn";
|
||||||
|
citationBtn.textContent = "Copy citation";
|
||||||
|
citationBtn.addEventListener("click", () => handleCopyCitation(item, citationBtn));
|
||||||
|
actions.appendChild(citationBtn);
|
||||||
|
|
||||||
|
if (graphOverlay) {
|
||||||
|
const graphBtn = document.createElement("button");
|
||||||
|
graphBtn.type = "button";
|
||||||
|
graphBtn.className = "result-action-btn graph-launch-btn";
|
||||||
|
graphBtn.textContent = "Graph";
|
||||||
|
if (graphUiAvailable()) {
|
||||||
|
graphBtn.title = "Open reference graph";
|
||||||
|
} else {
|
||||||
|
graphBtn.disabled = true;
|
||||||
|
graphBtn.title = "Reference graph is still loading…";
|
||||||
|
graphBtn.dataset.awaitGraphReady = "1";
|
||||||
|
}
|
||||||
|
graphBtn.addEventListener("click", () => openGraphModal(item.video_id));
|
||||||
|
actions.appendChild(graphBtn);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (actions.childElementCount) {
|
||||||
|
header.appendChild(actions);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
el.appendChild(header);
|
el.appendChild(header);
|
||||||
@ -1128,9 +1425,25 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = {
|
|||||||
item.toHighlight.forEach((entry) => {
|
item.toHighlight.forEach((entry) => {
|
||||||
const html = typeof entry === "string" ? entry : entry?.html;
|
const html = typeof entry === "string" ? entry : entry?.html;
|
||||||
if (!html) return;
|
if (!html) return;
|
||||||
|
const source = entry && typeof entry === "object" ? entry.source : null;
|
||||||
const row = document.createElement("div");
|
const row = document.createElement("div");
|
||||||
row.className = "highlight-row";
|
row.className = "highlight-row";
|
||||||
row.innerHTML = html;
|
if (source === "primary") {
|
||||||
|
row.classList.add("highlight-row--primary");
|
||||||
|
} else if (source === "secondary") {
|
||||||
|
row.classList.add("highlight-row--secondary");
|
||||||
|
}
|
||||||
|
const textBlock = document.createElement("div");
|
||||||
|
textBlock.className = "highlight-text";
|
||||||
|
textBlock.innerHTML = html;
|
||||||
|
row.appendChild(textBlock);
|
||||||
|
if (source) {
|
||||||
|
const indicator = document.createElement("span");
|
||||||
|
indicator.className = `highlight-source-indicator highlight-source-indicator--${source}`;
|
||||||
|
indicator.title =
|
||||||
|
source === "primary" ? "Highlight from primary transcript" : "Highlight from secondary transcript";
|
||||||
|
row.appendChild(indicator);
|
||||||
|
}
|
||||||
row.title = "Click to jump to this location in the transcript";
|
row.title = "Click to jump to this location in the transcript";
|
||||||
|
|
||||||
// Make highlight clickable
|
// Make highlight clickable
|
||||||
@ -1138,7 +1451,15 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = {
|
|||||||
const transcriptBtn = el.querySelector(".transcript-toggle");
|
const transcriptBtn = el.querySelector(".transcript-toggle");
|
||||||
if (transcriptBtn && item.video_id) {
|
if (transcriptBtn && item.video_id) {
|
||||||
const highlightText = stripHtmlAndNormalize(html);
|
const highlightText = stripHtmlAndNormalize(html);
|
||||||
fetchAndDisplayTranscript(item.video_id, item.url, el, transcriptBtn, highlightText);
|
fetchAndDisplayTranscript(
|
||||||
|
item.video_id,
|
||||||
|
item.url,
|
||||||
|
el,
|
||||||
|
transcriptBtn,
|
||||||
|
highlightText,
|
||||||
|
item.toHighlight,
|
||||||
|
qInput.value
|
||||||
|
);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1154,7 +1475,15 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = {
|
|||||||
transcriptBtn.className = "transcript-toggle";
|
transcriptBtn.className = "transcript-toggle";
|
||||||
transcriptBtn.textContent = "View Full Transcript";
|
transcriptBtn.textContent = "View Full Transcript";
|
||||||
transcriptBtn.onclick = () => {
|
transcriptBtn.onclick = () => {
|
||||||
fetchAndDisplayTranscript(item.video_id, item.url, el, transcriptBtn);
|
fetchAndDisplayTranscript(
|
||||||
|
item.video_id,
|
||||||
|
item.url,
|
||||||
|
el,
|
||||||
|
transcriptBtn,
|
||||||
|
null,
|
||||||
|
item.toHighlight,
|
||||||
|
qInput.value
|
||||||
|
);
|
||||||
};
|
};
|
||||||
el.appendChild(transcriptBtn);
|
el.appendChild(transcriptBtn);
|
||||||
}
|
}
|
||||||
@ -1223,10 +1552,28 @@ async function updateFrequencyChart(term, channels, year, queryMode, toggles = {
|
|||||||
updateFrequencyChart(q, channels, year, queryMode, { exact, fuzzy, phrase });
|
updateFrequencyChart(q, channels, year, queryMode, { exact, fuzzy, phrase });
|
||||||
}
|
}
|
||||||
|
|
||||||
searchBtn.addEventListener("click", () => runSearch(0));
|
searchBtn.addEventListener("click", () => runSearch(0));
|
||||||
|
if (aboutBtn && aboutPanel) {
|
||||||
|
aboutBtn.addEventListener("click", () => {
|
||||||
|
const isHidden = aboutPanel.hasAttribute("hidden");
|
||||||
|
toggleAboutPanel(isHidden);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (aboutCloseBtn) {
|
||||||
|
aboutCloseBtn.addEventListener("click", () => toggleAboutPanel(false));
|
||||||
|
}
|
||||||
|
|
||||||
qInput.addEventListener("keypress", (e) => {
|
qInput.addEventListener("keypress", (e) => {
|
||||||
if (e.key === "Enter") runSearch(0);
|
if (e.key === "Enter") runSearch(0);
|
||||||
});
|
});
|
||||||
|
if (channelSelect) {
|
||||||
|
channelSelect.addEventListener("change", () => {
|
||||||
|
pendingChannelSelection = channelSelect.value || "";
|
||||||
|
if (channelsReady) {
|
||||||
|
runSearch(0);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
yearSel.addEventListener("change", () => runSearch(0));
|
yearSel.addEventListener("change", () => runSearch(0));
|
||||||
sortSel.addEventListener("change", () => runSearch(0));
|
sortSel.addEventListener("change", () => runSearch(0));
|
||||||
sizeSel.addEventListener("change", () => runSearch(0));
|
sizeSel.addEventListener("change", () => runSearch(0));
|
||||||
|
|||||||
85
static/graph.html
Normal file
85
static/graph.html
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<title>TLC Reference Graph</title>
|
||||||
|
<link rel="icon" href="/static/favicon.png" type="image/png" />
|
||||||
|
<link rel="stylesheet" href="https://unpkg.com/xp.css" />
|
||||||
|
<link rel="stylesheet" href="/static/style.css" />
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="window graph-window" style="max-width: 1100px; margin: 20px auto;">
|
||||||
|
<div class="title-bar">
|
||||||
|
<div class="title-bar-text">Reference Graph</div>
|
||||||
|
<div class="title-bar-controls">
|
||||||
|
<a class="title-bar-link" href="/">⬅ Search</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="window-body">
|
||||||
|
<p>
|
||||||
|
Explore how videos reference each other. Enter a <code>video_id</code> to see its immediate
|
||||||
|
neighbors (referenced and referencing videos). Choose a larger depth to expand the graph.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<form id="graphForm" class="graph-controls">
|
||||||
|
<div class="field-group">
|
||||||
|
<label for="graphVideoId">Video ID</label>
|
||||||
|
<input
|
||||||
|
id="graphVideoId"
|
||||||
|
name="video_id"
|
||||||
|
type="text"
|
||||||
|
placeholder="e.g. dQw4w9WgXcQ"
|
||||||
|
required
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="field-group">
|
||||||
|
<label for="graphDepth">Depth</label>
|
||||||
|
<select id="graphDepth" name="depth">
|
||||||
|
<option value="1">1 hop</option>
|
||||||
|
<option value="2">2 hops</option>
|
||||||
|
<option value="3">3 hops</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="field-group">
|
||||||
|
<label for="graphMaxNodes">Max nodes</label>
|
||||||
|
<select id="graphMaxNodes" name="max_nodes">
|
||||||
|
<option value="100">100</option>
|
||||||
|
<option value="150">150</option>
|
||||||
|
<option value="200" selected>200</option>
|
||||||
|
<option value="300">300</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="field-group">
|
||||||
|
<label for="graphLabelSize">Labels</label>
|
||||||
|
<select id="graphLabelSize" name="label_size">
|
||||||
|
<option value="off">Off</option>
|
||||||
|
<option value="tiny" selected>Tiny</option>
|
||||||
|
<option value="small">Small</option>
|
||||||
|
<option value="normal">Normal</option>
|
||||||
|
<option value="medium">Medium</option>
|
||||||
|
<option value="large">Large</option>
|
||||||
|
<option value="xlarge">Extra large</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<button type="submit">Build graph</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<div id="graphStatus" class="graph-status">Enter a video ID to begin.</div>
|
||||||
|
<div id="graphContainer" class="graph-container"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="status-bar">
|
||||||
|
<p class="status-bar-field">Click nodes to open the video on YouTube</p>
|
||||||
|
<p class="status-bar-field">Colors represent channels</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script src="/static/graph.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
670
static/graph.js
Normal file
670
static/graph.js
Normal file
@ -0,0 +1,670 @@
|
|||||||
|
(() => {
|
||||||
|
const global = window;
|
||||||
|
const GraphUI = (global.GraphUI = global.GraphUI || {});
|
||||||
|
GraphUI.ready = false;
|
||||||
|
const form = document.getElementById("graphForm");
|
||||||
|
const videoInput = document.getElementById("graphVideoId");
|
||||||
|
const depthInput = document.getElementById("graphDepth");
|
||||||
|
const maxNodesInput = document.getElementById("graphMaxNodes");
|
||||||
|
const labelSizeInput = document.getElementById("graphLabelSize");
|
||||||
|
const statusEl = document.getElementById("graphStatus");
|
||||||
|
const container = document.getElementById("graphContainer");
|
||||||
|
const isEmbedded =
|
||||||
|
container && container.dataset && container.dataset.embedded === "true";
|
||||||
|
|
||||||
|
if (!form || !videoInput || !depthInput || !maxNodesInput || !labelSizeInput || !container) {
|
||||||
|
console.error("Graph: required DOM elements missing.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ordinal colour scale used to colour nodes by channel label.
const color = d3.scaleOrdinal(d3.schemeTableau10);
const colorRange = typeof color.range !== "function" ? [] : color.range();
// Number of distinct colours available before hatch patterns are used;
// falls back to 10 when the range is empty.
const paletteSizeDefault = colorRange.length > 0 ? colorRange.length : 10;

// Hatch patterns; each legend class is named after its pattern key.
const PATTERN_TYPES = ["none", "diag-forward", "diag-back", "cross", "dots"].map((key) => ({
  key,
  legendClass: key,
}));
// Every pattern except the plain "none" fill.
const ADDITIONAL_PATTERNS = PATTERN_TYPES.filter(({ key }) => key !== "none");
|
||||||
|
|
||||||
|
/**
 * Parse a traversal depth and clamp it to the range 0..3.
 * Unparseable input yields 1.
 */
const sanitizeDepth = (value) => {
  const depth = Number.parseInt(value, 10);
  if (Number.isNaN(depth)) {
    return 1;
  }
  const LOWEST = 0;
  const HIGHEST = 3;
  return Math.min(Math.max(depth, LOWEST), HIGHEST);
};
|
||||||
|
|
||||||
|
/**
 * Parse a node limit and clamp it to the range 10..400.
 * Unparseable input yields 200.
 */
const sanitizeMaxNodes = (value) => {
  const limit = Number.parseInt(value, 10);
  if (Number.isNaN(limit)) {
    return 200;
  }
  const LOWEST = 10;
  const HIGHEST = 400;
  return Math.min(Math.max(limit, LOWEST), HIGHEST);
};
|
||||||
|
|
||||||
|
// Label size used whenever no valid choice is available.
const DEFAULT_LABEL_SIZE = "tiny";

// Font size for each visible label setting ("off" has none: it hides labels).
const LABEL_FONT_SIZES = {
  tiny: "7px",
  small: "8px",
  normal: "9px",
  medium: "10px",
  large: "11px",
  xlarge: "13px",
};

// Every accepted label-size value, including "off".
const LABEL_SIZE_VALUES = ["off", "tiny", "small", "normal", "medium", "large", "xlarge"];

/** Report whether `value` is one of the accepted label-size values. */
const isValidLabelSize = (value) => LABEL_SIZE_VALUES.indexOf(value) !== -1;
|
||||||
|
|
||||||
|
/**
 * Return the label size currently chosen in the label-size control, or the
 * default when the control is missing or holds an unknown value.
 */
const getLabelSize = () => {
  if (labelSizeInput) {
    const chosen = labelSizeInput.value;
    if (isValidLabelSize(chosen)) {
      return chosen;
    }
  }
  return DEFAULT_LABEL_SIZE;
};
|
||||||
|
|
||||||
|
/**
 * Set the label-size control to `value`, substituting the default when
 * `value` is not an accepted label size. Does nothing without the control.
 */
function setLabelSizeInput(value) {
  if (labelSizeInput) {
    let next = DEFAULT_LABEL_SIZE;
    if (isValidLabelSize(value)) {
      next = value;
    }
    labelSizeInput.value = next;
  }
}
|
||||||
|
|
||||||
|
/**
 * Return the label a node is grouped under: its channel name, else its
 * channel id, else "Unknown" (also used when the node itself is missing).
 */
const getChannelLabel = (node) => {
  if (!node) {
    return "Unknown";
  }
  return node.channel_name || node.channel_id || "Unknown";
};
|
||||||
|
|
||||||
|
/**
 * Fill an SVG <pattern> selection with an 8x8 tile: a solid background in
 * `baseColor`, plus the overlay selected by `patternKey` ("diag-forward",
 * "diag-back", "cross" or "dots"). Any other key leaves the tile plain.
 */
function appendPatternContent(pattern, baseColor, patternKey) {
  const OVERLAY_COLOR = "#1f1f1f";
  const OVERLAY_OPACITY = 0.35;
  const FORWARD_STROKES = "M-2,6 L2,2 M0,8 L8,0 M6,10 L10,4";
  const BACKWARD_STROKES = "M-2,2 L2,6 M0,0 L8,8 M6,-2 L10,2";

  // Solid background tile.
  pattern.append("rect").attr("width", 8).attr("height", 8).attr("fill", baseColor);

  // Draw one set of diagonal strokes from the given path data.
  const drawStrokes = (pathData) => {
    pattern
      .append("path")
      .attr("d", pathData)
      .attr("stroke", OVERLAY_COLOR)
      .attr("stroke-width", 1)
      .attr("stroke-opacity", OVERLAY_OPACITY)
      .attr("fill", "none");
  };

  // "cross" is both diagonals, forward strokes first.
  if (patternKey === "diag-forward" || patternKey === "cross") {
    drawStrokes(FORWARD_STROKES);
  }
  if (patternKey === "diag-back" || patternKey === "cross") {
    drawStrokes(BACKWARD_STROKES);
  }
  if (patternKey === "dots") {
    pattern
      .append("circle")
      .attr("cx", 4)
      .attr("cy", 4)
      .attr("r", 1.5)
      .attr("fill", OVERLAY_COLOR)
      .attr("fill-opacity", OVERLAY_OPACITY);
  }
}
|
||||||
|
|
||||||
|
/**
 * Build the style record for one channel: its base colour plus the hatch
 * key and legend class of the requested pattern. An unknown `patternKey`
 * falls back to the first entry of PATTERN_TYPES. `label` is accepted but
 * not used.
 */
function createChannelStyle(label, baseColor, patternKey) {
  let matched = PATTERN_TYPES[0];
  for (const candidate of PATTERN_TYPES) {
    if (candidate.key === patternKey) {
      matched = candidate;
      break;
    }
  }
  if (!matched) {
    return { baseColor, hatch: "none", legendClass: "none" };
  }
  return { baseColor, hatch: matched.key, legendClass: matched.legendClass };
}
|
||||||
|
|
||||||
|
// Mutable state shared by the functions below.
let currentGraphData = null, // payload of the graph currently shown, if any
  currentChannelStyles = new Map(), // channel label -> style record
  currentDepth = sanitizeDepth(depthInput.value), // depth of the last load
  currentMaxNodes = sanitizeMaxNodes(maxNodesInput.value), // node cap of the last load
  currentSimulation = null; // running d3 force simulation, if any
|
||||||
|
|
||||||
|
/**
 * Show `message` in the status element and add or remove its "error" class
 * according to `isError`. Does nothing when the status element is absent.
 */
function setStatus(message, isError = false) {
  if (statusEl) {
    statusEl.textContent = message;
    // Coerced to a real boolean so toggle() always adds or removes explicitly.
    statusEl.classList.toggle("error", Boolean(isError));
  }
}
|
||||||
|
|
||||||
|
/** Return `value` with surrounding whitespace removed; falsy input gives "". */
function sanitizeId(value) {
  return value ? value.trim() : "";
}
|
||||||
|
|
||||||
|
/**
 * Request graph data from /api/graph.
 *
 * @param {string} videoId  Video id sent as `video_id`.
 * @param {number} depth    Sent as `depth`.
 * @param {number} maxNodes Sent as `max_nodes`.
 * @returns {Promise<*>} The parsed JSON body of a successful response.
 * @throws {Error} On a non-OK response; the message is the `error` field of
 *   the JSON body when there is one, otherwise a line built from the HTTP
 *   status.
 */
async function fetchGraph(videoId, depth, maxNodes) {
  const params = new URLSearchParams();
  params.set("video_id", videoId);
  params.set("depth", String(depth));
  params.set("max_nodes", String(maxNodes));

  const response = await fetch(`/api/graph?${params.toString()}`);
  if (!response.ok) {
    // The error body may be missing, not JSON, or JSON that is not an object
    // (e.g. `null`); none of these may hide the HTTP failure itself.
    const errorPayload = await response.json().catch(() => null);
    const serverMessage =
      errorPayload && typeof errorPayload === "object" ? errorPayload.error : null;
    // statusText can be empty, so trim to avoid a dangling space.
    const statusLine = `${response.status} ${response.statusText || ""}`.trim();
    throw new Error(serverMessage || `Graph request failed (${statusLine})`);
  }
  return response.json();
}
|
||||||
|
|
||||||
|
/**
 * Set the graph container's height to 60% of the viewport height, but never
 * less than 520px. Does nothing when the container is absent.
 */
function resizeContainer() {
  if (container) {
    const MIN_HEIGHT_PX = 520;
    const proportionalHeight = Math.round(window.innerHeight * 0.6);
    const heightPx = Math.max(MIN_HEIGHT_PX, proportionalHeight);
    container.style.height = `${heightPx}px`;
  }
}
|
||||||
|
|
||||||
|
/**
 * Render `data` as a force-directed SVG graph inside the graph container,
 * replacing whatever was drawn before.
 *
 * Also rebuilds `currentChannelStyles` (channel label -> style record) and
 * stores the new simulation in `currentSimulation`.
 *
 * @param {{nodes?: Array, links?: Array}} data Graph payload; missing arrays
 *   are treated as empty.
 * @param {string} [labelSize="normal"] Label size passed to
 *   applyLabelAppearance.
 */
function renderGraph(data, labelSize = "normal") {
  if (!container) return;

  // Stop the previous simulation before its DOM is thrown away.
  if (currentSimulation) {
    currentSimulation.stop();
    currentSimulation = null;
  }
  container.innerHTML = "";

  const width = container.clientWidth || 900;
  const height = container.clientHeight || 600;

  const svg = d3
    .select(container)
    .append("svg")
    .attr("viewBox", [0, 0, width, height])
    .attr("width", "100%")
    .attr("height", height);

  const defs = svg.append("defs");

  // One arrowhead marker per relation, coloured like the matching edge.
  const addArrowMarker = (id, fill) => {
    defs
      .append("marker")
      .attr("id", id)
      .attr("viewBox", "0 -5 10 10")
      .attr("refX", 18)
      .attr("refY", 0)
      .attr("markerWidth", 6)
      .attr("markerHeight", 6)
      .attr("orient", "auto")
      .append("path")
      .attr("d", "M0,-5L10,0L0,5")
      .attr("fill", fill);
  };
  addArrowMarker("arrow-references", "#6c83c7");
  addArrowMarker("arrow-referenced-by", "#c76c6c");

  // Everything drawn lives in one group so zoom/pan can transform it at once.
  const contentGroup = svg.append("g").attr("class", "graph-content");
  const linkGroup = contentGroup.append("g").attr("class", "graph-links");
  const nodeGroup = contentGroup.append("g").attr("class", "graph-nodes");
  const labelGroup = contentGroup.append("g").attr("class", "graph-labels");

  const links = data.links || [];
  const nodes = data.nodes || [];

  // Distinct channel labels in first-seen order. The styles map is only
  // filled in the loop below, so de-duplication has to happen here; otherwise
  // `idx` would count nodes instead of channels and hatch patterns would be
  // handed out even when only a few channels are present.
  currentChannelStyles = new Map();
  const uniqueChannels = Array.from(new Set(nodes.map((node) => getChannelLabel(node))));

  const additionalPatternCount = ADDITIONAL_PATTERNS.length;
  uniqueChannels.forEach((label, idx) => {
    const baseColor = color(label);
    let patternKey = "none";
    // Channels beyond the palette size get a hatch pattern on top of their
    // colour, cycling through the available patterns.
    if (idx >= paletteSizeDefault && additionalPatternCount > 0) {
      const patternInfo =
        ADDITIONAL_PATTERNS[(idx - paletteSizeDefault) % additionalPatternCount];
      patternKey = patternInfo.key;
    }
    currentChannelStyles.set(label, createChannelStyle(label, baseColor, patternKey));
  });

  const linkSelection = linkGroup
    .selectAll("line")
    .data(links)
    .enter()
    .append("line")
    .attr("stroke-width", 1.2)
    .attr("stroke", (d) => (d.relation === "references" ? "#6c83c7" : "#c76c6c"))
    .attr("stroke-opacity", 0.7)
    .attr("marker-end", (d) =>
      d.relation === "references" ? "url(#arrow-references)" : "url(#arrow-referenced-by)"
    );

  let nodePatternCounter = 0;
  // node id -> that node's own <pattern>, repositioned on every tick.
  const nodePatternRefs = new Map();

  // Resolve a node's fill: a plain colour, or a reference to a per-node
  // hatch pattern created on demand.
  const getNodeFill = (node) => {
    const style = currentChannelStyles.get(getChannelLabel(node));
    if (!style) {
      return color(getChannelLabel(node));
    }
    if (!style.hatch || style.hatch === "none") {
      return style.baseColor;
    }
    const patternId = `node-pattern-${nodePatternCounter++}`;
    const pattern = defs
      .append("pattern")
      .attr("id", patternId)
      .attr("patternUnits", "userSpaceOnUse")
      .attr("width", 8)
      .attr("height", 8);
    appendPatternContent(pattern, style.baseColor, style.hatch);
    pattern.attr("patternTransform", "translate(0,0)");
    nodePatternRefs.set(node.id, pattern);
    return `url(#${patternId})`;
  };

  const nodeSelection = nodeGroup
    .selectAll("circle")
    .data(nodes, (d) => d.id)
    .enter()
    .append("circle")
    .attr("r", (d) => (d.is_root ? 10 : 7))
    .attr("fill", (d) => getNodeFill(d))
    .attr("stroke", "#1f1f1f")
    .attr("stroke-width", (d) => (d.is_root ? 2 : 1))
    .call(
      d3
        .drag()
        // `simulation` is declared further down; these handlers only run
        // after it has been created.
        .on("start", (event, d) => {
          if (!event.active) simulation.alphaTarget(0.3).restart();
          d.fx = d.x;
          d.fy = d.y;
        })
        .on("drag", (event, d) => {
          d.fx = event.x;
          d.fy = event.y;
        })
        .on("end", (event, d) => {
          if (!event.active) simulation.alphaTarget(0);
          // Release the node so the simulation positions it again.
          d.fx = null;
          d.fy = null;
        })
    )
    // Left click opens the node's URL in a new tab.
    .on("click", (event, d) => {
      if (d.url) {
        window.open(d.url, "_blank", "noopener");
      }
    })
    // Right click loads the graph for the clicked node's id instead of
    // showing the browser context menu.
    .on("contextmenu", (event, d) => {
      event.preventDefault();
      loadGraph(d.id, currentDepth, currentMaxNodes, { updateInputs: true });
    });

  // Native tooltip: title (or id), then channel and date when present.
  nodeSelection.append("title").text((d) => {
    const parts = [d.title || d.id];
    if (d.channel_name) {
      parts.push(`Channel: ${d.channel_name}`);
    }
    if (d.date) {
      parts.push(`Date: ${d.date}`);
    }
    return parts.join("\n");
  });

  const labelSelection = labelGroup
    .selectAll("text")
    .data(nodes, (d) => d.id)
    .enter()
    .append("text")
    .attr("class", "graph-node-label")
    .attr("text-anchor", "middle")
    .attr("fill", "#1f1f1f")
    .attr("pointer-events", "none")
    .text((d) => d.title || d.id);

  applyLabelAppearance(labelSelection, labelSize);

  const simulation = d3
    .forceSimulation(nodes)
    .force(
      "link",
      d3
        .forceLink(links)
        .id((d) => d.id)
        .distance(120)
        .strength(0.8)
    )
    .force("charge", d3.forceManyBody().strength(-320))
    .force("center", d3.forceCenter(width / 2, height / 2))
    .force(
      "collide",
      d3.forceCollide().radius((d) => (d.is_root ? 20 : 14)).iterations(2)
    );

  simulation.on("tick", () => {
    linkSelection
      .attr("x1", (d) => d.source.x)
      .attr("y1", (d) => d.source.y)
      .attr("x2", (d) => d.target.x)
      .attr("y2", (d) => d.target.y);

    nodeSelection.attr("cx", (d) => d.x).attr("cy", (d) => d.y);

    // Labels sit just above their node.
    labelSelection.attr("x", (d) => d.x).attr("y", (d) => d.y - (d.is_root ? 14 : 12));

    // Move each node's hatch pattern to the node's current position.
    nodeSelection.each(function (d) {
      const pattern = nodePatternRefs.get(d.id);
      if (pattern) {
        const safeX = Number.isFinite(d.x) ? d.x : 0;
        const safeY = Number.isFinite(d.y) ? d.y : 0;
        pattern.attr("patternTransform", `translate(${safeX}, ${safeY})`);
      }
    });
  });

  const zoomBehavior = d3
    .zoom()
    .scaleExtent([0.3, 3])
    .on("zoom", (event) => {
      contentGroup.attr("transform", event.transform);
    });

  svg.call(zoomBehavior);
  currentSimulation = simulation;
}
|
||||||
|
|
||||||
|
/**
 * Fetch and display the graph for one video.
 *
 * Sanitizes the arguments, optionally writes them back into the form
 * controls, fetches the graph, then renders graph and legend and updates the
 * status line and URL. On an empty result or any failure the view is cleared
 * and an error status is shown.
 *
 * @param {string} videoId  Video id; surrounding whitespace is ignored.
 * @param {number|string} depth    Passed through sanitizeDepth.
 * @param {number|string} maxNodes Passed through sanitizeMaxNodes.
 * @param {{updateInputs?: boolean}} [options] When `updateInputs` is true the
 *   sanitized values are written into the form controls.
 * @returns {Promise<void>}
 */
async function loadGraph(videoId, depth, maxNodes, { updateInputs = false } = {}) {
  const sanitizedId = sanitizeId(videoId);
  if (!sanitizedId) {
    setStatus("Please enter a video ID.", true);
    return;
  }
  const safeDepth = sanitizeDepth(depth);
  const safeMaxNodes = sanitizeMaxNodes(maxNodes);

  if (updateInputs) {
    videoInput.value = sanitizedId;
    depthInput.value = String(safeDepth);
    maxNodesInput.value = String(safeMaxNodes);
  }

  // Shared cleanup for the "nothing to show" and failure paths.
  const clearGraphView = () => {
    // Stop the old simulation too, so it does not keep ticking against DOM
    // that is about to be removed.
    if (currentSimulation) {
      currentSimulation.stop();
      currentSimulation = null;
    }
    container.innerHTML = "";
    currentGraphData = null;
    currentChannelStyles = new Map();
    renderLegend([]);
  };

  setStatus("Loading graph…");
  try {
    const data = await fetchGraph(sanitizedId, safeDepth, safeMaxNodes);
    // A null or node-less payload is an empty result, not a crash.
    const nodes = data ? data.nodes : null;
    if (!nodes || nodes.length === 0) {
      setStatus("No nodes returned for this video.", true);
      clearGraphView();
      return;
    }
    currentGraphData = data;
    currentDepth = safeDepth;
    currentMaxNodes = safeMaxNodes;
    renderGraph(data, getLabelSize());
    renderLegend(nodes);
    // renderGraph tolerates a payload without `links`; the summary must as
    // well, otherwise a freshly drawn graph would be wiped by the catch below.
    const linkCount = (data.links || []).length;
    const shownDepth = data.depth ?? safeDepth;
    setStatus(`Showing ${nodes.length} nodes and ${linkCount} links (depth ${shownDepth})`);
    updateUrlState(sanitizedId, safeDepth, safeMaxNodes, getLabelSize());
  } catch (err) {
    console.error(err);
    setStatus(err.message || "Failed to build graph.", true);
    clearGraphView();
  }
}
|
||||||
|
|
||||||
|
/**
 * Form submit handler: cancel the native submission and load the graph from
 * the current control values, writing the sanitized values back to them.
 */
async function handleSubmit(event) {
  event.preventDefault();
  const requestedId = videoInput.value;
  const requestedDepth = depthInput.value;
  const requestedMaxNodes = maxNodesInput.value;
  const options = { updateInputs: true };
  await loadGraph(requestedId, requestedDepth, requestedMaxNodes, options);
}
|
||||||
|
|
||||||
|
/**
 * Rebuild the legend: a fixed "Edges" section and a "Channels in view"
 * section listing up to 20 entries of `currentChannelStyles`, sorted by
 * label. The legend element is created on first use, directly after the
 * status element, or before the graph container when there is no status
 * element. `nodes` is accepted but not read.
 */
function renderLegend(nodes) {
  const MAX_CHANNEL_ITEMS = 20;

  // Create an element, optionally with a class and text content.
  const make = (tag, className, text) => {
    const element = document.createElement(tag);
    if (className) element.className = className;
    if (text !== undefined) element.textContent = text;
    return element;
  };

  let legend = document.getElementById("graphLegend");
  if (!legend) {
    legend = make("div", "graph-legend");
    legend.id = "graphLegend";
    if (statusEl && statusEl.parentNode) {
      statusEl.insertAdjacentElement("afterend", legend);
    } else {
      container.parentElement?.insertBefore(legend, container);
    }
  }
  legend.innerHTML = "";

  // --- Edges section ---
  const edgesSection = make("div", "graph-legend-section");
  edgesSection.appendChild(make("div", "graph-legend-title", "Edges"));
  const edgeRows = [
    ["graph-legend-swatch--references", "Outgoing reference (video references other)"],
    ["graph-legend-swatch--referenced", "Incoming reference (other video references this)"],
  ];
  for (const [swatchClass, description] of edgeRows) {
    const row = make("div", "graph-legend-row");
    row.appendChild(make("span", `graph-legend-swatch ${swatchClass}`));
    row.appendChild(make("span", "", description));
    edgesSection.appendChild(row);
  }
  legend.appendChild(edgesSection);

  // --- Channels section ---
  const channelSection = make("div", "graph-legend-section");
  channelSection.appendChild(make("div", "graph-legend-title", "Channels in view"));

  const sortedChannels = [...currentChannelStyles].sort(([labelA], [labelB]) =>
    labelA.localeCompare(labelB, undefined, { sensitivity: "base" })
  );
  const visibleChannels = sortedChannels.slice(0, MAX_CHANNEL_ITEMS);

  if (visibleChannels.length > 0) {
    const channelList = make("div", "graph-legend-channel-list");
    for (const [label, style] of visibleChannels) {
      const item = make(
        "div",
        `graph-legend-channel graph-legend-channel--${style.legendClass || "none"}`
      );
      const swatch = make("span", "graph-legend-swatch graph-legend-channel-swatch");
      swatch.style.backgroundColor = style.baseColor;
      item.appendChild(swatch);
      item.appendChild(make("span", "", label));
      channelList.appendChild(item);
    }
    channelSection.appendChild(channelList);

    // Say how many channels were left out of the list.
    const hiddenCount = sortedChannels.length - MAX_CHANNEL_ITEMS;
    if (hiddenCount > 0) {
      channelSection.appendChild(
        make("div", "graph-legend-note", `+${hiddenCount} more channels`)
      );
    }
  } else {
    channelSection.appendChild(
      make("div", "graph-legend-note", "No channel data available.")
    );
  }

  legend.appendChild(channelSection);
}
|
||||||
|
|
||||||
|
/**
 * Show or hide graph labels and set their font size.
 *
 * @param {object} selection - Object exposing chainable `style()` / `attr()`
 *   setters (presumably a D3 selection of label nodes; confirm at call site).
 * @param {string} labelSize - Key into LABEL_FONT_SIZES, or "off" to hide labels.
 */
function applyLabelAppearance(selection, labelSize) {
  // "off" hides the labels outright; no font size is applied in that case.
  if (labelSize === "off") {
    selection.style("display", "none");
    return;
  }

  // Unknown size keys fall back to the "normal" entry.
  const fontSize = LABEL_FONT_SIZES[labelSize] || LABEL_FONT_SIZES.normal;
  selection.style("display", null).attr("font-size", fontSize);
}
|
||||||
|
|
||||||
|
/**
 * Mirror the current graph settings into the page URL without adding a
 * history entry (uses history.replaceState).
 *
 * Does nothing when the graph runs embedded (isEmbedded is truthy).
 *
 * @param {string} videoId - Root video id; when empty/null the `video_id`
 *   parameter is removed instead of being written.
 * @param {number|string} depth - Written as `depth`.
 * @param {number|string} maxNodes - Written as `max_nodes`.
 * @param {string} labelSize - Written as `label_size` unless it is empty or
 *   "normal", in which case the parameter is removed.
 */
function updateUrlState(videoId, depth, maxNodes, labelSize) {
  if (isEmbedded) {
    return;
  }

  const next = new URL(window.location.href);
  const params = next.searchParams;

  // URLSearchParams.set() stringifies its value, so a null/undefined id would
  // be persisted as the literal text "null"/"undefined" and an empty id as a
  // dangling `video_id=`. Drop the parameter when there is no id to record.
  if (videoId) {
    params.set("video_id", videoId);
  } else {
    params.delete("video_id");
  }

  params.set("depth", String(depth));
  params.set("max_nodes", String(maxNodes));

  // "normal" is treated as the implicit value and kept out of the URL.
  if (labelSize && labelSize !== "normal") {
    params.set("label_size", labelSize);
  } else {
    params.delete("label_size");
  }

  history.replaceState({}, "", next.toString());
}
|
||||||
|
|
||||||
|
/**
 * Seed the form controls from the page's query string and, when a video id is
 * present and the graph is not embedded, load that graph immediately.
 *
 * Reads `video_id`, `depth`, `max_nodes` and `label_size`; each value is passed
 * through its sanitizer/validator before being applied to an input.
 */
function initFromQuery() {
  const query = new URLSearchParams(window.location.search);
  const videoId = sanitizeId(query.get("video_id"));
  const depth = sanitizeDepth(query.get("depth") || "");
  const maxNodes = sanitizeMaxNodes(query.get("max_nodes") || "");
  const requestedLabelSize = query.get("label_size");

  // Only overwrite the video field when the URL actually supplied a usable id.
  if (videoId) {
    videoInput.value = videoId;
  }
  depthInput.value = String(depth);
  maxNodesInput.value = String(maxNodes);

  // An absent or unrecognised label_size re-applies whatever getLabelSize() reports.
  const useRequested = requestedLabelSize && isValidLabelSize(requestedLabelSize);
  setLabelSizeInput(useRequested ? requestedLabelSize : getLabelSize());

  // Embedded instances never auto-load from the URL.
  if (videoId && !isEmbedded) {
    loadGraph(videoId, depth, maxNodes, { updateInputs: false });
  }
}
|
||||||
|
|
||||||
|
// Initial layout pass, then re-run it on every viewport resize.
resizeContainer();
window.addEventListener("resize", resizeContainer);

form.addEventListener("submit", handleSubmit);

// Changing the label size re-renders the graph already in memory (no refetch)
// and records the new size in the URL.
labelSizeInput.addEventListener("change", () => {
  const selectedSize = getLabelSize();

  if (currentGraphData) {
    renderGraph(currentGraphData, selectedSize);
    renderLegend(currentGraphData.nodes);
  }

  const videoId = sanitizeId(videoInput.value);
  updateUrlState(videoId, currentDepth, currentMaxNodes, selectedSize);
});

// Apply any state carried in the query string.
initFromQuery();
|
||||||
|
|
||||||
|
// Public API exposed on the shared GraphUI object for other scripts.
Object.assign(GraphUI, {
  /**
   * Load a graph for `videoId`. `depth` / `maxNodes` default to the current
   * values when null or undefined. Inputs are synced unless
   * `options.updateInputs` is exactly `false`. Returns loadGraph()'s result.
   */
  load(videoId, depth, maxNodes, options = {}) {
    const effectiveDepth = depth == null ? currentDepth : depth;
    const effectiveMax = maxNodes == null ? currentMaxNodes : maxNodes;
    const updateInputs = options.updateInputs !== false;
    return loadGraph(videoId, effectiveDepth, effectiveMax, { updateInputs });
  },

  /** Set the label-size control and fire its "change" event so listeners react. */
  setLabelSize(size) {
    if (labelSizeInput && size) {
      setLabelSizeInput(size);
      labelSizeInput.dispatchEvent(new Event("change", { bubbles: true }));
    }
  },

  /** Sanitize `value`, then store it in both the depth input and currentDepth. */
  setDepth(value) {
    if (depthInput) {
      const sanitized = sanitizeDepth(value);
      depthInput.value = String(sanitized);
      currentDepth = sanitized;
    }
  },

  /** Sanitize `value`, then store it in both the max-nodes input and currentMaxNodes. */
  setMaxNodes(value) {
    if (maxNodesInput) {
      const sanitized = sanitizeMaxNodes(value);
      maxNodesInput.value = String(sanitized);
      currentMaxNodes = sanitized;
    }
  },

  /** Focus the video id field and select its contents. */
  focusInput() {
    if (!videoInput) {
      return;
    }
    videoInput.focus();
    videoInput.select();
  },

  /** Stop the running simulation, if any, and drop the reference to it. */
  stop() {
    if (!currentSimulation) {
      return;
    }
    currentSimulation.stop();
    currentSimulation = null;
  },

  /**
   * Snapshot of the current settings and graph data. `nodes` / `links` are
   * shallow array copies (empty arrays when no graph is loaded).
   */
  getState() {
    const hasGraph = Boolean(currentGraphData);
    return {
      depth: currentDepth,
      maxNodes: currentMaxNodes,
      labelSize: getLabelSize(),
      nodes: hasGraph ? currentGraphData.nodes.slice() : [],
      links: hasGraph ? currentGraphData.links.slice() : [],
    };
  },

  isEmbedded,
});
|
||||||
|
// The ready flag is set synchronously; the "graph-ui-ready" event is dispatched
// from a zero-delay timer (presumably so scripts loaded right after this one
// can attach their listener first — confirm against the consumers).
GraphUI.ready = true;
const announceReady = () => {
  window.dispatchEvent(new CustomEvent("graph-ui-ready"));
};
setTimeout(announceReady, 0);
|
||||||
|
})();
|
||||||
@ -14,6 +14,7 @@
|
|||||||
<div class="title-bar">
|
<div class="title-bar">
|
||||||
<div class="title-bar-text">This Little Corner</div>
|
<div class="title-bar-text">This Little Corner</div>
|
||||||
<div class="title-bar-controls">
|
<div class="title-bar-controls">
|
||||||
|
<button id="aboutBtn" aria-label="About">?</button>
|
||||||
<button id="minimizeBtn" aria-label="Minimize"></button>
|
<button id="minimizeBtn" aria-label="Minimize"></button>
|
||||||
<button aria-label="Maximize"></button>
|
<button aria-label="Maximize"></button>
|
||||||
<button aria-label="Close"></button>
|
<button aria-label="Close"></button>
|
||||||
@ -21,6 +22,10 @@
|
|||||||
</div>
|
</div>
|
||||||
<div class="window-body">
|
<div class="window-body">
|
||||||
<p>Enter a phrase to query title, description, and transcript text.</p>
|
<p>Enter a phrase to query title, description, and transcript text.</p>
|
||||||
|
<p style="font-size: 11px;">
|
||||||
|
Looking for semantic matches? Try the
|
||||||
|
<a href="/vector-search">vector search beta</a>.
|
||||||
|
</p>
|
||||||
|
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<legend>Search</legend>
|
<legend>Search</legend>
|
||||||
@ -31,13 +36,10 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="field-row" style="margin-bottom: 8px; align-items: center;">
|
<div class="field-row" style="margin-bottom: 8px; align-items: center;">
|
||||||
<label style="width: 60px;">Channel:</label>
|
<label for="channel" style="width: 60px;">Channel:</label>
|
||||||
<details id="channelDropdown" class="channel-dropdown" style="flex: 1;">
|
<select id="channel" style="flex: 1;">
|
||||||
<summary id="channelSummary">All Channels</summary>
|
<option value="">All Channels</option>
|
||||||
<div id="channelOptions" class="channel-options">
|
</select>
|
||||||
<div>Loading channels…</div>
|
|
||||||
</div>
|
|
||||||
</details>
|
|
||||||
|
|
||||||
<label for="year" style="margin-left: 8px;">Year:</label>
|
<label for="year" style="margin-left: 8px;">Year:</label>
|
||||||
<select id="year">
|
<select id="year">
|
||||||
@ -64,21 +66,25 @@
|
|||||||
<div class="toggle-item toggle-item--first">
|
<div class="toggle-item toggle-item--first">
|
||||||
<input type="checkbox" id="exactToggle" checked />
|
<input type="checkbox" id="exactToggle" checked />
|
||||||
<label for="exactToggle">Exact</label>
|
<label for="exactToggle">Exact</label>
|
||||||
|
<span class="toggle-help">Match all terms exactly.</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="toggle-item">
|
<div class="toggle-item">
|
||||||
<input type="checkbox" id="fuzzyToggle" checked />
|
<input type="checkbox" id="fuzzyToggle" checked />
|
||||||
<label for="fuzzyToggle">Fuzzy</label>
|
<label for="fuzzyToggle">Fuzzy</label>
|
||||||
|
<span class="toggle-help">Allow small typos and variations.</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="toggle-item">
|
<div class="toggle-item">
|
||||||
<input type="checkbox" id="phraseToggle" checked />
|
<input type="checkbox" id="phraseToggle" checked />
|
||||||
<label for="phraseToggle">Phrase</label>
|
<label for="phraseToggle">Phrase</label>
|
||||||
|
<span class="toggle-help">Boost exact phrases inside transcripts.</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="toggle-item">
|
<div class="toggle-item">
|
||||||
<input type="checkbox" id="queryStringToggle" />
|
<input type="checkbox" id="queryStringToggle" />
|
||||||
<label for="queryStringToggle">Query string mode</label>
|
<label for="queryStringToggle">Query string mode</label>
|
||||||
|
<span class="toggle-help">Use raw Lucene syntax (overrides other toggles).</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</fieldset>
|
</fieldset>
|
||||||
@ -112,6 +118,105 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="about-panel" id="aboutPanel" hidden>
|
||||||
|
<div class="about-panel__header">
|
||||||
|
<strong>About This App</strong>
|
||||||
|
<button id="aboutCloseBtn" aria-label="Close about panel">×</button>
|
||||||
|
</div>
|
||||||
|
<div class="about-panel__body">
|
||||||
|
<p>Use the toggles to choose exact, fuzzy, or phrase matching. Query string mode accepts raw Lucene syntax.</p>
|
||||||
|
<p>Results are ranked by your chosen sort order; the timeline summarizes the same query.</p>
|
||||||
|
<p>You can download transcripts, copy MLA citations, or explore references via the graph button.</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div
|
||||||
|
id="graphModalOverlay"
|
||||||
|
class="graph-modal-overlay"
|
||||||
|
aria-hidden="true"
|
||||||
|
>
|
||||||
|
<div
|
||||||
|
class="window graph-window graph-modal-window"
|
||||||
|
id="graphModalWindow"
|
||||||
|
role="dialog"
|
||||||
|
aria-modal="true"
|
||||||
|
aria-labelledby="graphModalTitle"
|
||||||
|
>
|
||||||
|
<div class="title-bar">
|
||||||
|
<div class="title-bar-text" id="graphModalTitle">Reference Graph</div>
|
||||||
|
<div class="title-bar-controls">
|
||||||
|
<button id="graphModalClose" aria-label="Close"></button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="window-body">
|
||||||
|
<p>
|
||||||
|
Explore how this video links with its neighbors. Adjust depth or node cap to expand the graph.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<form id="graphForm" class="graph-controls">
|
||||||
|
<div class="field-group">
|
||||||
|
<label for="graphVideoId">Video ID</label>
|
||||||
|
<input
|
||||||
|
id="graphVideoId"
|
||||||
|
name="video_id"
|
||||||
|
type="text"
|
||||||
|
placeholder="e.g. dQw4w9WgXcQ"
|
||||||
|
required
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="field-group">
|
||||||
|
<label for="graphDepth">Depth</label>
|
||||||
|
<select id="graphDepth" name="depth">
|
||||||
|
<option value="1" selected>1 hop</option>
|
||||||
|
<option value="2">2 hops</option>
|
||||||
|
<option value="3">3 hops</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="field-group">
|
||||||
|
<label for="graphMaxNodes">Max nodes</label>
|
||||||
|
<select id="graphMaxNodes" name="max_nodes">
|
||||||
|
<option value="100">100</option>
|
||||||
|
<option value="150">150</option>
|
||||||
|
<option value="200" selected>200</option>
|
||||||
|
<option value="300">300</option>
|
||||||
|
<option value="400">400</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="field-group">
|
||||||
|
<label for="graphLabelSize">Labels</label>
|
||||||
|
<select id="graphLabelSize" name="label_size">
|
||||||
|
<option value="off">Off</option>
|
||||||
|
<option value="tiny" selected>Tiny</option>
|
||||||
|
<option value="small">Small</option>
|
||||||
|
<option value="normal">Normal</option>
|
||||||
|
<option value="medium">Medium</option>
|
||||||
|
<option value="large">Large</option>
|
||||||
|
<option value="xlarge">Extra large</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<button type="submit">Build graph</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<div id="graphStatus" class="graph-status">Enter a video ID to begin.</div>
|
||||||
|
<div
|
||||||
|
id="graphContainer"
|
||||||
|
class="graph-container"
|
||||||
|
data-embedded="true"
|
||||||
|
></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="status-bar">
|
||||||
|
<p class="status-bar-field">Right-click a node to set a new root</p>
|
||||||
|
<p class="status-bar-field">Colors (and hatches) represent channels</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script src="/static/graph.js"></script>
|
||||||
<script src="/static/app.js"></script>
|
<script src="/static/app.js"></script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
500
static/style.css
500
static/style.css
@ -63,7 +63,7 @@ body.dimmed {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.field-row input[type="text"],
|
.field-row input[type="text"],
|
||||||
.field-row .channel-dropdown {
|
.field-row select#channel {
|
||||||
flex: 1 1 100% !important;
|
flex: 1 1 100% !important;
|
||||||
min-width: 0 !important;
|
min-width: 0 !important;
|
||||||
max-width: 100% !important;
|
max-width: 100% !important;
|
||||||
@ -86,37 +86,18 @@ body.dimmed {
|
|||||||
max-width: 100%;
|
max-width: 100%;
|
||||||
min-width: 100%;
|
min-width: 100%;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/* Channel dropdown custom styling */
|
.graph-controls {
|
||||||
.channel-dropdown {
|
flex-direction: column;
|
||||||
position: relative;
|
align-items: stretch;
|
||||||
display: inline-block;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
.channel-dropdown summary {
|
.graph-controls .field-group,
|
||||||
list-style: none;
|
.graph-controls input,
|
||||||
cursor: pointer;
|
.graph-controls select {
|
||||||
padding: 3px 4px;
|
width: 100%;
|
||||||
background: ButtonFace;
|
min-width: 0;
|
||||||
border: 1px solid;
|
}
|
||||||
border-color: ButtonHighlight ButtonShadow ButtonShadow ButtonHighlight;
|
|
||||||
min-width: 180px;
|
|
||||||
text-align: left;
|
|
||||||
}
|
|
||||||
|
|
||||||
.channel-dropdown summary::-webkit-details-marker {
|
|
||||||
display: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
.channel-dropdown summary::after {
|
|
||||||
content: ' ▼';
|
|
||||||
font-size: 8px;
|
|
||||||
float: right;
|
|
||||||
}
|
|
||||||
|
|
||||||
.channel-dropdown[open] summary::after {
|
|
||||||
content: ' ▲';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.toggle-row {
|
.toggle-row {
|
||||||
@ -174,32 +155,6 @@ body.dimmed {
|
|||||||
overflow-y: auto;
|
overflow-y: auto;
|
||||||
}
|
}
|
||||||
|
|
||||||
.channel-options {
|
|
||||||
position: absolute;
|
|
||||||
margin-top: 2px;
|
|
||||||
padding: 4px;
|
|
||||||
background: ButtonFace;
|
|
||||||
border: 1px solid;
|
|
||||||
border-color: ButtonHighlight ButtonShadow ButtonShadow ButtonHighlight;
|
|
||||||
max-height: 300px;
|
|
||||||
overflow-y: auto;
|
|
||||||
box-shadow: 2px 2px 0 rgba(0, 0, 0, 0.2);
|
|
||||||
z-index: 100;
|
|
||||||
min-width: 220px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.channel-option {
|
|
||||||
display: flex;
|
|
||||||
align-items: center;
|
|
||||||
gap: 6px;
|
|
||||||
margin-bottom: 4px;
|
|
||||||
font-size: 11px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.channel-option:last-child {
|
|
||||||
margin-bottom: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Layout helpers */
|
/* Layout helpers */
|
||||||
.summary-row {
|
.summary-row {
|
||||||
display: flex;
|
display: flex;
|
||||||
@ -218,6 +173,344 @@ body.dimmed {
|
|||||||
min-width: 300px;
|
min-width: 300px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.graph-window {
|
||||||
|
width: 95%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-controls {
|
||||||
|
display: flex;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
gap: 12px;
|
||||||
|
align-items: flex-end;
|
||||||
|
margin-bottom: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-controls .field-group {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-controls label {
|
||||||
|
font-size: 11px;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-controls input,
|
||||||
|
.graph-controls select {
|
||||||
|
min-width: 160px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-status {
|
||||||
|
font-size: 11px;
|
||||||
|
margin-bottom: 8px;
|
||||||
|
color: #1f1f1f;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-status.error {
|
||||||
|
color: #b00020;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-container {
|
||||||
|
background: Window;
|
||||||
|
border: 1px solid #919b9c;
|
||||||
|
box-shadow: inset -1px -1px #0a0a0a, inset 1px 1px #fff;
|
||||||
|
position: relative;
|
||||||
|
width: 100%;
|
||||||
|
min-height: 520px;
|
||||||
|
height: auto;
|
||||||
|
overflow: visible;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-modal-overlay {
|
||||||
|
position: fixed;
|
||||||
|
inset: 0;
|
||||||
|
display: none;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
padding: 24px;
|
||||||
|
background: rgba(0, 0, 0, 0.35);
|
||||||
|
z-index: 2000;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-modal-overlay.active {
|
||||||
|
display: flex;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-modal-window {
|
||||||
|
width: min(960px, 100%);
|
||||||
|
max-height: calc(100vh - 48px);
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-modal-window .window-body {
|
||||||
|
max-height: calc(100vh - 180px);
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-modal-window .graph-container {
|
||||||
|
height: 560px;
|
||||||
|
}
|
||||||
|
|
||||||
|
body.modal-open {
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: flex-start;
|
||||||
|
gap: 6px;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
align-items: flex-start;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-header-main {
|
||||||
|
flex: 1 1 auto;
|
||||||
|
min-width: 220px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-actions {
|
||||||
|
display: flex;
|
||||||
|
align-items: flex-start;
|
||||||
|
gap: 6px;
|
||||||
|
margin-left: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-action-btn {
|
||||||
|
white-space: nowrap;
|
||||||
|
font-family: "Tahoma", "MS Sans Serif", sans-serif;
|
||||||
|
font-size: 11px;
|
||||||
|
padding: 4px 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-meta {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
gap: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-status {
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 4px;
|
||||||
|
padding: 1px 6px;
|
||||||
|
border-radius: 3px;
|
||||||
|
font-size: 10px;
|
||||||
|
line-height: 1.3;
|
||||||
|
border: 1px solid #c4a3a3;
|
||||||
|
background: #fff6f6;
|
||||||
|
color: #6b1f1f;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-status::before {
|
||||||
|
content: "⚠";
|
||||||
|
font-size: 10px;
|
||||||
|
line-height: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-status--deleted {
|
||||||
|
border-color: #d1a6a6;
|
||||||
|
background: #fff8f8;
|
||||||
|
color: #6b1f1f;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-launch-btn {
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-node-label {
|
||||||
|
text-shadow: -1px -1px 0 #fff, 1px -1px 0 #fff, -1px 1px 0 #fff, 1px 1px 0 #fff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-nodes circle {
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend {
|
||||||
|
margin: 12px 0;
|
||||||
|
font-size: 11px;
|
||||||
|
background: Window;
|
||||||
|
border: 1px solid #919b9c;
|
||||||
|
padding: 8px 10px;
|
||||||
|
display: inline-flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 4px;
|
||||||
|
box-shadow: inset -1px -1px #0a0a0a, inset 1px 1px #fff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-section {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-title {
|
||||||
|
font-weight: bold;
|
||||||
|
color: #1f1f1f;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-row {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-swatch {
|
||||||
|
display: inline-block;
|
||||||
|
width: 18px;
|
||||||
|
height: 12px;
|
||||||
|
border: 1px solid #1f1f1f;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-swatch--references {
|
||||||
|
background: #6c83c7;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-swatch--referenced {
|
||||||
|
background: #c76c6c;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-channel-list {
|
||||||
|
display: flex;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
gap: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-channel {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-channel-swatch {
|
||||||
|
width: 14px;
|
||||||
|
height: 14px;
|
||||||
|
background-repeat: repeat;
|
||||||
|
background-position: 0 0;
|
||||||
|
background-size: 6px 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-channel--none .graph-legend-channel-swatch {
|
||||||
|
background-image: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-channel--diag-forward .graph-legend-channel-swatch {
|
||||||
|
background-image: repeating-linear-gradient(
|
||||||
|
45deg,
|
||||||
|
rgba(0, 0, 0, 0.35) 0,
|
||||||
|
rgba(0, 0, 0, 0.35) 2px,
|
||||||
|
transparent 2px,
|
||||||
|
transparent 4px
|
||||||
|
);
|
||||||
|
background-blend-mode: multiply;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-channel--diag-back .graph-legend-channel-swatch {
|
||||||
|
background-image: repeating-linear-gradient(
|
||||||
|
-45deg,
|
||||||
|
rgba(0, 0, 0, 0.35) 0,
|
||||||
|
rgba(0, 0, 0, 0.35) 2px,
|
||||||
|
transparent 2px,
|
||||||
|
transparent 4px
|
||||||
|
);
|
||||||
|
background-blend-mode: multiply;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-channel--cross .graph-legend-channel-swatch {
|
||||||
|
background-image:
|
||||||
|
repeating-linear-gradient(
|
||||||
|
45deg,
|
||||||
|
rgba(0, 0, 0, 0.25) 0,
|
||||||
|
rgba(0, 0, 0, 0.25) 2px,
|
||||||
|
transparent 2px,
|
||||||
|
transparent 4px
|
||||||
|
),
|
||||||
|
repeating-linear-gradient(
|
||||||
|
-45deg,
|
||||||
|
rgba(0, 0, 0, 0.25) 0,
|
||||||
|
rgba(0, 0, 0, 0.25) 2px,
|
||||||
|
transparent 2px,
|
||||||
|
transparent 4px
|
||||||
|
);
|
||||||
|
background-blend-mode: multiply;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-channel--dots .graph-legend-channel-swatch {
|
||||||
|
background-image: radial-gradient(rgba(0, 0, 0, 0.35) 30%, transparent 31%);
|
||||||
|
background-size: 6px 6px;
|
||||||
|
background-blend-mode: multiply;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-legend-note {
|
||||||
|
font-size: 10px;
|
||||||
|
color: #555;
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
.title-bar-link {
|
||||||
|
display: inline-block;
|
||||||
|
color: inherit;
|
||||||
|
text-decoration: none;
|
||||||
|
font-size: 11px;
|
||||||
|
padding: 2px 6px;
|
||||||
|
border: 1px solid;
|
||||||
|
border-color: ButtonHighlight ButtonShadow ButtonShadow ButtonHighlight;
|
||||||
|
background: ButtonFace;
|
||||||
|
}
|
||||||
|
|
||||||
|
.title-bar-controls #aboutBtn {
|
||||||
|
font-weight: bold;
|
||||||
|
font-size: 12px;
|
||||||
|
padding: 0 6px;
|
||||||
|
margin-right: 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.toggle-item {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.toggle-help {
|
||||||
|
font-size: 10px;
|
||||||
|
color: #555;
|
||||||
|
}
|
||||||
|
|
||||||
|
.about-panel {
|
||||||
|
position: fixed;
|
||||||
|
top: 20px;
|
||||||
|
right: 20px;
|
||||||
|
width: 280px;
|
||||||
|
background: Window;
|
||||||
|
border: 2px solid #919b9c;
|
||||||
|
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.25);
|
||||||
|
z-index: 2100;
|
||||||
|
font-size: 11px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.about-panel__header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
padding: 6px 8px;
|
||||||
|
background: #0055aa;
|
||||||
|
color: #fff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.about-panel__body {
|
||||||
|
padding: 8px;
|
||||||
|
background: Window;
|
||||||
|
color: #000;
|
||||||
|
}
|
||||||
|
|
||||||
|
.about-panel__header button {
|
||||||
|
border: none;
|
||||||
|
background: transparent;
|
||||||
|
color: inherit;
|
||||||
|
font-weight: bold;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
/* Results styling */
|
/* Results styling */
|
||||||
#results .item {
|
#results .item {
|
||||||
background: Window;
|
background: Window;
|
||||||
@ -227,6 +520,7 @@ body.dimmed {
|
|||||||
max-width: 100%;
|
max-width: 100%;
|
||||||
overflow: hidden;
|
overflow: hidden;
|
||||||
word-wrap: break-word;
|
word-wrap: break-word;
|
||||||
|
box-sizing: border-box;
|
||||||
box-shadow: 2px 2px 0 rgba(0, 0, 0, 0.15);
|
box-shadow: 2px 2px 0 rgba(0, 0, 0, 0.15);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -243,7 +537,9 @@ body.dimmed {
|
|||||||
.window-body {
|
.window-body {
|
||||||
max-width: 100%;
|
max-width: 100%;
|
||||||
overflow-x: hidden;
|
overflow-x: hidden;
|
||||||
margin: 1rem;
|
margin: 0;
|
||||||
|
padding: 1rem;
|
||||||
|
box-sizing: border-box;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Badges */
|
/* Badges */
|
||||||
@ -267,6 +563,14 @@ body.dimmed {
|
|||||||
word-break: keep-all;
|
word-break: keep-all;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.badge--transcript-primary {
|
||||||
|
background: #0b6efd;
|
||||||
|
}
|
||||||
|
|
||||||
|
.badge--transcript-secondary {
|
||||||
|
background: #8f4bff;
|
||||||
|
}
|
||||||
|
|
||||||
.badge-clickable {
|
.badge-clickable {
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
}
|
}
|
||||||
@ -297,9 +601,14 @@ body.dimmed {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.highlight-row {
|
.highlight-row {
|
||||||
padding: 4px;
|
padding: 4px 6px;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
border: 1px solid transparent;
|
border: 1px solid transparent;
|
||||||
|
display: flex;
|
||||||
|
align-items: flex-start;
|
||||||
|
gap: 8px;
|
||||||
|
max-width: 100%;
|
||||||
|
box-sizing: border-box;
|
||||||
}
|
}
|
||||||
|
|
||||||
.highlight-row:hover {
|
.highlight-row:hover {
|
||||||
@ -308,6 +617,77 @@ body.dimmed {
|
|||||||
border: 1px dotted WindowText;
|
border: 1px dotted WindowText;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.highlight-text {
|
||||||
|
flex: 1 1 auto;
|
||||||
|
word-break: break-word;
|
||||||
|
overflow-wrap: anywhere;
|
||||||
|
}
|
||||||
|
|
||||||
|
.highlight-source-indicator {
|
||||||
|
width: 10px;
|
||||||
|
height: 10px;
|
||||||
|
border-radius: 2px;
|
||||||
|
border: 1px solid transparent;
|
||||||
|
margin-left: auto;
|
||||||
|
flex: 0 0 auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.highlight-source-indicator--primary {
|
||||||
|
background: #0b6efd;
|
||||||
|
border-color: #084bb5;
|
||||||
|
}
|
||||||
|
|
||||||
|
.highlight-source-indicator--secondary {
|
||||||
|
background: #8f4bff;
|
||||||
|
border-color: #5d2db3;
|
||||||
|
}
|
||||||
|
|
||||||
|
.vector-chunk {
|
||||||
|
margin-top: 8px;
|
||||||
|
padding: 8px;
|
||||||
|
background: #f3f7ff;
|
||||||
|
border: 1px solid #c7d0e2;
|
||||||
|
font-size: 11px;
|
||||||
|
line-height: 1.5;
|
||||||
|
word-break: break-word;
|
||||||
|
}
|
||||||
|
|
||||||
|
@media screen and (max-width: 640px) {
|
||||||
|
.result-header {
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-header-main {
|
||||||
|
flex: 1 1 auto;
|
||||||
|
min-width: 0;
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-actions {
|
||||||
|
width: auto;
|
||||||
|
align-self: flex-start;
|
||||||
|
justify-content: flex-start;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
gap: 4px;
|
||||||
|
margin-left: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-action-btn {
|
||||||
|
width: 100%;
|
||||||
|
text-align: left;
|
||||||
|
}
|
||||||
|
|
||||||
|
.highlight-row {
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.highlight-source-indicator {
|
||||||
|
align-self: flex-end;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
mark {
|
mark {
|
||||||
background: yellow;
|
background: yellow;
|
||||||
color: black;
|
color: black;
|
||||||
@ -334,6 +714,10 @@ mark {
|
|||||||
border-bottom: 1px solid ButtonShadow;
|
border-bottom: 1px solid ButtonShadow;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.transcript-segment--matched {
|
||||||
|
background: #fff6cc;
|
||||||
|
}
|
||||||
|
|
||||||
.transcript-segment:last-child {
|
.transcript-segment:last-child {
|
||||||
border-bottom: none;
|
border-bottom: none;
|
||||||
margin-bottom: 0;
|
margin-bottom: 0;
|
||||||
|
|||||||
46
static/vector.html
Normal file
46
static/vector.html
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<title>TLC Vector Search</title>
|
||||||
|
<link rel="icon" href="/static/favicon.png" type="image/png" />
|
||||||
|
<link rel="stylesheet" href="https://unpkg.com/xp.css" />
|
||||||
|
<link rel="stylesheet" href="/static/style.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="window" style="max-width: 1200px; margin: 20px auto;">
|
||||||
|
<div class="title-bar">
|
||||||
|
<div class="title-bar-text">Vector Search (Experimental)</div>
|
||||||
|
<div class="title-bar-controls">
|
||||||
|
<a class="title-bar-link" href="/">⬅ Back to Search</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="window-body">
|
||||||
|
<p>Enter a natural language prompt; results come from the Qdrant vector index.</p>
|
||||||
|
|
||||||
|
<fieldset>
|
||||||
|
<legend>Vector Query</legend>
|
||||||
|
<div class="field-row" style="margin-bottom: 8px;">
|
||||||
|
<label for="vectorQuery" style="width: 60px;">Query:</label>
|
||||||
|
<input id="vectorQuery" type="text" placeholder="Describe what you are looking for" style="flex: 1;" />
|
||||||
|
<button id="vectorSearchBtn">Search</button>
|
||||||
|
</div>
|
||||||
|
</fieldset>
|
||||||
|
|
||||||
|
<div id="vectorMeta" style="margin-top: 12px; font-size: 11px;"></div>
|
||||||
|
|
||||||
|
<fieldset style="margin-top: 16px;">
|
||||||
|
<legend>Results</legend>
|
||||||
|
<div id="vectorResults"></div>
|
||||||
|
</fieldset>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="status-bar">
|
||||||
|
<p class="status-bar-field">Experimental mode • Qdrant</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script src="/static/vector.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
423
static/vector.js
Normal file
423
static/vector.js
Normal file
@ -0,0 +1,423 @@
|
|||||||
|
(() => {
|
||||||
|
const queryInput = document.getElementById("vectorQuery");
|
||||||
|
const searchBtn = document.getElementById("vectorSearchBtn");
|
||||||
|
const resultsDiv = document.getElementById("vectorResults");
|
||||||
|
const metaDiv = document.getElementById("vectorMeta");
|
||||||
|
const transcriptCache = new Map();
|
||||||
|
|
||||||
|
if (!queryInput || !searchBtn || !resultsDiv || !metaDiv) {
|
||||||
|
console.error("Vector search elements missing");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Utility helpers **/
|
||||||
|
/**
 * Escape the five HTML-significant characters (& < > " ') so text can be
 * interpolated into markup safely.
 *
 * @param {*} str - Text to escape. Falsy values yield ""; other non-string
 *   values are stringified first instead of throwing on the missing
 *   `.replace` method.
 * @returns {string} The escaped text.
 */
const escapeHtml = (str) => {
  // Each character must map to its entity. Returning the raw character would
  // make this function a no-op, and a bare `"` is not even a valid string literal.
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(str || "").replace(/[&<>"']/g, (ch) => entities[ch]);
};
|
||||||
|
|
||||||
|
/**
 * Return the portion of `value` before the first "T" (the date part of an
 * ISO-style timestamp string). Falsy input yields "".
 *
 * @param {*} value - Typically a timestamp string.
 * @returns {*} The leading segment, or `value` unchanged when it cannot be split.
 */
const fmtDate = (value) => {
  try {
    const [datePart] = (value || "").split("T");
    return datePart;
  } catch {
    // Truthy non-strings have no split(); hand them back untouched.
    return value;
  }
};
|
||||||
|
|
||||||
|
/**
 * Format a numeric score with three decimals.
 * Anything that is not a real number (including NaN) becomes "".
 */
const fmtSimilarity = (score) =>
  typeof score === "number" && !Number.isNaN(score) ? score.toFixed(3) : "";
|
||||||
|
|
||||||
|
/** Lower-cased `video_status` of a result item, or "" when absent. */
const getVideoStatus = (item) => {
  if (!item || !item.video_status) return "";
  return String(item.video_status).toLowerCase();
};

/** True when the item's status is exactly "deleted" (case-insensitive). */
const isLikelyDeleted = (item) => getVideoStatus(item) === "deleted";
|
||||||
|
|
||||||
|
/**
 * Render a number of seconds as "m:ss", or "h:mm:ss" once it reaches an hour.
 * Missing or NaN input (anything falsy other than 0) renders as "00:00".
 */
const formatTimestamp = (seconds) => {
  if (!seconds && seconds !== 0) return "00:00";
  const twoDigits = (n) => n.toString().padStart(2, "0");
  const hours = Math.floor(seconds / 3600);
  const mins = Math.floor((seconds % 3600) / 60);
  const secs = Math.floor(seconds % 60);
  return hours > 0
    ? `${hours}:${twoDigits(mins)}:${twoDigits(secs)}`
    : `${mins}:${twoDigits(secs)}`;
};
|
||||||
|
|
||||||
|
/**
 * Produce a display timestamp for a transcript segment.
 * A truthy `segment.timestamp` is returned verbatim; otherwise the first of
 * start_seconds / start / offset / time that parses as a number is formatted
 * with formatTimestamp. Returns "" when nothing usable is present.
 */
const formatSegmentTimestamp = (segment) => {
  if (!segment) return "";
  if (segment.timestamp) return segment.timestamp;
  const candidates = [
    segment.start_seconds,
    segment.start,
    segment.offset,
    segment.time,
  ];
  const seconds = candidates
    .filter((candidate) => candidate != null)
    .map((candidate) => parseFloat(candidate))
    .find((parsed) => !Number.isNaN(parsed));
  return seconds === undefined ? "" : formatTimestamp(seconds);
};
|
||||||
|
|
||||||
|
/**
 * Build one labelled section of the transcript download.
 * Prefers the non-blank `fullText`; otherwise joins `parts` one per line,
 * prefixing each with "[timestamp] " when a timestamp can be derived.
 * Returns "" when there is no content, else "<label>\n<content>\n".
 */
const serializeTranscriptSection = (label, parts, fullText) => {
  let body = "";
  if (typeof fullText === "string" && fullText.trim()) {
    body = fullText.trim();
  } else if (Array.isArray(parts) && parts.length) {
    const rendered = [];
    for (const segment of parts) {
      const stamp = formatSegmentTimestamp(segment);
      const text = segment && segment.text ? segment.text : "";
      rendered.push(stamp ? `[${stamp}] ${text}` : text);
    }
    body = rendered.join("\n").trim();
  }
  return body ? `${label}\n${body}\n` : "";
};
|
||||||
|
|
||||||
|
/**
 * Fetch transcript JSON for a video from /api/transcript, memoised per
 * video id in `transcriptCache`.
 * Resolves to null for a falsy id; throws when the response is not OK.
 */
const fetchTranscriptData = async (videoId) => {
  if (!videoId) return null;
  if (!transcriptCache.has(videoId)) {
    const endpoint = `/api/transcript?video_id=${encodeURIComponent(videoId)}`;
    const response = await fetch(endpoint);
    if (!response.ok) {
      throw new Error(`Transcript fetch failed (${response.status})`);
    }
    transcriptCache.set(videoId, await response.json());
  }
  return transcriptCache.get(videoId);
};
|
||||||
|
|
||||||
|
/**
 * Assemble the plain-text file offered by "Download transcript":
 * a metadata header, a blank line, then the primary and secondary transcript
 * sections (or a placeholder when neither has content).
 * The result is trimmed and always ends with a single newline.
 */
const buildTranscriptDownloadText = (item, transcriptData) => {
  // Optional header fields are dropped when empty.
  const header = [
    `Title: ${item.title || "Untitled"}`,
    item.channel_name ? `Channel: ${item.channel_name}` : "",
    item.date ? `Published: ${item.date}` : "",
    item.url ? `URL: ${item.url}` : "",
  ].filter((line) => line !== "");

  const sections = [
    serializeTranscriptSection(
      "Primary Transcript",
      transcriptData.transcript_parts,
      transcriptData.transcript_full
    ),
    serializeTranscriptSection(
      "Secondary Transcript",
      transcriptData.transcript_secondary_parts,
      transcriptData.transcript_secondary_full
    ),
  ].filter((section) => section);

  const bodyLines = sections.length ? sections : ["No transcript available."];
  return [...header, "", ...bodyLines].join("\n").trim() + "\n";
};
|
||||||
|
|
||||||
|
/**
 * Temporarily replace a button's label with `message`, restoring the original
 * label after `duration` milliseconds. The original label is remembered in
 * `data-original-label` so overlapping flashes restore the right text.
 */
const flashButtonMessage = (button, message, duration = 1800) => {
  if (!button) return;
  const { dataset } = button;
  const original = dataset.originalLabel || button.textContent;
  dataset.originalLabel = original;
  button.textContent = message;
  const restoreLabel = () => {
    button.textContent = button.dataset.originalLabel || original;
  };
  setTimeout(restoreLabel, duration);
};
|
||||||
|
|
||||||
|
/**
 * Click handler for "Download transcript": fetches the transcript, wraps it
 * in a text Blob and triggers a browser download named "<video_id>.txt".
 * The button is disabled for the duration; failures are logged and surfaced
 * with an alert.
 */
const handleTranscriptDownload = async (item, button) => {
  if (!item.video_id) return;
  button.disabled = true;
  try {
    const transcriptData = await fetchTranscriptData(item.video_id);
    if (!transcriptData) throw new Error("Transcript unavailable");

    const contents = buildTranscriptDownloadText(item, transcriptData);
    const objectUrl = URL.createObjectURL(
      new Blob([contents], { type: "text/plain" })
    );

    // A temporary anchor is the portable way to start a named download.
    const anchor = document.createElement("a");
    anchor.href = objectUrl;
    anchor.download = `${item.video_id}.txt`;
    document.body.appendChild(anchor);
    anchor.click();
    document.body.removeChild(anchor);
    URL.revokeObjectURL(objectUrl);

    flashButtonMessage(button, "Downloaded");
  } catch (err) {
    console.error("Download failed", err);
    alert("Unable to download transcript right now.");
  } finally {
    button.disabled = false;
  }
};
|
||||||
|
|
||||||
|
/**
 * Format a date value in MLA style, e.g. "15 Jan. 2024".
 *
 * Date-only ISO strings ("2024-01-15") are parsed by `Date` as UTC midnight,
 * so the components are read back with the UTC getters; local getters would
 * report the previous day in time zones west of UTC.
 *
 * @param {string} value Date string to format.
 * @returns {string} "n.d." for falsy input, the input itself when it cannot
 *   be parsed, otherwise "<day> <month> <year>".
 */
const formatMlaDate = (value) => {
  if (!value) return "n.d.";
  const parsed = new Date(value);
  if (Number.isNaN(parsed.valueOf())) return value;
  const months = [
    "Jan.", "Feb.", "Mar.", "Apr.", "May", "June",
    "July", "Aug.", "Sept.", "Oct.", "Nov.", "Dec.",
  ];
  const day = parsed.getUTCDate();
  const month = months[parsed.getUTCMonth()];
  const year = parsed.getUTCFullYear();
  return `${day} ${month} ${year}`;
};
|
||||||
|
|
||||||
|
/**
 * Compose an MLA-style citation string for a video result, using today's
 * (UTC) date as the access date.
 */
const buildMlaCitation = (item) => {
  const channel = (item.channel_name || item.channel_id || "Unknown").trim();
  const title = (item.title || "Untitled").trim();
  const url = item.url || "";
  const publishDate = formatMlaDate(item.date);
  const isoToday = new Date().toISOString().split("T")[0];
  const today = formatMlaDate(isoToday);
  const segments = [
    `${channel}.`,
    `"${title}."`,
    "YouTube,",
    `uploaded by ${channel},`,
    `${publishDate},`,
    `${url}.`,
    `Accessed ${today}.`,
  ];
  return segments.join(" ");
};
|
||||||
|
|
||||||
|
/**
 * Click handler for "Copy citation": copies the MLA citation to the
 * clipboard and flashes "Copied!" on the button.
 *
 * Uses the async Clipboard API in secure contexts and falls back to a hidden
 * textarea with `document.execCommand("copy")` elsewhere. If either path
 * fails, the citation is shown in an alert so the user can copy it manually.
 */
const handleCopyCitation = async (item, button) => {
  const citation = buildMlaCitation(item);
  try {
    if (navigator.clipboard && window.isSecureContext) {
      await navigator.clipboard.writeText(citation);
    } else {
      const textarea = document.createElement("textarea");
      textarea.value = citation;
      textarea.style.position = "fixed";
      textarea.style.opacity = "0";
      document.body.appendChild(textarea);
      try {
        textarea.select();
        // execCommand signals failure by returning false rather than
        // throwing; treat that as an error so we don't claim "Copied!".
        if (!document.execCommand("copy")) {
          throw new Error("execCommand('copy') was rejected");
        }
      } finally {
        // Always remove the helper element, even when copying failed.
        document.body.removeChild(textarea);
      }
    }
    flashButtonMessage(button, "Copied!");
  } catch (err) {
    console.error("Citation copy failed", err);
    alert(citation);
  }
};
|
||||||
|
|
||||||
|
/** Rendering helpers **/
|
||||||
|
/**
 * Build the highlight list for a result.
 * Each truthy entry becomes a row whose markup is `entry.html`, else
 * `entry.text`, else the entry itself, followed by a source indicator.
 * Returns null when `entries` is not a non-empty array.
 *
 * NOTE(review): the markup is assigned via innerHTML unescaped, so it is
 * presumably expected to be server-sanitised highlight HTML. Confirm.
 */
const createHighlightRows = (entries) => {
  if (!Array.isArray(entries) || entries.length === 0) return null;

  const container = document.createElement("div");
  container.className = "transcript highlight-list";

  for (const entry of entries) {
    if (!entry) continue;

    const textBlock = document.createElement("div");
    textBlock.className = "highlight-text";
    textBlock.innerHTML = entry.html || entry.text || entry;

    const indicator = document.createElement("span");
    indicator.className = "highlight-source-indicator highlight-source-indicator--primary";
    indicator.title = "Vector highlight";

    const row = document.createElement("div");
    row.className = "highlight-row";
    row.appendChild(textBlock);
    row.appendChild(indicator);
    container.appendChild(row);
  }

  return container;
};
|
||||||
|
|
||||||
|
/**
 * Build the per-result action bar: "Download transcript", "Copy citation"
 * and "Graph" (disabled when the item has no video id; opens
 * /graph?video_id=... in a new tab otherwise).
 */
const createActions = (item) => {
  // Small factory so the three buttons share their boilerplate.
  const makeButton = (label, className, onClick) => {
    const btn = document.createElement("button");
    btn.type = "button";
    btn.className = className;
    btn.textContent = label;
    btn.addEventListener("click", () => onClick(btn));
    return btn;
  };

  const actions = document.createElement("div");
  actions.className = "result-actions";

  actions.appendChild(
    makeButton("Download transcript", "result-action-btn", (btn) =>
      handleTranscriptDownload(item, btn)
    )
  );
  actions.appendChild(
    makeButton("Copy citation", "result-action-btn", (btn) =>
      handleCopyCitation(item, btn)
    )
  );

  const graphBtn = makeButton("Graph", "result-action-btn graph-launch-btn", () => {
    if (!item.video_id) return;
    const target = `/graph?video_id=${encodeURIComponent(item.video_id)}`;
    window.open(target, "_blank", "noopener");
  });
  graphBtn.disabled = !item.video_id;
  actions.appendChild(graphBtn);

  return actions;
};
|
||||||
|
|
||||||
|
/** Coerce a duration (number or numeric string) to seconds; null when unusable. */
const parseDurationSeconds = (raw) => {
  if (typeof raw === "number") return raw;
  if (typeof raw === "string" && raw.trim()) {
    const parsed = parseFloat(raw);
    return Number.isNaN(parsed) ? null : parsed;
  }
  return null;
};

/**
 * Split a chunk timestamp into a display label and a value for the URL's
 * `t=` parameter. Numbers are seconds; objects are segment-like records;
 * anything else (typically a string) is used verbatim for both.
 */
const resolveChunkTimestamp = (raw) => {
  if (typeof raw === "number") {
    return { label: formatTimestamp(raw), param: Math.floor(raw) };
  }
  if (raw && typeof raw === "object") {
    const seconds = [raw.start_seconds, raw.start, raw.offset, raw.time]
      .map((value) => (value == null ? NaN : parseFloat(value)))
      .find((value) => !Number.isNaN(value));
    return {
      label: formatSegmentTimestamp(raw),
      // Without a numeric field there is nothing meaningful to seek to.
      param: seconds === undefined ? null : Math.floor(seconds),
    };
  }
  return { label: String(raw), param: raw };
};

/** Build the header (title, meta line, YouTube link, score, actions) of one result. */
const buildResultHeader = (item) => {
  const headerMain = document.createElement("div");
  headerMain.className = "result-header-main";

  const titleEl = document.createElement("strong");
  // titleHtml is inserted as markup; the plain title is escaped first.
  titleEl.innerHTML = item.titleHtml || escapeHtml(item.title || "Untitled");
  headerMain.appendChild(titleEl);

  const metaLine = document.createElement("div");
  metaLine.className = "muted result-meta";
  const channelLabel = item.channel_name || item.channel_id || "Unknown";
  const durationSeconds = parseDurationSeconds(item.duration);
  const durationLabel =
    durationSeconds != null ? ` • ${formatTimestamp(durationSeconds)}` : "";
  metaLine.textContent = `${channelLabel} • ${fmtDate(item.date)}${durationLabel}`;
  if (isLikelyDeleted(item)) {
    metaLine.appendChild(document.createTextNode(" "));
    const statusEl = document.createElement("span");
    statusEl.className = "result-status result-status--deleted";
    statusEl.textContent = "Likely deleted";
    metaLine.appendChild(statusEl);
  }
  headerMain.appendChild(metaLine);

  if (item.url) {
    const linkLine = document.createElement("div");
    linkLine.className = "muted";
    const anchor = document.createElement("a");
    anchor.href = item.url;
    anchor.target = "_blank";
    anchor.rel = "noopener";
    anchor.textContent = "Open on YouTube";
    linkLine.appendChild(anchor);
    headerMain.appendChild(linkLine);
  }

  if (typeof item.distance === "number") {
    const scoreLine = document.createElement("div");
    scoreLine.className = "muted";
    scoreLine.textContent = `Similarity score: ${fmtSimilarity(item.distance)}`;
    headerMain.appendChild(scoreLine);
  }

  const header = document.createElement("div");
  header.className = "result-header";
  header.appendChild(headerMain);
  header.appendChild(createActions(item));
  return header;
};

/** Build the matched-chunk block, with a timestamp deep link when one is available. */
const buildChunkBlock = (item) => {
  const chunkBlock = document.createElement("div");
  chunkBlock.className = "vector-chunk";

  // `!= null` rather than truthiness so a chunk at second 0 still gets a link.
  const hasTimestamp = item.chunkTimestamp != null && item.chunkTimestamp !== "";
  if (hasTimestamp && item.url) {
    const { label, param } = resolveChunkTimestamp(item.chunkTimestamp);
    const tsLink = document.createElement("a");
    const separator = item.url.includes("?") ? "&" : "?";
    tsLink.href =
      param == null
        ? item.url
        : `${item.url}${separator}t=${encodeURIComponent(param)}`;
    tsLink.target = "_blank";
    tsLink.rel = "noopener";
    tsLink.textContent = label ? `[${label}]` : "[timestamp]";
    chunkBlock.appendChild(tsLink);
    chunkBlock.appendChild(document.createTextNode(" "));
  }

  const chunkTextSpan = document.createElement("span");
  chunkTextSpan.textContent = item.chunkText;
  chunkBlock.appendChild(chunkTextSpan);
  return chunkBlock;
};

/**
 * Render the vector-search response into `resultsDiv` and update `metaDiv`.
 *
 * @param {{items?: Array<object>}} payload Response of /api/vector-search.
 *   A missing payload or a non-array `items` is treated as "no matches".
 */
const renderVectorResults = (payload) => {
  resultsDiv.innerHTML = "";
  const items = payload && Array.isArray(payload.items) ? payload.items : [];
  if (!items.length) {
    metaDiv.textContent = "No vector matches for this prompt.";
    return;
  }
  metaDiv.textContent = `Matches: ${items.length} (vector mode)`;

  items.forEach((item) => {
    const el = document.createElement("div");
    el.className = "item";
    el.appendChild(buildResultHeader(item));

    if (item.descriptionHtml || item.description) {
      const desc = document.createElement("div");
      desc.className = "muted description-block";
      desc.innerHTML = item.descriptionHtml || escapeHtml(item.description);
      el.appendChild(desc);
    }

    if (item.chunkText) {
      el.appendChild(buildChunkBlock(item));
    }

    const highlights = createHighlightRows(item.toHighlight);
    if (highlights) {
      el.appendChild(highlights);
    }

    resultsDiv.appendChild(el);
  });
};
|
||||||
|
|
||||||
|
/** Search handler **/
|
||||||
|
/**
 * Run a vector search for the text in the query box.
 *
 * POSTs `{ query }` to /api/vector-search and hands the JSON response to
 * renderVectorResults. A non-OK status, a thrown error, or an `error` field
 * in the response all show "Vector search unavailable." in the meta line.
 * The search button is disabled while a request is in flight.
 */
const runVectorSearch = async () => {
  // The disabled button doubles as the in-flight flag: pressing Enter again
  // must not start a second, overlapping request.
  if (searchBtn.disabled) return;

  const query = queryInput.value.trim();
  if (!query) {
    alert("Please enter a query.");
    return;
  }

  metaDiv.textContent = "Searching vector index…";
  resultsDiv.innerHTML = "";
  searchBtn.disabled = true;
  try {
    const res = await fetch("/api/vector-search", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ query }),
    });
    if (!res.ok) {
      throw new Error(`Vector search failed (${res.status})`);
    }
    const data = await res.json();
    if (data.error) {
      metaDiv.textContent = "Vector search unavailable.";
      return;
    }
    renderVectorResults(data);
  } catch (err) {
    console.error(err);
    metaDiv.textContent = "Vector search unavailable.";
  } finally {
    searchBtn.disabled = false;
  }
};
|
||||||
|
|
||||||
|
// Wire up the UI: clicking the button or pressing Enter in the query box
// starts a search.
searchBtn.addEventListener("click", runVectorSearch);
// "keydown" replaces the deprecated "keypress" event. isComposing is checked
// so that confirming an IME composition with Enter does not submit a
// half-typed query.
queryInput.addEventListener("keydown", (event) => {
  if (event.key === "Enter" && !event.isComposing) {
    runVectorSearch();
  }
});
|
||||||
|
})();
|
||||||
188
sync_qdrant_channels.py
Normal file
188
sync_qdrant_channels.py
Normal file
@ -0,0 +1,188 @@
|
|||||||
|
"""
|
||||||
|
Utility to backfill channel titles/names inside the Qdrant payloads.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python -m python_app.sync_qdrant_channels \
|
||||||
|
--batch-size 512 \
|
||||||
|
--max-points 200 \
|
||||||
|
--dry-run
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Iterable, List, Optional, Set, Tuple
|
||||||
|
import time
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from .config import CONFIG
|
||||||
|
from .search_app import _ensure_client
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def chunked(iterable: Iterable, size: int):
    """Yield successive lists of up to ``size`` items drawn from ``iterable``.

    Every list except possibly the last holds ``size`` items; a trailing
    partial list is yielded only when it is non-empty.
    """
    pending: List = []
    for element in iterable:
        pending.append(element)
        if len(pending) < size:
            continue
        yield pending
        # Start a fresh list so the one just yielded is never mutated.
        pending = []
    if pending:
        yield pending
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_channels(channel_ids: Iterable[str]) -> Dict[str, str]:
    """Look up channel names for ``channel_ids`` in the Elasticsearch index.

    Args:
        channel_ids: Channel ids to resolve; duplicates and empty values are
            ignored.

    Returns:
        Mapping of channel id to channel name for every id that has at least
        one indexed document carrying a non-empty ``channel_name``. Ids that
        cannot be resolved are simply absent.
    """
    ids = sorted({cid for cid in channel_ids if cid})
    if not ids:
        # Nothing to look up: avoid creating a client or issuing a query.
        return {}
    client = _ensure_client(CONFIG)
    body = {
        # One collapsed hit per channel is all we need.
        "size": len(ids),
        "_source": ["channel_id", "channel_name"],
        "query": {
            "bool": {
                "filter": [
                    {"terms": {"channel_id.keyword": ids}},
                    # Skip documents that could not supply a name anyway.
                    {"exists": {"field": "channel_name"}},
                ]
            }
        },
        # The index holds many documents per channel. Without collapsing, a
        # single prolific channel can fill every returned hit and leave the
        # other requested channels unresolved.
        "collapse": {"field": "channel_id.keyword"},
    }
    response = client.search(index=CONFIG.elastic.index, body=body)
    resolved: Dict[str, str] = {}
    for hit in response.get("hits", {}).get("hits", []):
        source = hit.get("_source") or {}
        cid = source.get("channel_id")
        cname = source.get("channel_name")
        if cid and cname and cid not in resolved:
            resolved[cid] = cname
    return resolved
|
||||||
|
|
||||||
|
|
||||||
|
def upsert_channel_payload(
    qdrant_url: str,
    collection: str,
    channel_id: str,
    channel_name: str,
    *,
    dry_run: bool = False,
) -> bool:
    """Set channel_name/channel_title for all vectors with this channel_id.

    Issues one filtered set-payload request to Qdrant, so every point whose
    ``channel_id`` matches is updated by a single call.

    Args:
        qdrant_url: Base URL of the Qdrant HTTP API.
        collection: Name of the collection to update.
        channel_id: Channel id used as the filter value.
        channel_name: Value written to both ``channel_name`` and
            ``channel_title``.
        dry_run: When true, only log the intended update and send nothing.

    Returns:
        True when the update succeeded (or was skipped by ``dry_run``), False
        when the request failed at the HTTP or transport level.
    """
    payload = {"channel_name": channel_name, "channel_title": channel_name}
    body = {
        "payload": payload,
        "filter": {"must": [{"key": "channel_id", "match": {"value": channel_id}}]},
    }
    LOGGER.info("Updating channel_id=%s -> %s", channel_id, channel_name)
    if dry_run:
        return True
    try:
        resp = requests.post(
            f"{qdrant_url}/collections/{collection}/points/payload",
            json=body,
            timeout=120,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported the same way as HTTP
        # errors so that one unreachable request does not abort the backfill.
        LOGGER.error("Failed to update %s: %s", channel_id, exc)
        return False
    if resp.status_code >= 400:
        LOGGER.error("Failed to update %s: %s", channel_id, resp.text)
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def scroll_missing_payloads(
    qdrant_url: str,
    collection: str,
    batch_size: int,
    *,
    max_points: Optional[int] = None,
) -> Iterable[List[Tuple[str, Dict[str, object]]]]:
    """Yield batches of (point_id, payload) missing channel names.

    Pages through the Qdrant scroll endpoint with an ``is_empty`` filter on
    ``channel_name``.

    Args:
        qdrant_url: Base URL of the Qdrant HTTP API.
        collection: Name of the collection to scan.
        batch_size: Number of points requested per page.
        max_points: Optional cap on the total number of points yielded.

    Yields:
        One list of ``(point_id, payload)`` tuples per page; a missing payload
        is replaced by an empty dict.

    Raises:
        requests.HTTPError: When a page still fails after the page size has
            been halved down to 5.
        requests.RequestException: When a page keeps failing with transport
            errors after several retries.
    """
    # Bound the retries so an unreachable server fails instead of looping forever.
    max_transient_retries = 5
    fetched = 0
    next_page = None
    while True:
        current_limit = batch_size
        if max_points:
            # Never request more than is still needed to reach the cap.
            current_limit = min(current_limit, max_points - fetched)
        transient_failures = 0
        while True:
            body = {
                "limit": current_limit,
                "with_payload": True,
                "filter": {"must": [{"is_empty": {"key": "channel_name"}}]},
            }
            # Compare against None: a point id of 0 is a valid, falsy offset.
            if next_page is not None:
                body["offset"] = next_page
            try:
                resp = requests.post(
                    f"{qdrant_url}/collections/{collection}/points/scroll",
                    json=body,
                    timeout=120,
                )
                resp.raise_for_status()
                break
            except requests.HTTPError as exc:
                LOGGER.warning(
                    "Scroll request failed at limit=%s: %s", current_limit, exc
                )
                if current_limit <= 5:
                    raise
                # Retry with a smaller page; large pages may be what fails.
                current_limit = max(5, current_limit // 2)
                LOGGER.info("Reducing scroll batch size to %s", current_limit)
                time.sleep(2)
            except requests.RequestException as exc:
                transient_failures += 1
                LOGGER.warning("Transient scroll error: %s", exc)
                if transient_failures >= max_transient_retries:
                    raise
                time.sleep(2)
        payload = resp.json().get("result") or {}
        points = payload.get("points", [])
        if not points:
            break
        batch: List[Tuple[str, Dict[str, object]]] = [
            (point.get("id"), point.get("payload") or {}) for point in points
        ]
        yield batch
        fetched += len(points)
        if max_points and fetched >= max_points:
            break
        next_page = payload.get("next_page_offset")
        if next_page is None:
            break
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Backfill channel names in Qdrant from the command line.

    Scrolls points lacking ``channel_name``, resolves their channel ids via
    ``resolve_channels`` and writes the names back with one filtered update
    per channel. Flags: ``--batch-size``, ``--max-points``, ``--dry-run``.
    """
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    parser = argparse.ArgumentParser(
        description="Backfill missing channel_name/channel_title in Qdrant payloads"
    )
    parser.add_argument("--batch-size", type=int, default=512)
    parser.add_argument(
        "--max-points",
        type=int,
        default=None,
        help="Limit processing to the first N points for testing",
    )
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()
    if args.batch_size < 1:
        parser.error("--batch-size must be a positive integer")

    q_url = CONFIG.qdrant_url
    collection = CONFIG.qdrant_collection
    total_updates = 0
    # A single filtered update already covers every point of a channel, so
    # each channel is resolved and updated at most once per run.
    updated: Set[str] = set()
    unresolved: Set[str] = set()

    for batch in scroll_missing_payloads(
        q_url, collection, args.batch_size, max_points=args.max_points
    ):
        channel_ids: Set[str] = {
            str(payload.get("channel_id"))
            for _, payload in batch
            if payload.get("channel_id")
        }
        pending = channel_ids - updated - unresolved
        if not pending:
            continue
        resolved = resolve_channels(pending)
        missing = pending - set(resolved)
        if missing:
            LOGGER.warning("No channel names resolved for ids: %s", missing)
            # Remember them so later batches do not repeat the same lookup.
            unresolved |= missing
        for cid, name in resolved.items():
            if upsert_channel_payload(
                q_url, collection, cid, name, dry_run=args.dry_run
            ):
                # Failed updates stay out of `updated` and are retried when
                # the channel shows up in a later batch.
                updated.add(cid)
                total_updates += 1
        LOGGER.info("Updated %s channel payloads so far", total_updates)

    LOGGER.info("Finished. Total channel updates attempted: %s", total_updates)


if __name__ == "__main__":
    main()
|
||||||
Loading…
x
Reference in New Issue
Block a user