Initial commit
commit fcdc6ecb9b
.gitignore (vendored, new file, 60 lines)
@@ -0,0 +1,60 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Virtual environments
.venv/
venv/
ENV/
env/

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# Environment variables
.env
.env.local

# Elasticsearch data
data/

# OS
.DS_Store
Thumbs.db

# Logs
*.log

# Testing
.pytest_cache/
.coverage
htmlcov/

# mypy
.mypy_cache/
README.md (new file, 87 lines)
@@ -0,0 +1,87 @@
# Python Search Toolkit (Rough Draft)

This minimal Python implementation covers three core needs:

1. **Collect transcripts** from YouTube channels.
2. **Ingest transcripts/metadata** into Elasticsearch.
3. **Expose a simple Flask search UI** that queries Elasticsearch directly.

The code lives alongside the existing C# stack so you can experiment without
touching production infrastructure.

## Setup

```bash
python -m venv .venv
source .venv/bin/activate
pip install -r python_app/requirements.txt
```

Configure your environment as needed:

```bash
export ELASTIC_URL=http://localhost:9200
export ELASTIC_INDEX=this_little_corner_py
export ELASTIC_USERNAME=elastic # optional
export ELASTIC_PASSWORD=secret # optional
export ELASTIC_API_KEY=XXXX # optional alternative auth
export ELASTIC_CA_CERT=/path/to/ca.pem # optional, for self-signed TLS
export ELASTIC_VERIFY_CERTS=1 # set to 0 to skip verification (dev only)
export ELASTIC_DEBUG=0 # set to 1 for verbose request/response logging
export LOCAL_DATA_DIR=./data/video_metadata # defaults to this
export YOUTUBE_API_KEY=AIza... # required for live collection
```

## 1. Collect Transcripts

```bash
python -m python_app.transcript_collector \
  --channel UCxxxx \
  --output data/raw \
  --max-pages 2
```

Each video becomes a JSON file containing metadata plus transcript segments
(`TranscriptSegment`). Downloads require both `google-api-python-client` and
`youtube-transcript-api`, as well as a valid `YOUTUBE_API_KEY`.
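
The collector writes one JSON file per video. A rough sketch of the shape,
with field names taken from what the ingest step below reads (the exact keys
may differ depending on the collector version):

```bash
python -m json.tool data/raw/VIDEO_ID.json
# {
#   "video_id": "VIDEO_ID",
#   "channel_id": "UCxxxx",
#   "channel_name": "...",
#   "title": "...",
#   "date": "2023-01-01T00:00:00Z",
#   "transcript_parts": [
#     {"start": 0.0, "duration": 4.2, "text": "..."}
#   ]
# }
```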
> Already have cached JSON? You can skip this step and move straight to ingesting.

## 2. Ingest Into Elasticsearch

```bash
python -m python_app.ingest \
  --source data/video_metadata \
  --index this_little_corner_py
```

The script walks the source directory, builds `bulk` requests, and creates the
index with a lightweight mapping when needed. Authentication is handled via
`ELASTIC_USERNAME` / `ELASTIC_PASSWORD` if set.
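
To sanity-check the result, the standard `_count` API works (this assumes the
default local node and the index name used above):

```bash
curl -s "http://localhost:9200/this_little_corner_py/_count?pretty"
```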
## 3. Serve the Search Frontend

```bash
python -m python_app.search_app
```

Visit <http://localhost:8080/> and you’ll see a barebones UI that:

- Lists channels via a terms aggregation.
- Queries titles/descriptions/transcripts with toggleable exact, fuzzy, and phrase clauses plus optional date sorting.
- Surfaces transcript highlights.
- Lets you pull the full transcript for any result on demand.
- Shows a stacked-by-channel timeline for each search query (with `/frequency` offering a standalone explorer) powered by D3.js.
- Supports a query-string mode toggle so you can write advanced Lucene queries (e.g. `meaning OR purpose`, `meaning~2` for fuzzy matches, `title:(meaning crisis)`), while the default toggles stay AND-backed (see the example requests below).
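
The UI is a thin layer over `/api/search`, so you can also hit the API directly.
For example (parameter names as defined in `search_app.py`; the query values are
just placeholders):

```bash
# Default mode: exact/fuzzy/phrase clauses, newest first
curl -s "http://localhost:8080/api/search?q=meaning&size=5&sort=newer"

# Query-string mode with a Lucene expression
curl -s --get "http://localhost:8080/api/search" \
  --data-urlencode "q=title:(meaning crisis)" \
  --data-urlencode "query_string=1"
```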
## Integration Notes

- All modules share configuration through `python_app.config.CONFIG`, so you can
  fine-tune paths or credentials centrally (a quick way to inspect it is shown below).
- The ingest flow reuses the existing JSON schema from `data/video_metadata`, so no
  re-download is necessary if you already have the dumps.
- Everything is intentionally simple (no Celery, task queues, or custom auth) to
  keep the draft approachable and easy to extend.
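
For example, to inspect the configuration the toolkit will actually use at runtime:

```bash
python -c "from python_app.config import CONFIG; print(CONFIG.elastic.url, CONFIG.elastic.index, CONFIG.data.root)"
```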

Feel free to expand on this scaffold—add proper logging, schedule transcript
updates, or flesh out the UI—once you’re happy with the baseline behaviour.
__init__.py (new file, 11 lines)
@@ -0,0 +1,11 @@
"""
Minimal Python toolkit for collecting YouTube transcripts, ingesting them into
Elasticsearch, and serving a lightweight search API/front-end.

Modules:
    config: shared configuration helpers (Elastic endpoint, data paths, etc.).
    transcript_collector: fetches channel metadata and transcripts.
    ingest: pushes transcript JSON into Elasticsearch.
    search_app: Flask app exposing simple search and transcript endpoints.
"""
config.py (new file, 81 lines)
@@ -0,0 +1,81 @@
"""
Centralised configuration helpers for the Python search toolkit.

Environment Variables:
    ELASTIC_URL: Base URL to the Elasticsearch node (default: http://localhost:9200).
    ELASTIC_USERNAME / ELASTIC_PASSWORD: Optional basic auth credentials.
    ELASTIC_INDEX: Target index name (default: this_little_corner_py).
    LOCAL_DATA_DIR: Root folder containing JSON metadata (default: ../data/video_metadata).
    YOUTUBE_API_KEY: Optional API key for pulling metadata directly from YouTube.
"""

from __future__ import annotations

import os
from dataclasses import dataclass
from pathlib import Path
from typing import Optional


@dataclass(frozen=True)
class ElasticSettings:
    url: str
    username: Optional[str]
    password: Optional[str]
    index: str
    ca_cert: Optional[Path]
    verify_certs: bool
    api_key: Optional[str]
    debug: bool


@dataclass(frozen=True)
class DataSettings:
    root: Path


@dataclass(frozen=True)
class YoutubeSettings:
    api_key: Optional[str]


@dataclass(frozen=True)
class AppConfig:
    elastic: ElasticSettings
    data: DataSettings
    youtube: YoutubeSettings


def _env(name: str, default: Optional[str] = None) -> Optional[str]:
    """Return an environment variable value with optional default."""
    value = os.environ.get(name)
    if value is None:
        return default
    stripped = value.strip()
    return stripped or default


def load_config() -> AppConfig:
    """Collect configuration from environment variables."""
    elastic = ElasticSettings(
        url=_env("ELASTIC_URL", "http://localhost:9200"),
        username=_env("ELASTIC_USERNAME"),
        password=_env("ELASTIC_PASSWORD"),
        index=_env("ELASTIC_INDEX", "this_little_corner_py"),
        ca_cert=Path(_env("ELASTIC_CA_CERT")).expanduser() if _env("ELASTIC_CA_CERT") else None,
        verify_certs=_env("ELASTIC_VERIFY_CERTS", "1") not in {"0", "false", "False"},
        api_key=_env("ELASTIC_API_KEY"),
        debug=_env("ELASTIC_DEBUG", "0") in {"1", "true", "True"},
    )
    # The default is passed as a string so it matches _env's Optional[str] contract.
    data_root = Path(
        _env(
            "LOCAL_DATA_DIR",
            str(Path(__file__).resolve().parents[1] / "data" / "video_metadata"),
        )
    )
    data = DataSettings(root=data_root)
    youtube = YoutubeSettings(api_key=_env("YOUTUBE_API_KEY"))
    return AppConfig(elastic=elastic, data=data, youtube=youtube)


CONFIG = load_config()
ingest.py (new file, 193 lines)
@@ -0,0 +1,193 @@
"""
Utilities for indexing transcript JSON documents into Elasticsearch.

Usage:
    python -m python_app.ingest --source data/video_metadata --index corner
"""

from __future__ import annotations

import argparse
import json
import logging
from pathlib import Path
from typing import Dict, Iterable, Iterator, Optional

from .config import CONFIG, AppConfig

try:
    from elasticsearch import Elasticsearch, helpers  # type: ignore
except ImportError:  # pragma: no cover - dependency optional
    Elasticsearch = None
    helpers = None


LOGGER = logging.getLogger(__name__)


def _ensure_client(config: AppConfig) -> "Elasticsearch":
    if Elasticsearch is None:
        raise RuntimeError(
            "elasticsearch package not installed. "
            "Install elasticsearch>=8 to index documents."
        )
    kwargs = {}
    if config.elastic.api_key:
        kwargs["api_key"] = config.elastic.api_key
    elif config.elastic.username and config.elastic.password:
        kwargs["basic_auth"] = (
            config.elastic.username,
            config.elastic.password,
        )
    if config.elastic.ca_cert:
        kwargs["ca_certs"] = str(config.elastic.ca_cert)
    kwargs["verify_certs"] = config.elastic.verify_certs
    return Elasticsearch(config.elastic.url, **kwargs)


def iter_json_documents(data_root: Path) -> Iterator[Dict]:
    """Yield JSON objects from the provided directory tree."""
    if not data_root.exists():
        raise FileNotFoundError(f"{data_root} does not exist")
    for path in sorted(data_root.rglob("*.json")):
        try:
            with path.open("r", encoding="utf-8") as handle:
                doc = json.load(handle)
            doc.setdefault("video_id", path.stem)
            yield doc
        except Exception as exc:
            LOGGER.warning("Skipping %s: %s", path, exc)


def build_bulk_actions(
    docs: Iterable[Dict], *, index: Optional[str] = None
) -> Iterator[Dict]:
    """Translate raw JSON dictionaries into Elasticsearch bulk actions."""
    for doc in docs:
        video_id = doc.get("video_id")
        if not video_id:
            continue
        parts = doc.get("transcript_parts") or doc.get("transcript") or []
        transcript_full = doc.get("transcript_full")
        if not transcript_full and isinstance(parts, list):
            transcript_full = " ".join(
                segment.get("text", "") if isinstance(segment, dict) else str(segment)
                for segment in parts
            ).strip()
        yield {
            "_id": video_id,
            "_index": index or CONFIG.elastic.index,
            "_op_type": "index",
            "_source": {
                "video_id": video_id,
                "channel_id": doc.get("channel_id"),
                "channel_name": doc.get("channel_name"),
                "title": doc.get("title"),
                "description": doc.get("description"),
                "date": doc.get("date") or doc.get("published_at"),
                "url": doc.get("url"),
                "duration": doc.get("duration"),
                "transcript_full": transcript_full,
                "transcript_secondary_full": doc.get("transcript_secondary_full"),
                "transcript_parts": parts,
            },
        }


def ensure_index(client: "Elasticsearch", index: str) -> None:
    """Create the target index with a minimal mapping if it is missing."""
    if client.indices.exists(index=index):
        return
    LOGGER.info("Creating index %s", index)
    client.indices.create(
        index=index,
        mappings={
            "properties": {
                "video_id": {"type": "keyword"},
                "channel_id": {"type": "keyword"},
                "channel_name": {"type": "keyword"},
                "title": {"type": "text"},
                "description": {"type": "text"},
                "date": {"type": "date", "format": "strict_date_optional_time"},
                "url": {"type": "keyword"},
                "duration": {"type": "float"},
                "transcript_full": {"type": "text"},
                "transcript_secondary_full": {"type": "text"},
                "transcript_parts": {
                    "type": "nested",
                    "properties": {
                        "start": {"type": "float"},
                        "duration": {"type": "float"},
                        "text": {"type": "text"},
                    },
                },
            }
        },
    )


def ingest_directory(
    data_root: Path,
    *,
    config: AppConfig = CONFIG,
    index: Optional[str] = None,
    batch_size: int = 500,
    request_timeout: int = 120,
) -> None:
    """Bulk index every JSON file in the directory tree."""
    client = _ensure_client(config)
    target_index = index or config.elastic.index
    ensure_index(client, target_index)
    docs = iter_json_documents(data_root)
    actions = build_bulk_actions(docs, index=target_index)
    bulk_client = client.options(request_timeout=request_timeout)
    helpers.bulk(
        bulk_client,
        actions,
        chunk_size=batch_size,
    )
    LOGGER.info("Ingestion complete for %s", target_index)


def _build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="Ingest transcript JSON files into Elasticsearch."
    )
    parser.add_argument(
        "--source",
        type=Path,
        default=CONFIG.data.root,
        help="Directory containing per-video JSON files.",
    )
    parser.add_argument(
        "--index",
        help="Override the Elasticsearch index name.",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=500,
        help="Bulk ingest batch size.",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=120,
        help="Request timeout (seconds) for bulk operations.",
    )
    return parser


def main() -> None:
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    args = _build_parser().parse_args()
    ingest_directory(
        args.source,
        index=args.index,
        batch_size=args.batch_size,
        request_timeout=args.timeout,
    )


if __name__ == "__main__":  # pragma: no cover
    main()
requirements.txt (new file, 4 lines)
@@ -0,0 +1,4 @@
Flask>=2.3
elasticsearch>=8.0.0,<9.0.0
youtube-transcript-api>=0.6
google-api-python-client>=2.0.0
search_app.py (new file, 910 lines)
@@ -0,0 +1,910 @@
"""
Flask application exposing a minimal search API backed by Elasticsearch.

Routes:
    GET /               -> Static HTML search page.
    GET /api/channels   -> List available channels (via terms aggregation).
    GET /api/search     -> Search index with pagination and simple highlighting.
    GET /api/transcript -> Return full transcript for a given video_id.
"""

from __future__ import annotations

import copy
import json
import logging
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set

from collections import Counter
from datetime import datetime

from flask import Flask, jsonify, request, send_from_directory

from .config import CONFIG, AppConfig

try:
    from elasticsearch import Elasticsearch  # type: ignore
    from elasticsearch import BadRequestError  # type: ignore
except ImportError:  # pragma: no cover - dependency optional
    Elasticsearch = None
    BadRequestError = Exception  # type: ignore

LOGGER = logging.getLogger(__name__)


def _ensure_client(config: AppConfig) -> "Elasticsearch":
    if Elasticsearch is None:
        raise RuntimeError(
            "elasticsearch package not installed. "
            "Install elasticsearch>=8 to run the Flask search app."
        )
    kwargs = {}
    if config.elastic.api_key:
        kwargs["api_key"] = config.elastic.api_key
    elif config.elastic.username and config.elastic.password:
        kwargs["basic_auth"] = (
            config.elastic.username,
            config.elastic.password,
        )
    if config.elastic.ca_cert:
        kwargs["ca_certs"] = str(config.elastic.ca_cert)
    kwargs["verify_certs"] = config.elastic.verify_certs
    return Elasticsearch(config.elastic.url, **kwargs)


def metrics_payload(data_root: Path) -> Dict[str, Any]:
    total_items = 0
    channel_counter: Counter = Counter()
    channel_name_map: Dict[str, str] = {}
    year_counter: Counter = Counter()
    month_counter: Counter = Counter()

    if not data_root.exists():
        LOGGER.warning("Data directory %s not found; metrics will be empty.", data_root)
        return {
            "totalItems": 0,
            "totalChannels": 0,
            "itemsPerChannel": [],
            "yearHistogram": [],
            "recentMonths": [],
        }

    for path in data_root.rglob("*.json"):
        try:
            with path.open("r", encoding="utf-8") as handle:
                doc = json.load(handle)
        except Exception:
            continue

        total_items += 1

        channel_id = doc.get("channel_id")
        channel_name = doc.get("channel_name") or channel_id
        if channel_id:
            channel_counter[channel_id] += 1
            if channel_name and channel_id not in channel_name_map:
                channel_name_map[channel_id] = channel_name

        date_value = doc.get("date") or doc.get("published_at")
        dt: Optional[datetime] = None
        if isinstance(date_value, str):
            # Try the longest timestamp shape first, slicing to each format's
            # rendered width (len(fmt) is not the width of the formatted value).
            for fmt, width in (
                ("%Y-%m-%dT%H:%M:%SZ", 20),
                ("%Y-%m-%dT%H:%M:%S", 19),
                ("%Y-%m-%d", 10),
            ):
                try:
                    dt = datetime.strptime(date_value[:width], fmt)
                    break
                except Exception:
                    continue
        elif isinstance(date_value, (int, float)):
            try:
                dt = datetime.fromtimestamp(date_value)
            except Exception:
                dt = None

        if dt:
            year_counter[str(dt.year)] += 1
            month_counter[dt.strftime("%Y-%m")] += 1

    items_per_channel = [
        {
            "label": channel_name_map.get(cid, cid),
            "count": count,
        }
        for cid, count in channel_counter.most_common()
    ]

    year_histogram = [
        {"bucket": year, "count": year_counter[year]}
        for year in sorted(year_counter.keys())
    ]

    recent_months = sorted(month_counter.keys())
    recent_months = recent_months[-12:]
    recent_months_payload = [
        {"bucket": month, "count": month_counter[month]} for month in recent_months
    ]

    return {
        "totalItems": total_items,
        "totalChannels": len(channel_counter),
        "itemsPerChannel": items_per_channel,
        "yearHistogram": year_histogram,
        "recentMonths": recent_months_payload,
    }


def elastic_metrics_payload(
    client: "Elasticsearch",
    index: str,
    *,
    channel_field_candidates: Optional[List[str]] = None,
    debug: bool = False,
) -> Dict[str, Any]:
    if channel_field_candidates is None:
        channel_field_candidates = ["channel_id.keyword", "channel_id"]

    base_body: Dict[str, Any] = {
        "size": 0,
        "track_total_hits": True,
        "aggs": {
            "channels": {
                "terms": {
                    "field": "channel_id.keyword",
                    "size": 500,
                    "order": {"_count": "desc"},
                },
                "aggs": {
                    "name": {
                        "top_hits": {
                            "size": 1,
                            "_source": {"includes": ["channel_name"]},
                        }
                    }
                },
            },
            "year_histogram": {
                "date_histogram": {
                    "field": "date",
                    "calendar_interval": "year",
                    "format": "yyyy",
                }
            },
            "month_histogram": {
                "date_histogram": {
                    "field": "date",
                    "calendar_interval": "month",
                    "format": "yyyy-MM",
                    "order": {"_key": "asc"},
                }
            },
        },
    }

    last_error: Optional[Exception] = None
    response: Optional[Dict[str, Any]] = None
    for candidate_field in channel_field_candidates:
        body = json.loads(json.dumps(base_body))  # deep copy
        body["aggs"]["channels"]["terms"]["field"] = candidate_field
        try:
            if debug:
                LOGGER.info(
                    "Elasticsearch metrics request: %s",
                    json.dumps({"index": index, "body": body}, indent=2),
                )
            response = client.search(index=index, body=body)
            break
        except BadRequestError as exc:
            last_error = exc
            if debug:
                LOGGER.warning(
                    "Metrics aggregation failed for field %s: %s",
                    candidate_field,
                    exc,
                )
    if response is None:
        raise last_error or RuntimeError("Unable to compute metrics from Elasticsearch.")

    hits = response.get("hits", {})
    total_items = hits.get("total", {}).get("value", 0)

    if debug:
        LOGGER.info(
            "Elasticsearch metrics response: %s",
            json.dumps(response, indent=2, default=str),
        )

    aggregations = response.get("aggregations", {})
    channel_buckets = aggregations.get("channels", {}).get("buckets", [])
    items_per_channel = []
    for bucket in channel_buckets:
        key = bucket.get("key")
        channel_name = key
        top_hits = (
            bucket.get("name", {})
            .get("hits", {})
            .get("hits", [])
        )
        if top_hits:
            channel_name = (
                top_hits[0]
                .get("_source", {})
                .get("channel_name", channel_name)
            )
        items_per_channel.append(
            {"label": channel_name or key, "count": bucket.get("doc_count", 0)}
        )

    year_buckets = aggregations.get("year_histogram", {}).get("buckets", [])
    year_histogram = [
        {
            "bucket": bucket.get("key_as_string") or str(bucket.get("key")),
            "count": bucket.get("doc_count", 0),
        }
        for bucket in year_buckets
    ]

    month_buckets = aggregations.get("month_histogram", {}).get("buckets", [])
    recent_months_entries = [
        {
            "bucket": bucket.get("key_as_string") or str(bucket.get("key")),
            "count": bucket.get("doc_count", 0),
            "_key": bucket.get("key"),
        }
        for bucket in month_buckets
    ]
    recent_months_entries.sort(key=lambda item: item.get("_key", 0))
    recent_months_payload = [
        {"bucket": entry["bucket"], "count": entry["count"]}
        for entry in recent_months_entries[-12:]
    ]

    return {
        "totalItems": total_items,
        "totalChannels": len(items_per_channel),
        "itemsPerChannel": items_per_channel,
        "yearHistogram": year_histogram,
        "recentMonths": recent_months_payload,
    }


def parse_channel_params(values: Iterable[Optional[str]]) -> List[str]:
    seen: Set[str] = set()
    channels: List[str] = []
    for value in values:
        if not value:
            continue
        for part in str(value).split(","):
            cleaned = part.strip()
            if not cleaned or cleaned.lower() == "all":
                continue
            if cleaned not in seen:
                seen.add(cleaned)
                channels.append(cleaned)
    return channels


def build_channel_filter(channels: Optional[Sequence[str]]) -> Optional[Dict]:
    if not channels:
        return None
    per_channel_clauses: List[Dict[str, Any]] = []
    for value in channels:
        if not value:
            continue
        per_channel_clauses.append(
            {
                "bool": {
                    "should": [
                        {"term": {"channel_id.keyword": value}},
                        {"term": {"channel_id": value}},
                    ],
                    "minimum_should_match": 1,
                }
            }
        )
    if not per_channel_clauses:
        return None
    if len(per_channel_clauses) == 1:
        return per_channel_clauses[0]
    return {
        "bool": {
            "should": per_channel_clauses,
            "minimum_should_match": 1,
        }
    }


def build_query_payload(
    query: str,
    *,
    channels: Optional[Sequence[str]] = None,
    sort: str = "relevant",
    use_exact: bool = True,
    use_fuzzy: bool = True,
    use_phrase: bool = True,
    use_query_string: bool = False,
) -> Dict:
    filters: List[Dict] = []
    should: List[Dict] = []

    channel_filter = build_channel_filter(channels)
    if channel_filter:
        filters.append(channel_filter)

    if use_query_string:
        base_fields = ["title^3", "description^2", "transcript_full", "transcript_secondary_full"]
        qs_query = (query or "").strip() or "*"
        query_body: Dict[str, Any] = {
            "query_string": {
                "query": qs_query,
                "default_operator": "AND",
                "fields": base_fields,
            }
        }
        if filters:
            query_body = {"bool": {"must": query_body, "filter": filters}}
        body: Dict = {
            "query": query_body,
            "highlight": {
                "fields": {
                    "transcript_full": {
                        "fragment_size": 160,
                        "number_of_fragments": 5,
                        "fragmenter": "span",
                    },
                    "transcript_secondary_full": {
                        "fragment_size": 160,
                        "number_of_fragments": 5,
                        "fragmenter": "span",
                    },
                    "title": {"number_of_fragments": 0},
                    "description": {
                        "fragment_size": 160,
                        "number_of_fragments": 1,
                    },
                },
                "require_field_match": False,
                "pre_tags": ["<mark>"],
                "post_tags": ["</mark>"],
                "encoder": "html",
                "max_analyzed_offset": 900000,
            },
        }
        if sort == "newer":
            body["sort"] = [{"date": {"order": "desc"}}]
        elif sort == "older":
            body["sort"] = [{"date": {"order": "asc"}}]
        return body

    if query:
        base_fields = ["title^3", "description^2", "transcript_full", "transcript_secondary_full"]
        if use_phrase:
            should.append(
                {
                    "match_phrase": {
                        "transcript_full": {
                            "query": query,
                            "slop": 2,
                            "boost": 10.0,
                        }
                    }
                }
            )
            should.append(
                {
                    "match_phrase": {
                        "transcript_secondary_full": {
                            "query": query,
                            "slop": 2,
                            "boost": 10.0,
                        }
                    }
                }
            )
        if use_fuzzy:
            should.append(
                {
                    "multi_match": {
                        "query": query,
                        "fields": base_fields,
                        "type": "best_fields",
                        "operator": "and",
                        "fuzziness": "AUTO",
                        "prefix_length": 1,
                        "max_expansions": 50,
                        "boost": 1.5,
                    }
                }
            )
        if use_exact:
            should.append(
                {
                    "multi_match": {
                        "query": query,
                        "fields": base_fields,
                        "type": "best_fields",
                        "operator": "and",
                        "boost": 3.0,
                    }
                }
            )

    if should:
        query_body: Dict = {
            "bool": {
                "should": should,
                "minimum_should_match": 1,
            }
        }
        if filters:
            query_body["bool"]["filter"] = filters
    elif filters:
        query_body = {"bool": {"filter": filters}}
    else:
        query_body = {"match_all": {}}

    body: Dict = {
        "query": query_body,
        "highlight": {
            "fields": {
                "transcript_full": {
                    "fragment_size": 160,
                    "number_of_fragments": 5,
                    "fragmenter": "span",
                },
                "transcript_secondary_full": {
                    "fragment_size": 160,
                    "number_of_fragments": 5,
                    "fragmenter": "span",
                },
                "title": {"number_of_fragments": 0},
                "description": {
                    "fragment_size": 160,
                    "number_of_fragments": 1,
                },
            },
            "require_field_match": False,
            "pre_tags": ["<mark>"],
            "post_tags": ["</mark>"],
            "encoder": "html",
            "max_analyzed_offset": 900000,
        },
    }
    if query_body.get("match_all") is None:
        body["highlight"]["highlight_query"] = copy.deepcopy(query_body)

    if sort == "newer":
        body["sort"] = [{"date": {"order": "desc"}}]
    elif sort == "older":
        body["sort"] = [{"date": {"order": "asc"}}]
    return body


def create_app(config: AppConfig = CONFIG) -> Flask:
    app = Flask(__name__, static_folder=str(Path(__file__).parent / "static"))
    client = _ensure_client(config)
    index = config.elastic.index

    @app.route("/")
    def index_page():
        return send_from_directory(app.static_folder, "index.html")

    @app.route("/static/<path:filename>")
    def static_files(filename: str):
        return send_from_directory(app.static_folder, filename)

    @app.route("/api/channels")
    def channels():
        base_channels_body = {
            "size": 0,
            "aggs": {
                "channels": {
                    "terms": {"field": "channel_id", "size": 200},
                    "aggs": {
                        "name": {
                            "top_hits": {
                                "size": 1,
                                "_source": {"includes": ["channel_name"]},
                            }
                        }
                    },
                }
            },
        }

        def run_channels_request(field_name: str):
            body = json.loads(json.dumps(base_channels_body))  # deep copy
            body["aggs"]["channels"]["terms"]["field"] = field_name
            if config.elastic.debug:
                LOGGER.info(
                    "Elasticsearch channels request: %s",
                    json.dumps({"index": index, "body": body}, indent=2),
                )
            return client.search(index=index, body=body)

        response = None
        last_error = None
        for candidate_field in ("channel_id.keyword", "channel_id"):
            try:
                response = run_channels_request(candidate_field)
                if config.elastic.debug:
                    LOGGER.info("Channels aggregation used field: %s", candidate_field)
                break
            except BadRequestError as exc:
                last_error = exc
                if config.elastic.debug:
                    LOGGER.warning(
                        "Channels aggregation failed for field %s: %s",
                        candidate_field,
                        exc,
                    )
        if response is None:
            raise last_error or RuntimeError("Unable to aggregate channels.")

        if config.elastic.debug:
            LOGGER.info(
                "Elasticsearch channels response: %s",
                json.dumps(response, indent=2, default=str),
            )
        buckets = (
            response.get("aggregations", {})
            .get("channels", {})
            .get("buckets", [])
        )
        data = [
            {
                "Id": bucket.get("key"),
                "Name": (
                    bucket.get("name", {})
                    .get("hits", {})
                    .get("hits", [{}])[0]
                    .get("_source", {})
                    .get("channel_name", bucket.get("key"))
                ),
                "Count": bucket.get("doc_count", 0),
            }
            for bucket in buckets
        ]
        data.sort(key=lambda item: item["Name"].lower())
        return jsonify(data)

    @app.route("/api/search")
    def search():
        query = request.args.get("q", "", type=str)
        raw_channels: List[Optional[str]] = request.args.getlist("channel_id")
        legacy_channel = request.args.get("channel", type=str)
        if legacy_channel:
            raw_channels.append(legacy_channel)
        channels = parse_channel_params(raw_channels)
        sort = request.args.get("sort", "relevant", type=str)
        page = max(request.args.get("page", 0, type=int), 0)
        size = max(request.args.get("size", 10, type=int), 1)

        def parse_flag(name: str, default: bool = True) -> bool:
            value = request.args.get(name)
            if value is None:
                return default
            return value.lower() not in {"0", "false", "no"}

        use_exact = parse_flag("exact", True)
        use_fuzzy = parse_flag("fuzzy", True)
        use_phrase = parse_flag("phrase", True)
        use_query_string = parse_flag("query_string", False)
        if use_query_string:
            use_exact = use_fuzzy = use_phrase = False

        payload = build_query_payload(
            query,
            channels=channels,
            sort=sort,
            use_exact=use_exact,
            use_fuzzy=use_fuzzy,
            use_phrase=use_phrase,
            use_query_string=use_query_string,
        )
        start = page * size
        if config.elastic.debug:
            LOGGER.info(
                "Elasticsearch search request: %s",
                json.dumps(
                    {
                        "index": index,
                        "from": start,
                        "size": size,
                        "body": payload,
                        "channels": channels,
                        "toggles": {
                            "exact": use_exact,
                            "fuzzy": use_fuzzy,
                            "phrase": use_phrase,
                        },
                    },
                    indent=2,
                ),
            )
        response = client.search(
            index=index,
            from_=start,
            size=size,
            body=payload,
        )
        if config.elastic.debug:
            LOGGER.info(
                "Elasticsearch search response: %s",
                json.dumps(response, indent=2, default=str),
            )

        hits = response.get("hits", {})
        total = hits.get("total", {}).get("value", 0)
        documents = []
        for hit in hits.get("hits", []):
            source = hit.get("_source", {})
            highlight_map = hit.get("highlight", {})
            transcript_highlight = (
                (highlight_map.get("transcript_full", []) or [])
                + (highlight_map.get("transcript_secondary_full", []) or [])
            )

            title_html = (
                highlight_map.get("title")
                or [source.get("title") or "Untitled"]
            )[0]
            description_html = (
                highlight_map.get("description")
                or [source.get("description") or ""]
            )[0]
            documents.append(
                {
                    "video_id": source.get("video_id"),
                    "channel_id": source.get("channel_id"),
                    "channel_name": source.get("channel_name"),
                    "title": source.get("title"),
                    "titleHtml": title_html,
                    "description": source.get("description"),
                    "descriptionHtml": description_html,
                    "date": source.get("date"),
                    "url": source.get("url"),
                    "toHighlight": transcript_highlight,
                    "highlightSource": {
                        "primary": bool(highlight_map.get("transcript_full")),
                        "secondary": bool(highlight_map.get("transcript_secondary_full")),
                    },
                }
            )

        return jsonify(
            {
                "items": documents,
                "totalResults": total,
                "totalPages": (total + size - 1) // size,
                "currentPage": page,
            }
        )

    @app.route("/api/metrics")
    def metrics():
        try:
            data = elastic_metrics_payload(
                client,
                index,
                channel_field_candidates=["channel_id.keyword", "channel_id"],
                debug=config.elastic.debug,
            )
        except Exception:
            LOGGER.exception(
                "Falling back to local metrics payload due to Elasticsearch error."
            )
            data = metrics_payload(config.data.root)
        return jsonify(data)

    @app.route("/api/frequency")
    def frequency():
        raw_term = request.args.get("term", type=str) or ""
        use_query_string = request.args.get("query_string", default="0", type=str)
        use_query_string = (use_query_string or "").lower() in {"1", "true", "yes"}
        term = raw_term.strip()
        if not term and not use_query_string:
            return ("term parameter is required", 400)
        if use_query_string and not term:
            term = "*"

        raw_channels: List[Optional[str]] = request.args.getlist("channel_id")
        legacy_channel = request.args.get("channel", type=str)
        if legacy_channel:
            raw_channels.append(legacy_channel)
        channels = parse_channel_params(raw_channels)
        interval = (request.args.get("interval", "month") or "month").lower()
        allowed_intervals = {"day", "week", "month", "quarter", "year"}
        if interval not in allowed_intervals:
            interval = "month"
        start = request.args.get("start", type=str)
        end = request.args.get("end", type=str)

        filters: List[Dict] = []
        channel_filter = build_channel_filter(channels)
        if channel_filter:
            filters.append(channel_filter)
        if start or end:
            range_filter: Dict[str, Dict[str, Dict[str, str]]] = {"range": {"date": {}}}
            if start:
                range_filter["range"]["date"]["gte"] = start
            if end:
                range_filter["range"]["date"]["lte"] = end
            filters.append(range_filter)

        base_fields = ["title^3", "description^2", "transcript_full", "transcript_secondary_full"]
        if use_query_string:
            qs_query = term or "*"
            must_clause: List[Dict[str, Any]] = [
                {
                    "query_string": {
                        "query": qs_query,
                        "default_operator": "AND",
                        "fields": base_fields,
                    }
                }
            ]
        else:
            must_clause = [
                {
                    "multi_match": {
                        "query": term,
                        "fields": base_fields,
                        "type": "best_fields",
                        "operator": "and",
                    }
                }
            ]

        query: Dict[str, Any] = {"bool": {"must": must_clause}}
        if filters:
            query["bool"]["filter"] = filters

        histogram: Dict[str, Any] = {
            "field": "date",
            "calendar_interval": interval,
            "min_doc_count": 0,
        }
        if start or end:
            bounds: Dict[str, str] = {}
            if start:
                bounds["min"] = start
            if end:
                bounds["max"] = end
            if bounds:
                histogram["extended_bounds"] = bounds

        channel_terms_size = max(6, len(channels)) if channels else 6

        body = {
            "size": 0,
            "query": query,
            "aggs": {
                "over_time": {
                    "date_histogram": histogram,
                    "aggs": {
                        "by_channel": {
                            "terms": {
                                "field": "channel_id.keyword",
                                "size": channel_terms_size,
                                "order": {"_count": "desc"},
                            }
                        }
                    },
                }
            },
        }

        if config.elastic.debug:
            LOGGER.info(
                "Elasticsearch frequency request: %s",
                json.dumps(
                    {
                        "index": index,
                        "body": body,
                        "term": term,
                        "interval": interval,
                        "channels": channels,
                        "start": start,
                        "end": end,
                        "query_string": use_query_string,
                    },
                    indent=2,
                ),
            )

        response = client.search(index=index, body=body)

        if config.elastic.debug:
            LOGGER.info(
                "Elasticsearch frequency response: %s",
                json.dumps(response, indent=2, default=str),
            )

        raw_buckets = (
            response.get("aggregations", {})
            .get("over_time", {})
            .get("buckets", [])
        )

        channel_totals: Dict[str, int] = {}
        buckets: List[Dict[str, Any]] = []
        for bucket in raw_buckets:
            date_str = bucket.get("key_as_string")
            total = bucket.get("doc_count", 0)
            channel_entries: List[Dict[str, Any]] = []
            for ch_bucket in bucket.get("by_channel", {}).get("buckets", []):
                cid = ch_bucket.get("key")
                count = ch_bucket.get("doc_count", 0)
                if cid:
                    channel_entries.append({"id": cid, "count": count})
                    channel_totals[cid] = channel_totals.get(cid, 0) + count
            buckets.append(
                {"date": date_str, "total": total, "channels": channel_entries}
            )

        ranked_channels = sorted(
            [{"id": cid, "total": total} for cid, total in channel_totals.items()],
            key=lambda item: item["total"],
            reverse=True,
        )

        payload = {
            "term": raw_term if not use_query_string else term,
            "interval": interval,
            "buckets": buckets,
            "channels": ranked_channels,
            "totalResults": response.get("hits", {})
            .get("total", {})
            .get("value", 0),
        }
        return jsonify(payload)

    @app.route("/frequency")
    def frequency_page():
        return send_from_directory(app.static_folder, "frequency.html")

    @app.route("/api/transcript")
    def transcript():
        video_id = request.args.get("video_id", type=str)
        if not video_id:
            return ("video_id not set", 400)
        # ignore_status=404 tolerates a missing document so the 404 branch
        # below can report it cleanly (the 8.x replacement for ignore=[404]).
        response = client.options(ignore_status=404).get(index=index, id=video_id)
        if config.elastic.debug:
            LOGGER.info(
                "Elasticsearch transcript request: index=%s id=%s", index, video_id
            )
            LOGGER.info(
                "Elasticsearch transcript response: %s",
                json.dumps(response, indent=2, default=str)
                if response
                else "None",
            )
        if not response or not response.get("found"):
            return ("not found", 404)
        source = response["_source"]
        return jsonify(
            {
                "video_id": source.get("video_id"),
                "title": source.get("title"),
                "transcript_parts": source.get("transcript_parts", []),
                "transcript_full": source.get("transcript_full"),
                "transcript_secondary_parts": source.get("transcript_secondary_parts", []),
                "transcript_secondary_full": source.get("transcript_secondary_full"),
            }
        )

    return app


def main() -> None:  # pragma: no cover
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    app = create_app()
    app.run(host="0.0.0.0", port=8080, debug=True)


if __name__ == "__main__":  # pragma: no cover
    main()
static/app.js (new file, 733 lines)
@@ -0,0 +1,733 @@
|
||||
(() => {
|
||||
let qs = new URLSearchParams(window.location.search);
|
||||
const qInput = document.getElementById("q");
|
||||
const channelDropdown = document.getElementById("channelDropdown");
|
||||
const channelSummary = document.getElementById("channelSummary");
|
||||
const channelOptions = document.getElementById("channelOptions");
|
||||
const sortSel = document.getElementById("sort");
|
||||
const sizeSel = document.getElementById("size");
|
||||
const exactToggle = document.getElementById("exactToggle");
|
||||
const fuzzyToggle = document.getElementById("fuzzyToggle");
|
||||
const phraseToggle = document.getElementById("phraseToggle");
|
||||
const queryToggle = document.getElementById("queryStringToggle");
|
||||
const searchBtn = document.getElementById("searchBtn");
|
||||
const resultsDiv = document.getElementById("results");
|
||||
const metaDiv = document.getElementById("meta");
|
||||
const metricsContainer = document.getElementById("metrics");
|
||||
const metricsStatus = document.getElementById("metricsStatus");
|
||||
const metricsContent = document.getElementById("metricsContent");
|
||||
const freqSummary = document.getElementById("frequencySummary");
|
||||
const freqChart = document.getElementById("frequencyChart");
|
||||
const channelMap = new Map();
|
||||
const selectedChannels = new Set();
|
||||
let pendingChannelSelection = [];
|
||||
let channelsReady = false;
|
||||
let suppressChannelChange = false;
|
||||
let allChannelsCheckbox = null;
|
||||
let previousToggleState = { exact: true, fuzzy: true, phrase: true };
|
||||
let currentPage =
|
||||
parseInt(qs.get("page") || "0", 10) ||
|
||||
0;
|
||||
|
||||
function parseBoolParam(name, defaultValue) {
|
||||
const raw = qs.get(name);
|
||||
if (raw === null) return defaultValue;
|
||||
const lowered = raw.toLowerCase();
|
||||
return !["0", "false", "no"].includes(lowered);
|
||||
}
|
||||
|
||||
function parseChannelParams(params) {
|
||||
const collected = [];
|
||||
if (!params) return collected;
|
||||
const seen = new Set();
|
||||
const rawValues = params.getAll("channel_id");
|
||||
const legacy = params.get("channel");
|
||||
if (legacy) rawValues.push(legacy);
|
||||
rawValues.forEach((value) => {
|
||||
if (value == null) return;
|
||||
String(value)
|
||||
.split(",")
|
||||
.map((part) => part.trim())
|
||||
.filter((part) => part && part.toLowerCase() !== "all")
|
||||
.forEach((part) => {
|
||||
if (!seen.has(part)) {
|
||||
seen.add(part);
|
||||
collected.push(part);
|
||||
}
|
||||
});
|
||||
});
|
||||
return collected;
|
||||
}
|
||||
|
||||
function getSelectedChannels() {
|
||||
return Array.from(selectedChannels);
|
||||
}
|
||||
|
||||
function ensureAllCheckboxState() {
|
||||
if (allChannelsCheckbox) {
|
||||
allChannelsCheckbox.checked = selectedChannels.size === 0;
|
||||
}
|
||||
}
|
||||
|
||||
function updateChannelSummary() {
|
||||
if (!channelSummary) return;
|
||||
if (!selectedChannels.size) {
|
||||
channelSummary.textContent = "All Channels";
|
||||
return;
|
||||
}
|
||||
const names = Array.from(selectedChannels).map(
|
||||
(id) => channelMap.get(id) || id
|
||||
);
|
||||
if (names.length > 1) {
|
||||
names.sort((a, b) => a.localeCompare(b, undefined, { sensitivity: "base" }));
|
||||
}
|
||||
let label = names.slice(0, 3).join(", ");
|
||||
if (names.length > 3) {
|
||||
label += ` +${names.length - 3} more`;
|
||||
}
|
||||
channelSummary.textContent = label;
|
||||
}
|
||||
|
||||
function applyChannelSelection(ids, { silent = false } = {}) {
|
||||
selectedChannels.clear();
|
||||
ids.forEach((id) => selectedChannels.add(id));
|
||||
pendingChannelSelection = getSelectedChannels();
|
||||
ensureAllCheckboxState();
|
||||
if (channelOptions) {
|
||||
suppressChannelChange = true;
|
||||
const checkboxes = channelOptions.querySelectorAll(
|
||||
'input[type="checkbox"][data-channel="1"]'
|
||||
);
|
||||
checkboxes.forEach((checkbox) => {
|
||||
checkbox.checked = selectedChannels.has(checkbox.value);
|
||||
});
|
||||
suppressChannelChange = false;
|
||||
}
|
||||
updateChannelSummary();
|
||||
if (!silent && channelsReady) {
|
||||
runSearch(0);
|
||||
}
|
||||
}
|
||||
|
||||
function setFromQuery() {
|
||||
qInput.value = qs.get("q") || "";
|
||||
sortSel.value = qs.get("sort") || "relevant";
|
||||
sizeSel.value = qs.get("size") || "10";
|
||||
pendingChannelSelection = parseChannelParams(qs);
|
||||
applyChannelSelection(pendingChannelSelection, { silent: true });
|
||||
exactToggle.checked = parseBoolParam("exact", true);
|
||||
fuzzyToggle.checked = parseBoolParam("fuzzy", true);
|
||||
phraseToggle.checked = parseBoolParam("phrase", true);
|
||||
queryToggle.checked = parseBoolParam("query_string", false);
|
||||
applyQueryMode();
|
||||
rememberToggleState();
|
||||
}
|
||||
|
||||
function applyQueryMode() {
|
||||
if (!queryToggle) return;
|
||||
if (queryToggle.checked) {
|
||||
if (!exactToggle.disabled) {
|
||||
previousToggleState = {
|
||||
exact: exactToggle.checked,
|
||||
fuzzy: fuzzyToggle.checked,
|
||||
phrase: phraseToggle.checked,
|
||||
};
|
||||
}
|
||||
exactToggle.checked = false;
|
||||
fuzzyToggle.checked = false;
|
||||
phraseToggle.checked = false;
|
||||
exactToggle.disabled = true;
|
||||
fuzzyToggle.disabled = true;
|
||||
phraseToggle.disabled = true;
|
||||
} else {
|
||||
exactToggle.disabled = false;
|
||||
fuzzyToggle.disabled = false;
|
||||
phraseToggle.disabled = false;
|
||||
exactToggle.checked = previousToggleState.exact;
|
||||
fuzzyToggle.checked = previousToggleState.fuzzy;
|
||||
phraseToggle.checked = previousToggleState.phrase;
|
||||
}
|
||||
}
|
||||
|
||||
function rememberToggleState() {
|
||||
if (queryToggle && !queryToggle.checked) {
|
||||
previousToggleState = {
|
||||
exact: !!exactToggle.checked,
|
||||
fuzzy: !!fuzzyToggle.checked,
|
||||
phrase: !!phraseToggle.checked,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (channelOptions) {
|
||||
channelOptions.addEventListener("change", (event) => {
|
||||
const target = event.target;
|
||||
if (!(target instanceof HTMLInputElement) || target.type !== "checkbox") {
|
||||
return;
|
||||
}
|
||||
if (suppressChannelChange) {
|
||||
return;
|
||||
}
|
||||
if (target.dataset.all === "1") {
|
||||
if (!target.checked && !selectedChannels.size) {
|
||||
suppressChannelChange = true;
|
||||
target.checked = true;
|
||||
suppressChannelChange = false;
|
||||
return;
|
||||
}
|
||||
if (target.checked) {
|
||||
selectedChannels.clear();
|
||||
pendingChannelSelection = [];
|
||||
suppressChannelChange = true;
|
||||
const others = channelOptions.querySelectorAll(
|
||||
'input[type="checkbox"][data-channel="1"]'
|
||||
);
|
||||
others.forEach((checkbox) => {
|
||||
checkbox.checked = false;
|
||||
});
|
||||
suppressChannelChange = false;
|
||||
ensureAllCheckboxState();
|
||||
updateChannelSummary();
|
||||
if (channelsReady) {
|
||||
runSearch(0);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const id = target.value;
|
||||
if (!id) return;
|
||||
if (target.checked) {
|
||||
selectedChannels.add(id);
|
||||
} else {
|
||||
selectedChannels.delete(id);
|
||||
}
|
||||
pendingChannelSelection = getSelectedChannels();
|
||||
ensureAllCheckboxState();
|
||||
updateChannelSummary();
|
||||
if (channelsReady) {
|
||||
runSearch(0);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
async function loadChannels() {
|
||||
if (!channelOptions) {
|
||||
channelsReady = true;
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const res = await fetch("/api/channels");
|
||||
const data = await res.json();
|
||||
channelMap.clear();
|
||||
channelOptions.innerHTML = "";
|
||||
|
||||
const listFragment = document.createDocumentFragment();
|
||||
|
||||
const allLabel = document.createElement("label");
|
||||
allLabel.className = "channel-option";
|
||||
allChannelsCheckbox = document.createElement("input");
|
||||
allChannelsCheckbox.type = "checkbox";
|
||||
allChannelsCheckbox.dataset.all = "1";
|
||||
allChannelsCheckbox.checked = selectedChannels.size === 0;
|
||||
const allText = document.createElement("span");
|
||||
allText.textContent = "All Channels";
|
||||
allLabel.appendChild(allChannelsCheckbox);
|
||||
allLabel.appendChild(allText);
|
||||
listFragment.appendChild(allLabel);
|
||||
|
||||
data.forEach((item) => {
|
||||
const label = document.createElement("label");
|
||||
label.className = "channel-option";
|
||||
const checkbox = document.createElement("input");
|
||||
checkbox.type = "checkbox";
|
||||
checkbox.value = item.Id;
|
||||
checkbox.dataset.channel = "1";
|
||||
const text = document.createElement("span");
|
||||
text.textContent = `${item.Name} (${item.Count})`;
|
||||
label.appendChild(checkbox);
|
||||
label.appendChild(text);
|
||||
listFragment.appendChild(label);
|
||||
channelMap.set(item.Id, item.Name);
|
||||
});
|
||||
|
||||
channelOptions.appendChild(listFragment);
|
||||
|
||||
if (!data.length) {
|
||||
const empty = document.createElement("div");
|
||||
empty.textContent = "No channels available.";
|
||||
channelOptions.appendChild(empty);
|
||||
}
|
||||
|
||||
const initialSelection = pendingChannelSelection.length
|
||||
? pendingChannelSelection
|
||||
: Array.from(selectedChannels);
|
||||
applyChannelSelection(initialSelection, { silent: true });
|
||||
channelsReady = true;
|
||||
updateChannelSummary();
|
||||
} catch (err) {
|
||||
console.error("Failed to load channels", err);
|
||||
channelOptions.innerHTML = "<div>Failed to load channels.</div>";
|
||||
channelsReady = true;
|
||||
ensureAllCheckboxState();
|
||||
updateChannelSummary();
|
||||
}
|
||||
}
|
||||
|
||||
function updateUrl(q, sort, channels, page, size, exact, fuzzy, phrase, queryMode) {
|
||||
const next = new URL(window.location.href);
|
||||
next.searchParams.set("q", q);
|
||||
next.searchParams.set("sort", sort);
|
||||
next.searchParams.delete("channel_id");
|
||||
next.searchParams.delete("channel");
|
||||
channels.forEach((id) => next.searchParams.append("channel_id", id));
|
||||
next.searchParams.set("page", page);
|
||||
next.searchParams.set("size", size);
|
||||
next.searchParams.set("exact", exact ? "1" : "0");
|
||||
next.searchParams.set("fuzzy", fuzzy ? "1" : "0");
|
||||
next.searchParams.set("phrase", phrase ? "1" : "0");
|
||||
next.searchParams.set("query_string", queryMode ? "1" : "0");
|
||||
history.pushState({}, "", next.toString());
|
||||
}
|
||||
|
||||
function fmtDate(value) {
|
||||
try {
|
||||
return (value || "").split("T")[0];
|
||||
} catch {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
function fmtNumber(n) {
|
||||
if (typeof n === "number") return n.toLocaleString();
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
// Transcript viewer functionality removed.
|
||||
|
||||
function renderMetrics(data) {
|
||||
if (!metricsContent) return;
|
||||
metricsContent.innerHTML = "";
|
||||
if (!data) return;
|
||||
|
||||
if (metricsStatus) {
|
||||
metricsStatus.textContent = "";
|
||||
}
|
||||
|
||||
const summary = document.createElement("div");
|
||||
summary.innerHTML = `<strong>Entries:</strong> ${fmtNumber(data.totalItems)} • <strong>Channels:</strong> ${fmtNumber(data.totalChannels)}`;
|
||||
metricsContent.appendChild(summary);
|
||||
|
||||
if (Array.isArray(data.itemsPerChannel) && data.itemsPerChannel.length) {
|
||||
const top = data.itemsPerChannel.slice(0, 5);
|
||||
const channelHeader = document.createElement("div");
|
||||
channelHeader.style.marginTop = "8px";
|
||||
channelHeader.innerHTML = "<strong>Top Channels</strong>";
|
||||
metricsContent.appendChild(channelHeader);
|
||||
|
||||
const channelList = document.createElement("div");
|
||||
channelList.className = "muted";
|
||||
top.forEach((entry) => {
|
||||
const row = document.createElement("div");
|
||||
row.textContent = `${entry.label}: ${fmtNumber(entry.count)}`;
|
||||
channelList.appendChild(row);
|
||||
});
|
||||
metricsContent.appendChild(channelList);
|
||||
}
|
||||
}
|
||||
|
||||
async function loadMetrics() {
|
||||
if (!metricsContainer) return;
|
||||
metricsContainer.dataset.loading = "1";
|
||||
if (!metricsContainer.dataset.loaded && metricsStatus) {
|
||||
metricsStatus.textContent = "Loading metrics…";
|
||||
}
|
||||
try {
|
||||
const res = await fetch("/api/metrics");
|
||||
const data = await res.json();
|
||||
renderMetrics(data);
|
||||
metricsContainer.dataset.loaded = "1";
|
||||
} catch (err) {
|
||||
console.error("Failed to load metrics", err);
|
||||
if (!metricsContainer.dataset.loaded && metricsStatus) {
|
||||
metricsStatus.textContent = "Metrics unavailable.";
|
||||
}
|
||||
} finally {
|
||||
delete metricsContainer.dataset.loading;
|
||||
}
|
||||
}
|
||||

  function clearFrequency(message) {
    if (freqSummary) {
      freqSummary.textContent = message || "";
    }
    if (freqChart) {
      freqChart.innerHTML = "";
    }
  }

  function renderFrequencyChart(buckets, channelTotals) {
    if (!freqChart || typeof d3 === "undefined") {
      return;
    }
    freqChart.innerHTML = "";
    if (!buckets.length) {
      clearFrequency("No matches for this query.");
      return;
    }

    let channelsOrder =
      channelTotals && channelTotals.length
        ? channelTotals.map((entry) => entry.id)
        : [];
    if (!channelsOrder.length) {
      const unique = new Set();
      buckets.forEach((bucket) => {
        (bucket.channels || []).forEach((entry) => unique.add(entry.id));
      });
      channelsOrder = Array.from(unique);
    }
    channelsOrder = channelsOrder.slice(0, 6);
    if (!channelsOrder.length) {
      clearFrequency("No matches for this query.");
      return;
    }

    const dateKeyFormat = d3.timeFormat("%Y-%m-%d");
    const parsed = buckets
      .map((bucket) => {
        const parsedDate = d3.isoParse(bucket.date) || new Date(bucket.date);
        if (!(parsedDate instanceof Date) || Number.isNaN(parsedDate.valueOf())) {
          return null;
        }
        const counts = {};
        (bucket.channels || []).forEach((entry) => {
          if (channelsOrder.includes(entry.id)) {
            counts[entry.id] = entry.count || 0;
          }
        });
        return {
          date: parsedDate,
          dateKey: dateKeyFormat(parsedDate),
          counts,
        };
      })
      .filter(Boolean);

    if (!parsed.length) {
      clearFrequency("Timeline unavailable.");
      return;
    }

    const margin = { top: 12, right: 12, bottom: 52, left: 56 };
    const fullWidth = freqChart.clientWidth || 360;
    const fullHeight = 220;
    const width = fullWidth - margin.left - margin.right;
    const height = fullHeight - margin.top - margin.bottom;

    const svg = d3
      .select(freqChart)
      .append("svg")
      .attr("width", fullWidth)
      .attr("height", fullHeight);

    const g = svg
      .append("g")
      .attr("transform", `translate(${margin.left},${margin.top})`);

    const x = d3
      .scaleBand()
      .domain(parsed.map((entry) => entry.dateKey))
      .range([0, width])
      .padding(0.25);

    const yMax = d3.max(parsed, (entry) =>
      d3.sum(channelsOrder, (key) => entry.counts[key] || 0)
    );

    const y = d3
      .scaleLinear()
      .domain([0, yMax || 0])
      .nice()
      .range([height, 0]);

    const tickValues =
      parsed.length <= 6
        ? parsed.map((entry) => entry.dateKey)
        : parsed
            .filter((_, index, arr) => index % Math.ceil(arr.length / 6) === 0)
            .map((entry) => entry.dateKey);

    const xAxis = d3.axisBottom(x).tickValues(tickValues);
    const yAxis = d3.axisLeft(y).ticks(5);

    g.append("g")
      .attr("class", "axis")
      .attr("transform", `translate(0,${height})`)
      .call(xAxis)
      .selectAll("text")
      .attr("text-anchor", "end")
      .attr("transform", "rotate(-35)")
      .attr("dx", "-0.8em")
      .attr("dy", "0.15em");

    g.append("g").attr("class", "axis").call(yAxis);

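    // d3.stack() turns each parsed row's per-channel counts into cumulative
    // [y0, y1] spans, one series per channel ID; the rect y/height
    // attributes below read those spans directly.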
    const stack = d3
      .stack()
      .keys(channelsOrder)
      .value((entry, key) => entry.counts[key] || 0);
    const stacked = stack(parsed);
    const color = d3.scaleOrdinal(channelsOrder, d3.schemeTableau10);

    const layers = g
      .selectAll(".freq-layer")
      .data(stacked)
      .enter()
      .append("g")
      .attr("class", "freq-layer")
      .attr("fill", (d) => color(d.key));

    layers
      .selectAll("rect")
      .data((d) => d)
      .enter()
      .append("rect")
      .attr("x", (d) => x(d.data.dateKey))
      .attr("width", x.bandwidth())
      .attr("y", (d) => y(d[1]))
      .attr("height", (d) => y(d[0]) - y(d[1]))
      .append("title")
      .text(function (d) {
        // `this` is the <title> node; two levels up is the layer <g>,
        // whose bound datum carries the channel key for this series.
        const group = this.parentNode ? this.parentNode.parentNode : null;
        const key = group ? d3.select(group).datum().key : undefined;
        const label = key ? channelMap.get(key) || key : key || "";
        return `${dateKeyFormat(d.data.date)}: ${d[1] - d[0]}${label ? " (" + label + ")" : ""}`;
      });

    const legend = document.createElement("div");
    legend.className = "freq-legend";
    channelsOrder.forEach((key) => {
      const item = document.createElement("div");
      item.className = "freq-legend-item";
      const swatch = document.createElement("span");
      swatch.className = "freq-legend-swatch";
      swatch.style.backgroundColor = color(key);
      const label = document.createElement("span");
      label.textContent = channelMap.get(key) || key;
      item.appendChild(swatch);
      item.appendChild(label);
      legend.appendChild(item);
    });
    freqChart.appendChild(legend);
  }

  async function updateFrequencyChart(term, channels, queryMode) {
    if (!freqChart || typeof d3 === "undefined") {
      return;
    }
    let trimmed = term.trim();
    if (!trimmed) {
      if (queryMode) {
        trimmed = "*";
      } else {
        clearFrequency("Enter a query to see timeline.");
        return;
      }
    }

    const params = new URLSearchParams();
    params.set("term", trimmed);
    params.set("interval", "month");
    (channels || []).forEach((id) => params.append("channel_id", id));
    if (queryMode) {
      params.set("query_string", "1");
    }

    clearFrequency("Loading timeline…");
    try {
      const res = await fetch(`/api/frequency?${params.toString()}`);
      if (!res.ok) {
        throw new Error(`Request failed with status ${res.status}`);
      }
      const payload = await res.json();
      const total = payload.totalResults || 0;
      if (freqSummary) {
        if (total === 0) {
          freqSummary.textContent = "No matches for this query.";
        } else if (queryMode) {
          freqSummary.textContent = `Matches: ${total.toLocaleString()} • Interval: ${payload.interval || "month"} (query-string)`;
        } else {
          freqSummary.textContent = `Matches: ${total.toLocaleString()} • Interval: ${payload.interval || "month"}`;
        }
      }
      if (total === 0) {
        freqChart.innerHTML = "";
        return;
      }
      renderFrequencyChart(payload.buckets || [], payload.channels || []);
    } catch (err) {
      console.error(err);
      clearFrequency("Timeline unavailable.");
    }
  }

  function renderResults(payload, page) {
    resultsDiv.innerHTML = "";
    metaDiv.textContent = `Total: ${payload.totalResults} • Page ${page + 1} of ${payload.totalPages}`;

    (payload.items || []).forEach((item) => {
      const el = document.createElement("div");
      el.className = "item";
      const titleHtml = item.titleHtml || escapeHtml(item.title || "Untitled");
      const descriptionHtml = item.descriptionHtml || escapeHtml(item.description || "");

      const header = document.createElement("div");
      const badges = [];
      if (item.highlightSource && item.highlightSource.primary) badges.push("primary transcript");
      if (item.highlightSource && item.highlightSource.secondary) badges.push("secondary transcript");
      const badgeHtml = badges.length
        ? `<div class="badge-row">${badges
            .map((b) => `<span class="badge">${escapeHtml(b)}</span>`)
            .join("")}</div>`
        : "";
      header.innerHTML = `
        <strong>${titleHtml}</strong>
        <div class="muted">${escapeHtml(item.channel_name || "")} • ${fmtDate(item.date)}</div>
        <div class="muted"><a href="${item.url}" target="_blank" rel="noopener">Open on YouTube</a></div>
        ${badgeHtml}
      `;
      el.appendChild(header);

      if (descriptionHtml) {
        const desc = document.createElement("div");
        desc.className = "muted";
        desc.innerHTML = descriptionHtml;
        el.appendChild(desc);
      }

      if (Array.isArray(item.toHighlight) && item.toHighlight.length) {
        const highlights = document.createElement("div");
        highlights.className = "transcript highlight-list";
        item.toHighlight.forEach((entry) => {
          const html = typeof entry === "string" ? entry : entry?.html;
          if (!html) return;
          const row = document.createElement("div");
          row.className = "highlight-row";
          row.innerHTML = html;
          highlights.appendChild(row);
        });
        if (highlights.childElementCount) {
          el.appendChild(highlights);
        }
      }

      resultsDiv.appendChild(el);
    });

    const pager = document.createElement("div");
    pager.className = "pager";
    const prev = document.createElement("button");
    prev.textContent = "Prev";
    prev.disabled = page <= 0;
    const next = document.createElement("button");
    next.textContent = "Next";
    next.disabled = page + 1 >= payload.totalPages;
    prev.onclick = () => runSearch(page - 1);
    next.onclick = () => runSearch(page + 1);
    pager.appendChild(prev);
    pager.appendChild(next);
    resultsDiv.appendChild(pager);
  }

  async function runSearch(pageOverride, pushState = true) {
    const q = qInput.value.trim();
    const channels = getSelectedChannels();
    const sort = sortSel.value;
    const size = parseInt(sizeSel.value, 10) || 10;
    const queryMode = queryToggle && queryToggle.checked;
    let exact = !!exactToggle.checked;
    let fuzzy = !!fuzzyToggle.checked;
    let phrase = !!phraseToggle.checked;
    if (queryMode) {
      exact = false;
      fuzzy = false;
      phrase = false;
    } else {
      previousToggleState = {
        exact,
        fuzzy,
        phrase,
      };
    }
    const page = pageOverride != null ? pageOverride : currentPage;
    currentPage = page;

    if (pushState) {
      updateUrl(q, sort, channels, page, size, exact, fuzzy, phrase, queryMode);
    }

    const params = new URLSearchParams();
    params.set("q", q);
    params.set("sort", sort);
    params.set("size", String(size));
    params.set("page", String(page));
    params.set("exact", exact ? "1" : "0");
    params.set("fuzzy", fuzzy ? "1" : "0");
    params.set("phrase", phrase ? "1" : "0");
    params.set("query_string", queryMode ? "1" : "0");
    channels.forEach((id) => params.append("channel_id", id));

    const res = await fetch(`/api/search?${params.toString()}`);
    const payload = await res.json();
    renderResults(payload, page);
    updateFrequencyChart(q, channels, queryMode);
  }
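
  // Example request produced above (all values illustrative):
  //   /api/search?q=meaning&sort=relevant&size=10&page=0&exact=1&fuzzy=1&phrase=1&query_string=0&channel_id=UC123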

  searchBtn.addEventListener("click", () => runSearch(0));
  qInput.addEventListener("keypress", (e) => {
    if (e.key === "Enter") runSearch(0);
  });
  sortSel.addEventListener("change", () => runSearch(0));
  sizeSel.addEventListener("change", () => runSearch(0));
  exactToggle.addEventListener("change", () => { rememberToggleState(); runSearch(0); });
  fuzzyToggle.addEventListener("change", () => { rememberToggleState(); runSearch(0); });
  phraseToggle.addEventListener("change", () => { rememberToggleState(); runSearch(0); });
  if (queryToggle) {
    queryToggle.addEventListener("change", () => { applyQueryMode(); runSearch(0); });
  }

  window.addEventListener("popstate", () => {
    qs = new URLSearchParams(window.location.search);
    setFromQuery();
    currentPage = parseInt(qs.get("page") || "0", 10) || 0;
    runSearch(currentPage, false);
  });

  setFromQuery();
  loadMetrics();
  loadChannels().then(() => runSearch(currentPage));
})();

function escapeHtml(str) {
  return (str || "").replace(/[&<>"']/g, (ch) => {
    switch (ch) {
      case "&":
        return "&amp;";
      case "<":
        return "&lt;";
      case ">":
        return "&gt;";
      case '"':
        return "&quot;";
      case "'":
        return "&#39;";
      default:
        return ch;
    }
  });
}
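
// Example: escapeHtml('<a href="x">Tom & Jerry</a>')
//   -> '&lt;a href=&quot;x&quot;&gt;Tom &amp; Jerry&lt;/a&gt;'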
68
static/frequency.html
Normal file
@ -0,0 +1,68 @@
<!doctype html>
<html>
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <title>Term Frequency Explorer</title>
    <link rel="stylesheet" href="/static/style.css" />
    <style>
      #chart {
        margin-top: 24px;
      }
      svg {
        max-width: 100%;
      }
      .axis path,
      .axis line {
        stroke: #ccc;
      }
      .line {
        fill: none;
        stroke: #0b6efd;
        stroke-width: 2px;
      }
      .dot {
        fill: #0b6efd;
        stroke: white;
        stroke-width: 1px;
      }
      .controls label {
        display: flex;
        align-items: center;
        gap: 6px;
      }
    </style>
    <script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
  </head>
  <body>
    <header>
      <h1>Term Frequency Explorer</h1>
      <p class="muted">
        Pick a term to see how often it appears over time.
        <a href="/">Back to search</a>
      </p>
    </header>

    <section class="controls">
      <input id="term" type="text" placeholder="Term (e.g. meaning)" size="28" />
      <select id="channel">
        <option value="all">All Channels</option>
      </select>
      <select id="interval">
        <option value="month">Per Month</option>
        <option value="week">Per Week</option>
        <option value="day">Per Day</option>
        <option value="quarter">Per Quarter</option>
        <option value="year">Per Year</option>
      </select>
      <input id="start" type="date" />
      <input id="end" type="date" />
      <button id="runBtn">Run</button>
    </section>

    <section id="summary" class="muted"></section>
    <section id="chart"></section>

    <script src="/static/frequency.js"></script>
  </body>
</html>
222
static/frequency.js
Normal file
@ -0,0 +1,222 @@
(() => {
  let qs = new URLSearchParams(window.location.search);

  const termInput = document.getElementById("term");
  const channelSel = document.getElementById("channel");
  const intervalSel = document.getElementById("interval");
  const startInput = document.getElementById("start");
  const endInput = document.getElementById("end");
  const runBtn = document.getElementById("runBtn");
  const summaryDiv = document.getElementById("summary");
  const chartDiv = document.getElementById("chart");

  function parseParams() {
    return {
      term: qs.get("term") || "",
      channel: qs.get("channel_id") || "all",
      interval: qs.get("interval") || "month",
      start: qs.get("start") || "",
      end: qs.get("end") || "",
    };
  }

  function setFormFromParams() {
    const params = parseParams();
    termInput.value = params.term;
    intervalSel.value = params.interval;
    startInput.value = params.start;
    endInput.value = params.end;
    return params;
  }

  function updateUrl(params) {
    const url = new URL(window.location.href);
    url.searchParams.set("term", params.term);
    url.searchParams.set("channel_id", params.channel);
    url.searchParams.set("interval", params.interval);
    if (params.start) url.searchParams.set("start", params.start);
    else url.searchParams.delete("start");
    if (params.end) url.searchParams.set("end", params.end);
    else url.searchParams.delete("end");
    history.pushState({}, "", url.toString());
    qs = new URLSearchParams(url.search);
  }

  async function loadChannels(initialValue) {
    try {
      const res = await fetch("/api/channels");
      const data = await res.json();
      data.forEach((item) => {
        const opt = document.createElement("option");
        opt.value = item.Id;
        opt.textContent = `${item.Name} (${item.Count})`;
        channelSel.appendChild(opt);
      });
    } catch (err) {
      console.error("Failed to load channels", err);
    }
    channelSel.value = initialValue || "all";
  }

  function drawChart(data) {
    chartDiv.innerHTML = "";
    if (!data.length) {
      const msg = document.createElement("div");
      msg.className = "muted";
      msg.textContent = "No matching documents for this term.";
      chartDiv.appendChild(msg);
      return;
    }

    const parsed = data
      .map((d) => ({
        date: d3.isoParse(d.date) || new Date(d.date),
        value: d.count,
      }))
      .filter((d) => d.date instanceof Date && !Number.isNaN(d.date.valueOf()));

    if (!parsed.length) {
      const msg = document.createElement("div");
      msg.className = "muted";
      msg.textContent = "Unable to parse dates for this series.";
      chartDiv.appendChild(msg);
      return;
    }

    const margin = { top: 20, right: 30, bottom: 40, left: 56 };
    const fullWidth = chartDiv.clientWidth || 900;
    const fullHeight = 360;
    const width = fullWidth - margin.left - margin.right;
    const height = fullHeight - margin.top - margin.bottom;

    const svg = d3
      .select(chartDiv)
      .append("svg")
      .attr("width", fullWidth)
      .attr("height", fullHeight);

    const g = svg
      .append("g")
      .attr("transform", `translate(${margin.left},${margin.top})`);

    const x = d3
      .scaleTime()
      .domain(d3.extent(parsed, (d) => d.date))
      .range([0, width]);

    const y = d3
      .scaleLinear()
      .domain([0, d3.max(parsed, (d) => d.value) || 0])
      .nice()
      .range([height, 0]);

    const xAxis = d3.axisBottom(x).ticks(6).tickFormat(d3.timeFormat("%Y-%m-%d"));
    const yAxis = d3.axisLeft(y).ticks(6);

    g.append("g")
      .attr("class", "axis")
      .attr("transform", `translate(0,${height})`)
      .call(xAxis)
      .selectAll("text")
      .attr("text-anchor", "end")
      .attr("transform", "rotate(-35)")
      .attr("dx", "-0.8em")
      .attr("dy", "0.15em");

    g.append("g").attr("class", "axis").call(yAxis);

    const line = d3
      .line()
      .x((d) => x(d.date))
      .y((d) => y(d.value));

    g.append("path")
      .datum(parsed)
      .attr("class", "line")
      .attr("d", line);

    g.selectAll(".dot")
      .data(parsed)
      .enter()
      .append("circle")
      .attr("class", "dot")
      .attr("r", 3)
      .attr("cx", (d) => x(d.date))
      .attr("cy", (d) => y(d.value))
      .append("title")
      .text((d) => `${d3.timeFormat("%Y-%m-%d")(d.date)}: ${d.value}`);
  }
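
  // Each bucket consumed by drawChart is expected to look like
  //   { "date": "2021-03-01T00:00:00Z", "count": 42 }
  // (shape inferred from this file; extra fields returned by
  // /api/frequency are simply ignored).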

  async function runFrequency(pushState = true) {
    const term = termInput.value.trim();
    if (!term) {
      summaryDiv.textContent = "Enter a term to begin.";
      chartDiv.innerHTML = "";
      return;
    }

    const params = {
      term,
      channel: channelSel.value,
      interval: intervalSel.value,
      start: startInput.value,
      end: endInput.value,
    };

    if (pushState) updateUrl(params);

    const search = new URLSearchParams();
    search.set("term", term);
    if (params.channel && params.channel !== "all") {
      search.set("channel_id", params.channel);
    }
    search.set("interval", params.interval);
    if (params.start) search.set("start", params.start);
    if (params.end) search.set("end", params.end);

    summaryDiv.textContent = "Loading…";
    chartDiv.innerHTML = "";

    try {
      const res = await fetch(`/api/frequency?${search.toString()}`);
      if (!res.ok) {
        throw new Error(`Request failed: ${res.status}`);
      }
      const payload = await res.json();
      const total = payload.totalResults || 0;
      summaryDiv.textContent = `Matches: ${total.toLocaleString()} • Buckets: ${(payload.buckets || []).length} • Interval: ${payload.interval}`;
      drawChart(payload.buckets || []);
    } catch (err) {
      console.error(err);
      summaryDiv.textContent = "Failed to load data.";
    }
  }

  runBtn.addEventListener("click", () => runFrequency());
  termInput.addEventListener("keypress", (e) => {
    if (e.key === "Enter") runFrequency();
  });
  intervalSel.addEventListener("change", () => runFrequency());
  channelSel.addEventListener("change", () => runFrequency());
  startInput.addEventListener("change", () => runFrequency());
  endInput.addEventListener("change", () => runFrequency());

  window.addEventListener("popstate", () => {
    qs = new URLSearchParams(window.location.search);
    const params = setFormFromParams();
    channelSel.value = params.channel;
    runFrequency(false);
  });

  const initialParams = setFormFromParams();
  loadChannels(initialParams.channel).then(() => {
    if (initialParams.term) {
      runFrequency(false);
    } else {
      summaryDiv.textContent = "Enter a term to begin.";
    }
  });
})();
63
static/index.html
Normal file
@ -0,0 +1,63 @@
<!doctype html>
<html>
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <title>This Little Corner (Python)</title>
    <link rel="stylesheet" href="/static/style.css" />
    <script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
  </head>
  <body>
    <header>
      <h1>This Little Corner — Elastic Search</h1>
      <p class="muted">
        Enter a phrase to query title, description, and transcript text.
      </p>
    </header>

    <section class="controls">
      <input id="q" type="text" placeholder="Search..." size="40" />
      <details id="channelDropdown" class="channel-dropdown">
        <summary id="channelSummary">All Channels</summary>
        <div id="channelOptions" class="channel-options muted">
          <div>Loading channels…</div>
        </div>
      </details>
      <select id="sort">
        <option value="relevant">Most relevant</option>
        <option value="newer">Newest first</option>
        <option value="older">Oldest first</option>
      </select>
      <select id="size">
        <option value="10">10</option>
        <option value="25">25</option>
        <option value="50">50</option>
      </select>
      <button id="searchBtn">Search</button>
    </section>
    <section class="controls muted">
      <label><input type="checkbox" id="exactToggle" checked /> Exact</label>
      <label><input type="checkbox" id="fuzzyToggle" checked /> Fuzzy</label>
      <label><input type="checkbox" id="phraseToggle" checked /> Phrase</label>
      <label><input type="checkbox" id="queryStringToggle" /> Query string mode</label>
    </section>
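    <!-- "Query string mode" sends query_string=1 to /api/search;
         app.js clears the Exact/Fuzzy/Phrase flags while it is checked. -->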

    <section class="summary-row">
      <div class="summary-left">
        <section id="meta" class="muted"></section>
        <section id="metrics">
          <div id="metricsStatus" class="muted"></div>
          <div id="metricsContent"></div>
        </section>
      </div>
      <div class="summary-right">
        <section id="frequencySummary" class="muted"></section>
        <div id="frequencyChart"></div>
      </div>
    </section>

    <section id="results"></section>

    <script src="/static/app.js"></script>
  </body>
</html>
225
static/style.css
Normal file
@ -0,0 +1,225 @@
body {
  font-family: Arial, sans-serif;
  margin: 24px;
  color: #222;
}

header {
  margin-bottom: 16px;
}

.controls {
  display: flex;
  flex-wrap: wrap;
  gap: 8px;
  align-items: center;
  margin-bottom: 12px;
}

.channel-dropdown {
  position: relative;
  min-width: 220px;
  flex: 0 1 260px;
}

.channel-dropdown summary {
  list-style: none;
  cursor: pointer;
  border: 1px solid #ccc;
  border-radius: 4px;
  padding: 6px 8px;
  background: #fff;
  color: #222;
  display: inline-flex;
  align-items: center;
  min-height: 32px;
  max-width: 100%;
  overflow: hidden;
  text-overflow: ellipsis;
  white-space: nowrap;
}

.channel-dropdown summary::-webkit-details-marker {
  display: none;
}

.channel-dropdown[open] summary {
  border-bottom-left-radius: 0;
  border-bottom-right-radius: 0;
}

.channel-options {
  margin-top: 4px;
  padding: 8px;
  border: 1px solid #ccc;
  border-radius: 0 0 4px 4px;
  background: #fff;
  max-height: 240px;
  overflow-y: auto;
  box-shadow: 0 2px 6px rgba(0, 0, 0, 0.12);
  min-width: 220px;
  width: max(220px, 100%);
}

.channel-option {
  display: flex;
  align-items: center;
  gap: 6px;
  margin-bottom: 6px;
  font-size: 12px;
}

.channel-option:last-child {
  margin-bottom: 0;
}

input,
select,
button {
  padding: 6px 8px;
}

.muted {
  color: #666;
  font-size: 12px;
}

#results .item {
  border-bottom: 1px solid #ddd;
  padding: 12px 0;
}

.summary-row {
  display: flex;
  gap: 16px;
  flex-wrap: wrap;
  align-items: flex-start;
  margin-top: 12px;
}

.summary-left {
  flex: 0 1 280px;
  max-width: 360px;
}

.summary-right {
  flex: 1 1 0%;
  min-width: 0;
  background: #f5f5f5;
  padding: 12px;
  border-radius: 8px;
  box-shadow: 0 1px 2px rgba(0, 0, 0, 0.08);
}

#metrics {
  margin-top: 12px;
  display: flex;
  flex-direction: column;
  gap: 8px;
}

#metricsStatus {
  min-height: 16px;
}

#metricsContent {
  display: flex;
  flex-direction: column;
  gap: 6px;
}

#frequencyChart {
  margin-top: 8px;
}

#frequencyChart svg {
  max-width: 100%;
}

#frequencyChart .axis path,
#frequencyChart .axis line {
  stroke: #ccc;
}

#frequencyChart .freq-layer rect {
  stroke: #fff;
  stroke-width: 0.5px;
}

.freq-legend {
  margin-top: 8px;
  display: flex;
  flex-wrap: wrap;
  gap: 8px;
  font-size: 12px;
  color: #444;
}

.freq-legend-item {
  display: flex;
  align-items: center;
  gap: 6px;
}

.freq-legend-swatch {
  width: 12px;
  height: 12px;
  border-radius: 2px;
  display: inline-block;
}

.transcript {
  background: #fafafa;
  padding: 8px;
  margin-top: 6px;
  max-height: 200px;
  overflow-y: auto;
}

.highlight-list {
  display: flex;
  flex-direction: column;
  gap: 8px;
  max-height: none;
  overflow: visible;
}

.highlight-row {
  padding: 4px 0;
  border-bottom: 1px solid #ececec;
}

.highlight-row:last-child {
  border-bottom: none;
}

.transcript-wrapper {
  margin-top: 8px;
}

.pager {
  margin-top: 12px;
  display: flex;
  gap: 8px;
}

mark {
  background: #ffe58a;
  padding: 0 2px;
}

.badge-row {
  margin-top: 6px;
  display: flex;
  gap: 4px;
  flex-wrap: wrap;
}

.badge {
  background: #0b6efd;
  color: #fff;
  border-radius: 999px;
  padding: 2px 8px;
  font-size: 12px;
}
226
transcript_collector.py
Normal file
@ -0,0 +1,226 @@
"""
|
||||
Lightweight helpers for gathering video metadata and transcripts from YouTube.
|
||||
|
||||
Usage:
|
||||
python -m python_app.transcript_collector --channel UC123 --output data/raw
|
||||
|
||||
Relies on:
|
||||
- YouTube Data API v3 (requires YOUTUBE_API_KEY).
|
||||
- youtube-transcript-api for transcript retrieval.
|
||||
Both libraries are optional at import time so the module can still be referenced
|
||||
when only working with existing JSON dumps.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import asdict, dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, Iterator, List, Optional
|
||||
|
||||
from .config import CONFIG
|
||||
|
||||
try:
|
||||
from googleapiclient.discovery import build as build_youtube # type: ignore
|
||||
except ImportError: # pragma: no cover - library optional
|
||||
build_youtube = None
|
||||
|
||||
try:
|
||||
from youtube_transcript_api import YouTubeTranscriptApi # type: ignore
|
||||
except ImportError: # pragma: no cover - library optional
|
||||
YouTubeTranscriptApi = None
|
||||
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class TranscriptSegment:
|
||||
start: float
|
||||
duration: float
|
||||
text: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class VideoRecord:
|
||||
video_id: str
|
||||
channel_id: str
|
||||
channel_title: str
|
||||
title: str
|
||||
description: str
|
||||
published_at: str
|
||||
url: str
|
||||
transcript: List[TranscriptSegment]
|
||||
|
||||
|
||||
def _ensure_youtube_client(api_key: Optional[str]):
|
||||
if build_youtube is None:
|
||||
raise RuntimeError(
|
||||
"google-api-python-client not installed. "
|
||||
"Install google-api-python-client to collect metadata."
|
||||
)
|
||||
if not api_key:
|
||||
raise RuntimeError(
|
||||
"Set YOUTUBE_API_KEY to collect metadata from YouTube."
|
||||
)
|
||||
return build_youtube("youtube", "v3", developerKey=api_key)
|
||||
|
||||
|
||||
def _ensure_transcript_api():
|
||||
if YouTubeTranscriptApi is None:
|
||||
raise RuntimeError(
|
||||
"youtube-transcript-api not installed. "
|
||||
"Install youtube-transcript-api to fetch transcripts."
|
||||
)
|
||||
return YouTubeTranscriptApi()
|
||||
|
||||
|
||||
def iter_channel_videos(
|
||||
channel_id: str,
|
||||
*,
|
||||
api_key: Optional[str] = None,
|
||||
max_pages: int = 10,
|
||||
) -> Iterator[Dict]:
|
||||
"""
|
||||
Yield raw playlist items for the uploads playlist of the given channel.
|
||||
|
||||
Args:
|
||||
channel_id: Target YouTube channel ID.
|
||||
api_key: Explicit API key (defaults to config value).
|
||||
max_pages: Hard cap on paginated playlist fetches to keep things simple.
|
||||
"""
|
||||
client = _ensure_youtube_client(api_key or CONFIG.youtube.api_key)
|
||||
channels = (
|
||||
client.channels().list(id=channel_id, part="contentDetails").execute()
|
||||
)
|
||||
items = channels.get("items", [])
|
||||
if not items:
|
||||
raise ValueError(f"Channel {channel_id} not found.")
|
||||
uploads_playlist = (
|
||||
items[0]
|
||||
.get("contentDetails", {})
|
||||
.get("relatedPlaylists", {})
|
||||
.get("uploads")
|
||||
)
|
||||
if not uploads_playlist:
|
||||
raise ValueError(f"Channel {channel_id} missing uploads playlist.")
|
||||
|
||||
request = client.playlistItems().list(
|
||||
playlistId=uploads_playlist, part="snippet", maxResults=50
|
||||
)
|
||||
page = 0
|
||||
while request and page < max_pages:
|
||||
response = request.execute()
|
||||
for item in response.get("items", []):
|
||||
yield item
|
||||
page += 1
|
||||
request = client.playlistItems().list_next(request, response)
|
||||
|
||||
|
||||
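

# Each playlist item yielded above carries a "snippet" dict; collect_channel()
# below reads snippet.resourceId.videoId plus the channelId, channelTitle,
# title, description, and publishedAt fields from it.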


def fetch_transcript(
    video_id: str, *, languages: Optional[Iterable[str]] = None
) -> List[TranscriptSegment]:
    """Return transcript segments for a video, if available."""
    api = _ensure_transcript_api()
    try:
        # get_transcript expects an iterable of language codes; fall back to
        # English rather than passing None through.
        transcripts = api.get_transcript(
            video_id, languages=list(languages) if languages else ["en"]
        )
    except Exception as exc:  # broad catch keeps draft simple
        LOGGER.warning("Transcript unavailable for %s: %s", video_id, exc)
        return []
    return [
        TranscriptSegment(
            start=entry.get("start", 0.0),
            duration=entry.get("duration", 0.0),
            text=entry.get("text", ""),
        )
        for entry in transcripts
    ]
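

# Example (hypothetical video ID):
#     segments = fetch_transcript("dQw4w9WgXcQ", languages=["en"])
#     if segments:
#         print(segments[0].start, segments[0].text)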


def collect_channel(
    channel_id: str,
    output_dir: Path,
    *,
    api_key: Optional[str] = None,
    max_pages: int = 2,
    languages: Optional[List[str]] = None,
) -> List[VideoRecord]:
    """
    Collect metadata + transcripts for a channel and store as JSON files.

    Returns the in-memory list to make it easy to chain into ingestion.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    videos: List[VideoRecord] = []
    for item in iter_channel_videos(
        channel_id, api_key=api_key, max_pages=max_pages
    ):
        snippet = item.get("snippet", {})
        video_id = snippet.get("resourceId", {}).get("videoId")
        if not video_id:
            continue
        segments = fetch_transcript(video_id, languages=languages)
        record = VideoRecord(
            video_id=video_id,
            channel_id=snippet.get("channelId", channel_id),
            channel_title=snippet.get("channelTitle", ""),
            title=snippet.get("title", ""),
            description=snippet.get("description", ""),
            published_at=snippet.get("publishedAt", ""),
            url=f"https://www.youtube.com/watch?v={video_id}",
            transcript=segments,
        )
        videos.append(record)
        dest = output_dir / f"{video_id}.json"
        with dest.open("w", encoding="utf-8") as handle:
            json.dump(asdict(record), handle, ensure_ascii=False, indent=2)
        LOGGER.info("Saved %s", dest)
    return videos


def _build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="Collect channel transcripts into JSON files."
    )
    parser.add_argument(
        "--channel",
        required=True,
        help="YouTube channel ID (e.g. UCXYZ).",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("data/raw"),
        help="Directory to write per-video JSON files.",
    )
    parser.add_argument(
        "--max-pages",
        type=int,
        default=2,
        help="Number of paginated channel pages to pull (50 videos per page).",
    )
    parser.add_argument(
        "--language",
        dest="languages",
        action="append",
        help="Preferred transcript languages (can be repeated).",
    )
    return parser


def main() -> None:
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    args = _build_parser().parse_args()
    collect_channel(
        args.channel,
        args.output,
        max_pages=args.max_pages,
        languages=args.languages,
    )


if __name__ == "__main__":  # pragma: no cover
    main()