def build_year_filter(year: Optional[str]) -> Optional[Dict]:
    """Build an Elasticsearch range filter limiting ``date`` to one calendar year.

    Args:
        year: The year as a string of digits, e.g. ``"2021"``. ``None`` or an
            empty string means "no year filter".

    Returns:
        A ``{"range": {"date": {...}}}`` clause covering the half-open interval
        from 1 January of ``year`` up to (not including) 1 January of the next
        year, or ``None`` when ``year`` is missing, is not an integer, or lies
        outside 1..9998.
    """
    if not year:
        return None

    # Only the conversion can raise; keep the try body to that single call.
    try:
        year_int = int(year)
    except (ValueError, TypeError):
        return None

    # Both bounds are sent with an explicit "yyyy-MM-dd" format, so each must
    # render as exactly four year digits. Negative years, zero, and anything
    # whose successor exceeds 9999 cannot, and are treated like any other
    # invalid input instead of being forwarded as a malformed date.
    if not 1 <= year_int <= 9998:
        return None

    return {
        "range": {
            "date": {
                # Zero-pad so that e.g. year 987 becomes "0987-01-01".
                "gte": f"{year_int:04d}-01-01",
                "lt": f"{year_int + 1:04d}-01-01",
                "format": "yyyy-MM-dd",
            }
        }
    }
"calendar_interval": "year", + "format": "yyyy", + "order": {"_key": "desc"} + } + } + } + } + + if config.elastic.debug: + LOGGER.info( + "Elasticsearch years request: %s", + json.dumps({"index": index, "body": body}, indent=2), + ) + + response = client.search(index=index, body=body) + + if config.elastic.debug: + LOGGER.info( + "Elasticsearch years response: %s", + json.dumps(response, indent=2, default=str), + ) + + buckets = ( + response.get("aggregations", {}) + .get("years", {}) + .get("buckets", []) + ) + + data = [ + { + "Year": bucket.get("key_as_string"), + "Count": bucket.get("doc_count", 0), + } + for bucket in buckets + if bucket.get("doc_count", 0) > 0 + ] + + return jsonify(data) + @app.route("/api/search") def search(): query = request.args.get("q", "", type=str) @@ -578,6 +652,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask: if legacy_channel: raw_channels.append(legacy_channel) channels = parse_channel_params(raw_channels) + year = request.args.get("year", "", type=str) or None sort = request.args.get("sort", "relevant", type=str) page = max(request.args.get("page", 0, type=int), 0) size = max(request.args.get("size", 10, type=int), 1) @@ -598,6 +673,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask: payload = build_query_payload( query, channels=channels, + year=year, sort=sort, use_exact=use_exact, use_fuzzy=use_fuzzy, @@ -671,6 +747,8 @@ def create_app(config: AppConfig = CONFIG) -> Flask: "primary": bool(highlight_map.get("transcript_full")), "secondary": bool(highlight_map.get("transcript_secondary_full")), }, + "internal_references_count": source.get("internal_references_count", 0), + "referenced_by_count": source.get("referenced_by_count", 0), } ) @@ -716,6 +794,7 @@ def create_app(config: AppConfig = CONFIG) -> Flask: if legacy_channel: raw_channels.append(legacy_channel) channels = parse_channel_params(raw_channels) + year = request.args.get("year", "", type=str) or None interval = (request.args.get("interval", "month") 
/**
 * Populate the year <select> with the buckets returned by /api/years.
 *
 * Each bucket is expected to carry a `Year` and a `Count` field; one option
 * is rendered per bucket as "<Year> (<Count>)". An empty-valued "All Years"
 * option is always placed first so that "no year filter" stays selectable.
 * Failures are logged and leave the existing options untouched.
 */
async function loadYears() {
  if (!yearSel) return;
  try {
    const res = await fetch("/api/years");
    if (!res.ok) {
      // Do not try to render an error page/body as a list of years.
      throw new Error(`Unexpected response status ${res.status}`);
    }
    const data = await res.json();
    if (!Array.isArray(data)) {
      throw new Error("Unexpected /api/years payload");
    }

    // Remember the current choice so repopulating does not silently reset it.
    const previous = yearSel.value;

    // Build the new option list off-DOM, starting with the "All Years" entry.
    const fragment = document.createDocumentFragment();
    const allOption = document.createElement("option");
    allOption.value = "";
    allOption.textContent = "All Years";
    fragment.appendChild(allOption);

    data.forEach((item) => {
      const option = document.createElement("option");
      option.value = item.Year;
      option.textContent = `${item.Year} (${item.Count})`;
      fragment.appendChild(option);
    });

    yearSel.innerHTML = "";
    yearSel.appendChild(fragment);

    // Restore the earlier selection only if it still exists; assigning a value
    // with no matching option would leave the select with nothing selected.
    const stillPresent = data.some((item) => String(item.Year) === previous);
    yearSel.value = stillPresent ? previous : "";
  } catch (err) {
    console.error("Failed to load years", err);
  }
}
URL(window.location.href); next.searchParams.set("q", q); next.searchParams.set("sort", sort); next.searchParams.delete("channel_id"); next.searchParams.delete("channel"); channels.forEach((id) => next.searchParams.append("channel_id", id)); + if (year) { + next.searchParams.set("year", year); + } else { + next.searchParams.delete("year"); + } next.searchParams.set("page", page); next.searchParams.set("size", size); next.searchParams.set("exact", exact ? "1" : "0"); @@ -893,7 +920,7 @@ function renderFrequencyChart(buckets, channelTotals) { freqChart.appendChild(legend); } -async function updateFrequencyChart(term, channels, queryMode) { +async function updateFrequencyChart(term, channels, year, queryMode) { if (!freqChart || typeof d3 === "undefined") { return; } @@ -911,6 +938,9 @@ async function updateFrequencyChart(term, channels, queryMode) { params.set("term", trimmed); params.set("interval", "month"); (channels || []).forEach((id) => params.append("channel_id", id)); + if (year) { + params.set("year", year); + } if (queryMode) { params.set("query_string", "1"); } @@ -961,6 +991,13 @@ async function updateFrequencyChart(term, channels, queryMode) { const badges = []; if (item.highlightSource && item.highlightSource.primary) badges.push('primary transcript'); if (item.highlightSource && item.highlightSource.secondary) badges.push('secondary transcript'); + + // Add reference count badges + const refByCount = item.referenced_by_count || 0; + const refToCount = item.internal_references_count || 0; + if (refByCount > 0) badges.push(`${refByCount} backlink${refByCount !== 1 ? 's' : ''}`); + if (refToCount > 0) badges.push(`${refToCount} reference${refToCount !== 1 ? 's' : ''}`); + const badgeHtml = badges.length ? `